Autokeras

2021. 5. 6. 18:18 · 민공지능/딥러닝 & 머신러닝

autokeras.com/

autokeras 설치 : pip install autokeras

import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import autokeras as ak

# Load MNIST and inspect the raw array shapes.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
print(x_train.shape)  # (60000, 28, 28)
print(y_train.shape)  # (60000,)


# Build the AutoKeras image classifier.
# max_trials bounds how many candidate architectures the search will try.
clf = ak.ImageClassifier(overwrite=True, max_trials=1)

# Run the architecture search on the training data.
# AutoKeras applies validation_split=0.2 by default during fit.
clf.fit(x_train, y_train, epochs=10)

# Predict with the best model found by the search.
predictions = clf.predict(x_test)
print(predictions)

# Evaluate the best model on the held-out test data.
print(clf.evaluate(x_test, y_test))

이미지 분류, 이미지 회귀, 텍스트 분류, 텍스트 회귀 등에서 사용할 수 있다.

 

Autokeras의 홈페이지에서 Documentation에 들어가면 기본값 설정을 볼 수 있다.

autokeras가 실행되면 best_model과 checkpoint 파일들이 현재 directory에 많이 생성된다.

많은 데이터를 깃허브에 올릴 수 없기 때문에 따로 저장 경로를 설정해준다. 

 

회귀 데이터인 boston 예제를 StructuredDataRegressor로 돌려봤다.

import tensorflow as tf
import numpy as np
import autokeras as ak
from sklearn.datasets import load_boston
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — this script requires scikit-learn < 1.2 to run as written.

# 1. Data
dataset = load_boston()
x = dataset.data    # feature matrix, shape (506, 13) per the print below
y = dataset.target  # regression target, shape (506,)

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size = 0.8, random_state = 42
)

print(x_train.shape, x_test.shape)
print(x.shape, y.shape)
# (506, 13) (506,)
'''
#2 . 모델구성
model = ak.StructuredDataRegressor(
    overwrite=True, 
    max_trials=10, 
    loss = 'mse',
    metrics=['mae'],
    directory='C:/data/ak/'
)

from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
es = EarlyStopping(patience=4, verbose=1, restore_best_weights=True, monitor='val_loss', mode='min')
lr = ReduceLROnPlateau(patience=2, factor=0.5, verbose=1)
cp = ModelCheckpoint(monitor='val_loss', filepath='C:/data/mc/', save_best_only=True, save_weights_only=True)

model.fit(x_train, y_train, epochs=10) 

# 4. 평가 예측
loss, mae = model.evaluate(x_test, y_test, batch_size=64)
print("loss, mae : ", loss, mae)

y_predict = model.predict(x_test)

from sklearn.metrics import mean_squared_error
def RMSE(y_test, y_predict) : 
    return np.sqrt(mean_squared_error(y_test, y_predict))
print("RMSE : ", RMSE(y_test, y_predict))

from sklearn.metrics import r2_score
r2 = r2_score(y_test, y_predict)
print("R2 : ", r2)

model_ak = model.export_model()
try:
    model_ak.save("C:/data/h5/ak_boston", save_format="tf")
except Exception:
    model_ak.save('C:/data/h5/ak_boston.h5')


best_model = model.tuner.get_best_model()
try:
    best_model.save("C:/data/h5/best_ak_boston", save_format="tf")
except Exception:
    best_model.save('C:/data/h5/best_ak_boston.h5')

# 저장 에러날 때
# https://autokeras.com/tutorial/export/

# model3 = load_model('ak_save_boston', custom_objects=ak.CUSTOM_OBJECTS)
# result_boston = model3.evaluate(x_test, y_test)

# y_pred = model3.predict(x_test)
# r2 = r2_score(y_test, y_pred)

# print("load_result :", result_boston, r2)


# ImageRegressor
# ValueError: Expect the data to ImageInput to have shape (batch_size, height, width, channels) or (batch_size, height, width) dimensions, but got input shape [64, 13]

# StructuredDataRegressor
# loss, mae :  15.613632202148438 2.7075181007385254
# RMSE :  3.9514088969530325
# R2 :  0.7870881386715735
'''

# Reload the model previously saved via export_model() (see the disabled
# training run in the string block above). AutoKeras models contain custom
# layers, so ak.CUSTOM_OBJECTS must be passed to load_model.
from tensorflow.keras.models import load_model
model = load_model('C:/data/h5/ak_boston', custom_objects=ak.CUSTOM_OBJECTS)
model.summary()


# best_model is the checkpoint AutoKeras itself wrote under its directory=
# setting ('C:/data/ak/') during the earlier search.
best_model = load_model('C:/data/ak/structured_data_regressor/best_model', custom_objects=ak.CUSTOM_OBJECTS)
best_model.summary()
###################################################################

# Evaluate both reloaded models on the held-out test split.
result = model.evaluate(x_test, y_test)
print(result)

best_result = best_model.evaluate(x_test, y_test)
print(best_result)

'''
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_1 (InputLayer)         [(None, 13)]              0
_________________________________________________________________
multi_category_encoding (Mul (None, 13)                0
_________________________________________________________________
normalization (Normalization (None, 13)                27
_________________________________________________________________
dense (Dense)                (None, 128)               1792
_________________________________________________________________
re_lu (ReLU)                 (None, 128)               0
_________________________________________________________________
dense_1 (Dense)              (None, 32)                4128
_________________________________________________________________
re_lu_1 (ReLU)               (None, 32)                0
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1056
_________________________________________________________________
re_lu_2 (ReLU)               (None, 32)                0
_________________________________________________________________
regression_head_1 (Dense)    (None, 1)                 33
=================================================================
Total params: 7,036
Trainable params: 7,009
Non-trainable params: 27
_________________________________________________________________
'''
# [15.613630294799805, 2.7075181007385254]

breast_cancer 예제를 ImageClassifier로 돌려봤다.

import numpy as np
import tensorflow as tf
import autokeras as ak
from sklearn.datasets import load_breast_cancer

datasets = load_breast_cancer()

x = datasets.data    # feature matrix
y = datasets.target  # binary class labels

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size = 0.8, random_state = 42)

from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training split only, then apply to both splits,
# so no test-set statistics leak into training.
scaler = MinMaxScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Reshape the tabular data to (samples, features, 1) so ImageClassifier's
# ImageInput accepts it as (batch, height, width) "images".
x_train = x_train.reshape(x_train.shape[0],x_train.shape[1],1)
x_test = x_test.reshape(x_test.shape[0],x_test.shape[1],1)
print(x_train.shape, x_test.shape)
'''
model = ak.ImageClassifier(
    overwrite=True, 
    max_trials=5, 
    loss = 'binary_crossentropy',
    metrics=['acc'],
    directory='C:/data/ak/'
)

from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
es = EarlyStopping(patience=4, verbose=1, restore_best_weights=True, monitor='val_loss', mode='min')
lr = ReduceLROnPlateau(patience=2, factor=0.5, verbose=1)
cp = ModelCheckpoint(monitor='val_loss', filepath='C:/data/mc/', save_best_only=True, save_weights_only=True)
 
model.fit(x_train, y_train, epochs=10, validation_split=0.2,
            callbacks=[es,lr,cp]) 

result = model.evaluate(x_test, y_test)

print(result)

model_ak = model.export_model()
model_ak.save('C:/data/h5/ak_cancer.h5') 

best_model = model.tuner.get_best_model()
best_model.save('C:/data/h5/best_ak_cancer.h5')

# [0.06950535625219345, 0.9649122953414917]
'''

# Reload the model exported in the disabled run above.
# NOTE(review): this load passes no custom_objects, unlike the other loads in
# this file — AutoKeras models usually need ak.CUSTOM_OBJECTS; confirm this
# .h5 actually loads on the target Keras version.
from tensorflow.keras.models import load_model
model = load_model('C:/data/h5/ak_cancer.h5')
model.summary()


# best_model is the checkpoint AutoKeras wrote under its directory= setting.
best_model = load_model('C:/data/ak/image_classifier/best_model', custom_objects=ak.CUSTOM_OBJECTS)
best_model.summary()
###################################################################

# Evaluate both reloaded models on the held-out test split.
result = model.evaluate(x_test, y_test)
print(result)

best_result = best_model.evaluate(x_test, y_test)
print(best_result)

'''
Layer (type)                 Output Shape              Param #
=================================================================
input_1 (InputLayer)         [(None, 30, 1)]           0
_________________________________________________________________
cast_to_float32 (CastToFloat (None, 30, 1)             0
_________________________________________________________________
expand_last_dim (ExpandLastD (None, 30, 1, 1)          0
_________________________________________________________________
normalization (Normalization (None, 30, 1, 1)          3
_________________________________________________________________
conv2d (Conv2D)              (None, 30, 1, 32)         320
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 30, 1, 64)         18496
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 15, 1, 64)         0
_________________________________________________________________
dropout (Dropout)            (None, 15, 1, 64)         0
_________________________________________________________________
flatten (Flatten)            (None, 960)               0
_________________________________________________________________
dropout_1 (Dropout)          (None, 960)               0
_________________________________________________________________
dense (Dense)                (None, 1)                 961
_________________________________________________________________
classification_head_1 (Activ (None, 1)                 0
=================================================================
Total params: 19,780
Trainable params: 19,777
Non-trainable params: 3
_________________________________________________________________

'''

# 4/4 [==============================] - 2s 7ms/step - loss: 0.0695 - acc: 0.9649
# [0.06950535625219345, 0.9649122953414917]
# 4/4 [==============================] - 0s 3ms/step - loss: 0.0695 - acc: 0.9649
# [0.06950535625219345, 0.9649122953414917]

max_trials를 조금만 줘서 직접 하이퍼 파라미터 튜닝한 것 보다는 결과가 좋지 않다.

max_trials와 epoch 수를 더 늘리면 결과가 좋게 나올 것 같다.

 

 

'민공지능 > 딥러닝 & 머신러닝' 카테고리의 다른 글

Optuna  (0) 2021.05.22
SVM(Support Vector Machine)  (0) 2021.05.22
EfficientNet  (0) 2021.05.22
TTA(Test Time Augmentation)  (0) 2021.05.22
Scikit-Learn의 Scaler  (0) 2021.05.22