Autokeras
2021. 5. 6. 18:18ㆍ민공지능/딥러닝 & 머신러닝
autokeras 설치 : pip install autokeras
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import autokeras as ak

# Load MNIST handwritten digits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
for split in (x_train, y_train):
    print(split.shape)  # (60000, 28, 28) then (60000,)

# Initialize the image classifier.
# max_trials caps how many candidate architectures the search will try.
clf = ak.ImageClassifier(overwrite=True, max_trials=1)

# Run the architecture search on the training data.
# fit() uses validation_split=0.2 by default.
clf.fit(x_train, y_train, epochs=10)

# Predict with the best model found by the search.
predicted_y = clf.predict(x_test)
print(predicted_y)

# Evaluate the best model on the held-out test data.
print(clf.evaluate(x_test, y_test))
이미지 분류, 이미지 회귀, 텍스트 분류, 텍스트 회귀 등에서 사용할 수 있다.
Autokeras의 홈페이지에서 Documentation에 들어가면 기본값 설정을 볼 수 있다.
autokeras가 실행되면 best_model과 checkpoint 파일들이 현재 directory에 많이 생성된다.
많은 데이터를 깃허브에 올릴 수 없기 때문에 따로 저장 경로를 설정해준다.
회귀 데이터인 boston 예제를 StructuredDataRegressor로 돌려봤다.
import tensorflow as tf
import numpy as np
import autokeras as ak
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

# 1. Data
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — this import fails on current versions. Pin scikit-learn < 1.2 to run.
dataset = load_boston()
x, y = dataset.data, dataset.target

# 80/20 train/test split with a fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.8, random_state=42
)
print(x_train.shape, x_test.shape)
print(x.shape, y.shape)
# (506, 13) (506,)
# ------------------------------------------------------------------
# Disabled experiment (kept as a no-op string literal rather than deleted):
# the original StructuredDataRegressor training/search run on the Boston
# data, including export/save fallbacks and the recorded metrics at the
# bottom. Remove the surrounding quotes to re-run it.
# ------------------------------------------------------------------
'''
#2 . 모델구성
model = ak.StructuredDataRegressor(
overwrite=True,
max_trials=10,
loss = 'mse',
metrics=['mae'],
directory='C:/data/ak/'
)
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
es = EarlyStopping(patience=4, verbose=1, restore_best_weights=True, monitor='val_loss', mode='min')
lr = ReduceLROnPlateau(patience=2, factor=0.5, verbose=1)
cp = ModelCheckpoint(monitor='val_loss', filepath='C:/data/mc/', save_best_only=True, save_weights_only=True)
model.fit(x_train, y_train, epochs=10)
# 4. 평가 예측
loss, mae = model.evaluate(x_test, y_test, batch_size=64)
print("loss, mae : ", loss, mae)
y_predict = model.predict(x_test)
from sklearn.metrics import mean_squared_error
def RMSE(y_test, y_predict) :
return np.sqrt(mean_squared_error(y_test, y_predict))
print("RMSE : ", RMSE(y_test, y_predict))
from sklearn.metrics import r2_score
r2 = r2_score(y_test, y_predict)
print("R2 : ", r2)
model_ak = model.export_model()
try:
model_ak.save("C:/data/h5/ak_boston", save_format="tf")
except Exception:
model_ak.save('C:/data/h5/ak_boston.h5')
best_model = model.tuner.get_best_model()
try:
best_model.save("C:/data/h5/best_ak_boston", save_format="tf")
except Exception:
best_model.save('C:/data/h5/best_ak_boston.h5')
# 저장 에러날 때
# https://autokeras.com/tutorial/export/
# model3 = load_model('ak_save_boston', custom_objects=ak.CUSTOM_OBJECTS)
# result_boston = model3.evaluate(x_test, y_test)
# y_pred = model3.predict(x_test)
# r2 = r2_score(y_test, y_pred)
# print("load_result :", result_boston, r2)
# ImageRegressor
# ValueError: Expect the data to ImageInput to have shape (batch_size, height, width, channels) or (batch_size, height, width) dimensions, but got input shape [64, 13]
# StructuredDataRegressor
# loss, mae : 15.613632202148438 2.7075181007385254
# RMSE : 3.9514088969530325
# R2 : 0.7870881386715735
'''
from tensorflow.keras.models import load_model

# Reload the exported model; AutoKeras custom layers need
# custom_objects=ak.CUSTOM_OBJECTS to deserialize.
model = load_model('C:/data/h5/ak_boston', custom_objects=ak.CUSTOM_OBJECTS)
model.summary()

# Reload the tuner's best checkpoint straight from the search directory.
best_model = load_model('C:/data/ak/structured_data_regressor/best_model', custom_objects=ak.CUSTOM_OBJECTS)
best_model.summary()

###################################################################
# Re-evaluate both reloaded models on the held-out test split.
for reloaded in (model, best_model):
    print(reloaded.evaluate(x_test, y_test))
# Captured output (string literal kept for reference): model.summary() of the
# reloaded StructuredDataRegressor, followed by its recorded evaluate() result.
'''
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 13)] 0
_________________________________________________________________
multi_category_encoding (Mul (None, 13) 0
_________________________________________________________________
normalization (Normalization (None, 13) 27
_________________________________________________________________
dense (Dense) (None, 128) 1792
_________________________________________________________________
re_lu (ReLU) (None, 128) 0
_________________________________________________________________
dense_1 (Dense) (None, 32) 4128
_________________________________________________________________
re_lu_1 (ReLU) (None, 32) 0
_________________________________________________________________
dense_2 (Dense) (None, 32) 1056
_________________________________________________________________
re_lu_2 (ReLU) (None, 32) 0
_________________________________________________________________
regression_head_1 (Dense) (None, 1) 33
=================================================================
Total params: 7,036
Trainable params: 7,009
Non-trainable params: 27
_________________________________________________________________
'''
# [15.613630294799805, 2.7075181007385254]
breast_cancer 예제를 ImageClassifier로 돌려봤다.
import numpy as np
import tensorflow as tf
import autokeras as ak
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Load the tabular breast-cancer dataset.
datasets = load_breast_cancer()
x, y = datasets.data, datasets.target

# 80/20 split with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.8, random_state=42)

# Scale features to [0, 1] using statistics from the training split only,
# so no information from the test split leaks into preprocessing.
scaler = MinMaxScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Append a trailing axis so the tabular rows look image-shaped
# (samples, features, 1) for ImageClassifier.
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)
print(x_train.shape, x_test.shape)
# ------------------------------------------------------------------
# Disabled experiment (no-op string literal): the original ImageClassifier
# search on the breast-cancer data, with callbacks, export, and the
# recorded test result on the last line. Un-quote to re-run.
# ------------------------------------------------------------------
'''
model = ak.ImageClassifier(
overwrite=True,
max_trials=5,
loss = 'binary_crossentropy',
metrics=['acc'],
directory='C:/data/ak/'
)
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
es = EarlyStopping(patience=4, verbose=1, restore_best_weights=True, monitor='val_loss', mode='min')
lr = ReduceLROnPlateau(patience=2, factor=0.5, verbose=1)
cp = ModelCheckpoint(monitor='val_loss', filepath='C:/data/mc/', save_best_only=True, save_weights_only=True)
model.fit(x_train, y_train, epochs=10, validation_split=0.2,
callbacks=[es,lr,cp])
result = model.evaluate(x_test, y_test)
print(result)
model_ak = model.export_model()
model_ak.save('C:/data/h5/ak_cancer.h5')
best_model = model.tuner.get_best_model()
best_model.save('C:/data/h5/best_ak_cancer.h5')
# [0.06950535625219345, 0.9649122953414917]
'''
from tensorflow.keras.models import load_model

# Reload the exported model. FIX: pass custom_objects=ak.CUSTOM_OBJECTS here
# too — the exported AutoKeras model contains custom layers (e.g. the
# cast/classification-head layers visible in the summary below), and the
# parallel Boston reload (and the AutoKeras export tutorial) both supply it;
# without it deserialization of those layers can fail.
model = load_model('C:/data/h5/ak_cancer.h5', custom_objects=ak.CUSTOM_OBJECTS)
model.summary()

# Reload the tuner's best checkpoint from the search directory.
best_model = load_model('C:/data/ak/image_classifier/best_model', custom_objects=ak.CUSTOM_OBJECTS)
best_model.summary()

###################################################################
# Re-evaluate both reloaded models on the held-out test split.
result = model.evaluate(x_test, y_test)
print(result)
best_result = best_model.evaluate(x_test, y_test)
print(best_result)
# Captured output (string literal kept for reference): model.summary() of the
# reloaded breast-cancer ImageClassifier, followed by the recorded
# evaluate() logs for both the exported and the best model.
'''
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 30, 1)] 0
_________________________________________________________________
cast_to_float32 (CastToFloat (None, 30, 1) 0
_________________________________________________________________
expand_last_dim (ExpandLastD (None, 30, 1, 1) 0
_________________________________________________________________
normalization (Normalization (None, 30, 1, 1) 3
_________________________________________________________________
conv2d (Conv2D) (None, 30, 1, 32) 320
_________________________________________________________________
conv2d_1 (Conv2D) (None, 30, 1, 64) 18496
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 15, 1, 64) 0
_________________________________________________________________
dropout (Dropout) (None, 15, 1, 64) 0
_________________________________________________________________
flatten (Flatten) (None, 960) 0
_________________________________________________________________
dropout_1 (Dropout) (None, 960) 0
_________________________________________________________________
dense (Dense) (None, 1) 961
_________________________________________________________________
classification_head_1 (Activ (None, 1) 0
=================================================================
Total params: 19,780
Trainable params: 19,777
Non-trainable params: 3
_________________________________________________________________
'''
# 4/4 [==============================] - 2s 7ms/step - loss: 0.0695 - acc: 0.9649
# [0.06950535625219345, 0.9649122953414917]
# 4/4 [==============================] - 0s 3ms/step - loss: 0.0695 - acc: 0.9649
# [0.06950535625219345, 0.9649122953414917]
max_trials를 조금만 줘서 직접 하이퍼 파라미터 튜닝한 것보다는 결과가 좋지 않다.
max_trials와 epoch 수를 더 늘리면 결과가 좋게 나올 것 같다.
'민공지능 > 딥러닝 & 머신러닝' 카테고리의 다른 글
Optuna (0) | 2021.05.22 |
---|---|
SVM(Support Vector Machine) (0) | 2021.05.22 |
EfficientNet (0) | 2021.05.22 |
TTA(Test Time Augmentation) (0) | 2021.05.22 |
Scikit-Learn의 Scaler (0) | 2021.05.22 |