Parameter tuning
Last time, I ran a parameter search for a head with three fully connected layers.
This time I used those results to run cross-validation, and the score came out at 41.25% (+/- 35.04%), which is lower than the models tried so far.
The loss also failed to come down far enough, which led to the conclusion that the model with two fully connected layers is the better one.
So next I would like to check whether the model is actually usable in practice, using images taken with an iPhone or a similar device.
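For reference, the phone photos are fed to the network with the same preprocessing as the training data: each image is resized to 224x224 and scaled to [0, 1]. A minimal sketch (the file name here is just a placeholder):

```python
import numpy as np
from keras.preprocessing.image import img_to_array, load_img

# Resize to the 224x224 input expected by VGG19 and scale pixel values to [0, 1].
# 'my_photo.jpg' is a placeholder file name, not part of the actual dataset.
img = img_to_array(load_img('my_photo.jpg', target_size=(224, 224)))
x = np.expand_dims(img / 255.0, axis=0)  # add a batch dimension -> shape (1, 224, 224, 3)
```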
The source code is shown below (VGG19_cross3.py).
```python:VGG19_cross3.py
import keras
import numpy as np
import matplotlib.pyplot as plt
import os
import re
import csv
import tensorflow as tf
from sklearn.model_selection import KFold
from keras.utils import np_utils
from keras.models import Sequential, Model
from keras.layers import Input, Conv2D, MaxPooling2D, Dense, Dropout, Activation, Flatten
from keras.preprocessing.image import array_to_img, img_to_array, load_img
from keras.applications import ResNet50, VGG19
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import optimizers
from sklearn.model_selection import train_test_split
from keras.backend import tensorflow_backend


def list_pictures(directory, ext='jpg|jpeg|bmp|png|ppm'):
    return [os.path.join(root, f)
            for root, _, files in os.walk(directory) for f in files
            if re.match(r'([\w]+\.(?:' + ext + '))', f.lower())]


X = []
Y = np.zeros((1000, 3))
Y_pre = []

hidden_neurons1 = 3100
hidden_neurons2 = 4500
hidden_neurons3 = 1000
out_neurons = 3
batch_size = 8
nb_epochs = 200
fold_num = 5

es = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto')

# Load the images and build the target matrix from DB.csv
picture_name = []
path = './test_dataset/'
for picture in list_pictures(path):
    picture_name.append(float(picture[picture.find(path)+len(path):picture.find('_0_')]))
    img = img_to_array(load_img(picture, target_size=(224, 224)))
    X.append(img)

with open('DB.csv', encoding="utf-8_sig") as f:
    for row in csv.reader(f, quoting=csv.QUOTE_NONNUMERIC):
        Y_pre.append(row)
Y_pre = np.array(Y_pre)

for i, name in enumerate(picture_name):
    Y[i, :] = Y_pre[np.where(Y_pre == name)[0], 1:4]

X = np.asarray(X)
X = X / 255.0
print(X.shape)
print(Y.shape)

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0, shuffle=False)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

kf = KFold(n_splits=fold_num, random_state=71, shuffle=False)
cvscores = []
count = 0
for train, test in kf.split(x_train):
    count += 1  # fold index (1..fold_num), used to name the checkpoint and plot files

    config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
    session = tf.Session(config=config)
    tensorflow_backend.set_session(session)

    input_tensor = Input(shape=(224, 224, 3))
    resnet50 = VGG19(include_top=False, weights='imagenet', input_tensor=input_tensor)

    modelCheckpoint = ModelCheckpoint(filepath='Checkpoint_' + str(count) + '.h5',
                                      monitor='val_loss',
                                      verbose=1,
                                      save_best_only=True,
                                      save_weights_only=False,
                                      mode='min',
                                      period=1)

    # Fully connected head: three hidden layers + linear output for regression
    top_model = Sequential()
    top_model.add(Flatten(input_shape=resnet50.output_shape[1:]))
    top_model.add(Dropout(0.2))
    top_model.add(Dense(hidden_neurons1))
    top_model.add(Activation("sigmoid"))
    top_model.add(Dropout(0.2))
    top_model.add(Dense(hidden_neurons2))
    top_model.add(Activation("sigmoid"))
    top_model.add(Dropout(0.2))
    top_model.add(Dense(hidden_neurons3))
    top_model.add(Activation("sigmoid"))
    top_model.add(Dropout(0.2))
    top_model.add(Dense(out_neurons))
    top_model.add(Activation("linear"))
    # top_model.summary()

    model = Model(inputs=resnet50.input, outputs=top_model(resnet50.output))
    model.compile(loss="mean_squared_error",
                  metrics=['accuracy'],
                  optimizer=optimizers.SGD(lr=0.0001))
    # model.summary()

    result = model.fit(x_train[train], y_train[train],
                       batch_size=batch_size,
                       epochs=nb_epochs,
                       verbose=1,
                       validation_data=(x_train[test], y_train[test]),
                       shuffle=True,
                       callbacks=[modelCheckpoint, es])

    # Evaluate the best checkpoint of this fold on its validation split
    model.load_weights('Checkpoint_' + str(count) + '.h5')
    scores = model.evaluate(x_train[test], y_train[test], verbose=0)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    cvscores.append(scores[1] * 100)

    result.history.keys()  # check the keys available in the training history
    ep = len(result.history['acc'])
    plt.figure()
    plt.plot(range(1, ep+1), result.history['acc'], label="training")
    plt.plot(range(1, ep+1), result.history['val_acc'], label="validation")
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.savefig('acc_' + str(count) + '.png')

    plt.figure()
    plt.plot(range(1, ep+1), result.history['loss'], label="training")
    plt.plot(range(1, ep+1), result.history['val_loss'], label="validation")
    plt.xlabel('Epochs')
    plt.ylabel('loss')
    plt.legend()
    plt.savefig('loss_' + str(count) + '.png')

print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

# Visualization
model.summary()

evascores = []
# Model evaluation: load each fold's checkpoint and evaluate it on the held-out test set
for eva in range(1, 6):
    # print(eva)
    model.load_weights('Checkpoint_' + str(eva) + '.h5')
    loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
    evascores.append(accuracy * 100)  # accuracy on the held-out test set, in %
print("%.2f%% (+/- %.2f%%)" % (np.mean(evascores), np.std(evascores)))

print("end of script")
```
Collecting test images
As test images, I prepared seven photos of home appliances taken with an iPhone 7 Plus.
The photos are shown in Figure 1 below.
They show a refrigerator, a microwave oven, an air purifier, and a washing machine, with the following dimensions:

| Appliance | Width x Depth x Height [mm] |
| --- | --- |
| Refrigerator | 685 x 633 x 1828 |
| Microwave oven | 525 x 460 x 435 |
| Air purifier | 405 x 337 x 687 |
| Washing machine | 639 x 722 x 1021 |
These dimensions are stored in DB_test.csv so that the program can handle them easily.
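For reference, the evaluation script below reads DB_test.csv with csv.QUOTE_NONNUMERIC and matches the first column against the numeric part of each file name in ./test_dataset_test/, using columns 2 to 4 as the width, depth, and height targets. The following is a minimal sketch of generating such a file; the image IDs are hypothetical, only the dimensions come from the table above:

```python
import csv

# Assumed row layout: [image ID, width, depth, height], all numeric so that
# csv.QUOTE_NONNUMERIC can parse every field as a float when reading.
# The IDs (1-4) are hypothetical; they must match the test file names (e.g. 1.jpg).
rows = [
    [1, 685, 633, 1828],  # refrigerator
    [2, 525, 460, 435],   # microwave oven
    [3, 405, 337, 687],   # air purifier
    [4, 639, 722, 1021],  # washing machine
]

with open('DB_test.csv', 'w', newline='', encoding='utf-8-sig') as f:
    csv.writer(f, quoting=csv.QUOTE_NONNUMERIC).writerows(rows)
```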
Creating the evaluation model
I wrote a program that trains on the entire dataset collected so far and then evaluates the model on the new images.
With it, a rough accuracy evaluation is possible using the values derived from the images currently available.
While making fine adjustments to the learning rate, dropout, batch size, and so on, training eventually produced very high scores on the unseen data: loss: 0.05868486687541008, acc: 1.0.
The loss and acc curves are shown below.
Judging from these curves, the model is still in the middle of fitting.
This is because the run was capped at 200 epochs this time.
Next time I plan to raise the number of epochs to 1000 and retrain, and then go on to try other large appliances and different camera angles to evaluate the robustness of this model.
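As a note on the planned retraining, here is a minimal sketch. It assumes that model, the data splits, batch_size, and the modelCheckpoint callback are already defined exactly as in the script shown below (VGG19_evaluation.py); only the epoch budget changes, and the patience value is an assumption.

```python
from keras.callbacks import EarlyStopping

# Continue from the best weights saved so far and allow a much longer run.
# 'model', 'x_train', 'y_train', 'x_test', 'y_test', 'batch_size' and
# 'modelCheckpoint' are assumed to be defined as in VGG19_evaluation.py.
nb_epochs = 1000
es = EarlyStopping(monitor='val_loss', min_delta=0, patience=50, verbose=1, mode='auto')  # patience: assumed value

model.load_weights('Checkpoint.h5')  # resume from the best checkpoint
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=nb_epochs,
          verbose=1,
          validation_data=(x_test, y_test),
          shuffle=True,
          callbacks=[modelCheckpoint, es])
```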
The source code is shown below (VGG19_evaluation.py).
```python:VGG19_evaluation.py
import keras
import numpy as np
import matplotlib.pyplot as plt
import os
import re
import csv
import tensorflow as tf
from sklearn.model_selection import KFold
from keras.utils import np_utils
from keras.models import Sequential, Model
from keras.layers import Input, Conv2D, MaxPooling2D, Dense, Dropout, Activation, Flatten
from keras.preprocessing.image import array_to_img, img_to_array, load_img
from keras.applications import ResNet50, VGG19
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import optimizers
from sklearn.model_selection import train_test_split
from keras.backend import tensorflow_backend

config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
session = tf.Session(config=config)
tensorflow_backend.set_session(session)


def list_pictures(directory, ext='jpg|jpeg|bmp|png|ppm'):
    return [os.path.join(root, f)
            for root, _, files in os.walk(directory) for f in files
            if re.match(r'([\w]+\.(?:' + ext + '))', f.lower())]


X = []
Y = np.zeros((1000, 3))
Y_pre = []

hidden_neurons1 = 4000
hidden_neurons2 = 600
out_neurons = 3
batch_size = 8
nb_epochs = 200

# Load the training images and build the target matrix from DB.csv
picture_name = []
path = './test_dataset/'
for picture in list_pictures(path):
    picture_name.append(float(picture[picture.find(path)+len(path):picture.find('_0_')]))
    img = img_to_array(load_img(picture, target_size=(224, 224)))
    X.append(img)

with open('DB.csv', encoding="utf-8_sig") as f:
    for row in csv.reader(f, quoting=csv.QUOTE_NONNUMERIC):
        Y_pre.append(row)
Y_pre = np.array(Y_pre)

for i, name in enumerate(picture_name):
    Y[i, :] = Y_pre[np.where(Y_pre == name)[0], 1:4]

X = np.asarray(X)
X = X / 255.0
print(X.shape)
print(Y.shape)

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=71, shuffle=True)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

es = EarlyStopping(monitor='val_loss', min_delta=0, patience=20, verbose=1, mode='auto')
modelCheckpoint = ModelCheckpoint(filepath='Checkpoint.h5',
                                  monitor='val_loss',
                                  verbose=1,
                                  save_best_only=True,
                                  save_weights_only=False,
                                  mode='min',
                                  period=1)

input_tensor = Input(shape=(224, 224, 3))
resnet50 = VGG19(include_top=False, weights='imagenet', input_tensor=input_tensor)

# Fully connected head: two hidden layers + linear output for regression
top_model = Sequential()
top_model.add(Flatten(input_shape=resnet50.output_shape[1:]))
top_model.add(Dropout(0.5))
top_model.add(Dense(hidden_neurons1))
top_model.add(Activation("sigmoid"))
top_model.add(Dropout(0.5))
top_model.add(Dense(hidden_neurons2))
top_model.add(Activation("sigmoid"))
top_model.add(Dropout(0.5))
top_model.add(Dense(out_neurons))
top_model.add(Activation("linear"))
# top_model.summary()

model = Model(inputs=resnet50.input, outputs=top_model(resnet50.output))
model.compile(loss="mean_squared_error",
              metrics=['accuracy'],
              optimizer=optimizers.SGD(lr=0.0001))
# model.summary()

result = model.fit(x_train, y_train,
                   batch_size=batch_size,
                   epochs=nb_epochs,
                   verbose=1,
                   validation_data=(x_test, y_test),
                   shuffle=True,
                   callbacks=[modelCheckpoint, es])

model.load_weights('Checkpoint.h5')
scores = model.evaluate(X, Y, verbose=0)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

result.history.keys()  # check the keys available in the training history
ep = len(result.history['acc'])
plt.figure()
plt.plot(range(1, ep+1), result.history['acc'], label="training")
plt.plot(range(1, ep+1), result.history['val_acc'], label="validation")
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.savefig('acc_test.png')

plt.figure()
plt.plot(range(1, ep+1), result.history['loss'], label="training")
plt.plot(range(1, ep+1), result.history['val_loss'], label="validation")
plt.xlabel('Epochs')
plt.ylabel('loss')
plt.legend()
plt.savefig('loss_test.png')

# Visualization
model.summary()

# Model evaluation on the newly photographed images
print("Load test dataset")
X_test = []
Y_test = np.zeros((7, 3))
Y_pre_test = []
picture_name_test = []
path = './test_dataset_test/'
for picture in list_pictures(path):
    picture_name_test.append(float(picture[picture.find(path)+len(path):picture.find('.jpg')]))
    img = img_to_array(load_img(picture, target_size=(224, 224)))
    X_test.append(img)

with open('DB_test.csv', encoding="utf-8_sig") as f:
    for row in csv.reader(f, quoting=csv.QUOTE_NONNUMERIC):
        Y_pre_test.append(row)
Y_pre_test = np.array(Y_pre_test)

for i, name in enumerate(picture_name_test):
    Y_test[i, :] = Y_pre_test[np.where(Y_pre_test == name)[0], 1:4]

X_test = np.asarray(X_test)
X_test = X_test / 255.0

model.load_weights('Checkpoint.h5')
loss, accuracy = model.evaluate(X_test, Y_test, verbose=0)
print('loss: ' + str(loss) + ' , acc: ' + str(accuracy))

for i in range(7):
    # Keep the batch dimension so the input has shape (1, 224, 224, 3)
    input_img = X_test[i:i+1, :, :, :]
    print(input_img.shape)
    pred = model.predict(input_img, batch_size=1, verbose=0)
    print(pred)

print("end of script")
```
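As a follow-up to the per-image predictions at the end of the script, the predicted width/depth/height can be compared directly with the ground truth loaded from DB_test.csv. A minimal sketch, assuming model, X_test, and Y_test are still in scope from VGG19_evaluation.py:

```python
import numpy as np

# Predict all seven test images at once and report the absolute error per dimension.
# 'model', 'X_test' and 'Y_test' are assumed to be defined as in VGG19_evaluation.py;
# the errors are in the same units as the targets stored in DB_test.csv.
preds = model.predict(X_test, batch_size=1, verbose=0)  # shape (7, 3): width, depth, height
abs_err = np.abs(preds - Y_test)                        # absolute error per image and per dimension
print('mean absolute error per dimension:', abs_err.mean(axis=0))
```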