蒙恩招生网 蒙恩招生网4
2023-10-30
更新时间:2023-11-07 20:53:33作者:51data
自编码器一般通过最小化输出与输入之差的平方和进行优化,即让输出尽可能地复现输入,因此自编码器可以通过无监督训练的方式提取输入信号本身的固有特征,使网络的权值信息只源于信号本身。
关于自编码器的详细介绍可参考:[1] 来杰, 王晓丹, 向前, 等. 自编码器及其应用综述[J]. 通信学报, 2021, 42(9): 218-230。
例子可参考如下链接:一个简单的自编码器的小例子 - 哥廷根数学学派的文章 - 知乎 https://zhuanlan.zhihu.com/p/544980376
自编码器一般基于全连接网络(原文此处配有示意图)。全连接网络的运算量较大,噪声鲁棒性低;而卷积神经网络中的卷积层利用卷积核与输入信号的局部区域进行卷积运算,提取输入信号的局部区域特征。不同于全连接网络,卷积层具有局部连接和权值共享的特点,可以利用更少的参数获得更加丰富的特征,因此将卷积机制引入自编码器是自然而然的。关于卷积自编码器的详细原理推导及相关应用,请参考相关论文。
添加图片注释,不超过 140 字(可选)
本例属于人脸图像处理领域的 hello world,比较简单,目的是探索如何使用卷积自编码器对人脸图像进行处理,所用的数据集也比较简单。
首先导入相关模块,主要用到tensorflow和keras模块
import numpy as npimport tensorflow as tfimport keras from keras.layers import Dense, Conv2D, Conv2DTranspose, MaxPool2D, UpSampling2D, Dropout, Inputfrom keras.preprocessing.image import img_to_arrayimport matplotlib.pyplot as pltimport cv2from tqdm import tqdm import osimport re import kerasfrom keras import layers
以正确的顺序获取文件
def sorted_alphanumeric(data):
    """Return the items of *data* sorted in natural (human) order.

    Runs of ASCII digits are compared as integers and all other text is
    compared case-insensitively, so ``"img2.png"`` sorts before
    ``"img10.png"``.

    Args:
        data: Iterable of strings (for example the result of ``os.listdir``).

    Returns:
        A new list containing the same items in natural order.
    """
    def alphanum_key(key):
        # re.split with a capturing group alternates text / digit-run, so
        # odd positions are always ASCII digit runs. Deciding by position
        # instead of str.isdigit() avoids a ValueError for characters such
        # as "²" (isdigit() is True but int() rejects it) and keeps every
        # key position type-consistent for comparison.
        return [
            int(token) if index % 2 else token.lower()
            for index, token in enumerate(re.split(r'([0-9]+)', key))
        ]

    return sorted(data, key=alphanum_key)
设置图像尺寸与图像路径,导入图像,并做简单的图像增强
# Load every photo, normalise it, and append it plus seven flipped/rotated
# variants to img_array (8 samples per source file, in file order).
SIZE = 256

# Image directories
image_path = '/content/photos'
img_array = []
sketch_path = '/content/sketches'
sketch_array = []

# Natural sort so that photo N and sketch N end up at the same index.
image_file = sorted_alphanumeric(os.listdir(image_path))
sketch_file = sorted_alphanumeric(os.listdir(sketch_path))

for i in tqdm(image_file):
    image = cv2.imread(os.path.join(image_path, i), 1)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the file name rather than later inside cvtColor.
        # Skipping the file is not an option: it would shift the pairing
        # between img_array and sketch_array.
        raise ValueError(f"Could not read image file: {os.path.join(image_path, i)}")

    # Convert OpenCV's BGR channel order to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Resize to SIZE x SIZE
    image = cv2.resize(image, (SIZE, SIZE))
    # Scale pixel values to [0, 1]
    image = image.astype('float32') / 255.0

    # Augmentation. The order is significant: it must match the order used
    # for the sketches so that samples stay paired index-by-index.
    rotated_cw = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
    rotated_ccw = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
    variants = (
        image,                       # original
        cv2.flip(image, 1),          # horizontal flip
        cv2.flip(image, -1),         # flip around both axes (180 degree turn)
        cv2.flip(image, 0),          # vertical flip (same result as flip -1 then flip 1)
        rotated_cw,                  # 90 degrees clockwise
        cv2.flip(rotated_cw, 1),     # clockwise rotation, then horizontal flip
        rotated_ccw,                 # 90 degrees counter-clockwise
        cv2.flip(rotated_ccw, 1),    # counter-clockwise rotation, then horizontal flip
    )
    img_array.extend(img_to_array(variant) for variant in variants)
此外,对输出的面部草图的操作和上面一样,不再赘述
for i in tqdm(sketch_file): image = cv2.imread(sketch_path + '/' + i,1) 。。。。。。。
校准一下图像数量
# Report how many samples were loaded for each domain. The two lists are
# later sliced with the same indices, so the counts are expected to match.
for label, samples in (
    ("Total number of sketch images:", sketch_array),
    ("Total number of images:", img_array),
):
    print(label, len(samples))
输出:Total number of sketch images: 1504,Total number of images: 1504。
训练集和测试集划分
# Split both domains at the same index: the first 1400 samples are used for
# training and the remainder is held out for validation/visualisation.
train_sketch_image, test_sketch_image = sketch_array[:1400], sketch_array[1400:]
train_image, test_image = img_array[:1400], img_array[1400:]

# Stack each list of per-image arrays into one (count, SIZE, SIZE, 3) batch.
train_sketch_image = np.asarray(train_sketch_image).reshape(
    (len(train_sketch_image), SIZE, SIZE, 3)
)
train_image = np.asarray(train_image).reshape((len(train_image), SIZE, SIZE, 3))
print('Train color image shape:', train_image.shape)

test_sketch_image = np.asarray(test_sketch_image).reshape(
    (len(test_sketch_image), SIZE, SIZE, 3)
)
test_image = np.asarray(test_image).reshape((len(test_image), SIZE, SIZE, 3))
print('Test color image shape', test_image.shape)
训练“人脸图像到面部草图”模型,下面的代码比较容易理解,都是keras模块的一些比较基础的东西
# Convolutional autoencoder mapping a 256x256 RGB image to a 256x256x3 output.
input_img = keras.Input(shape=(256, 256, 3))

# Encoder: three (3x3 conv -> 2x2 max-pool) stages with 32, 16 and 8 filters.
# Each pooling halves the spatial size: 256 -> 128 -> 64 -> 32.
x = input_img
for filters in (32, 16, 8):
    x = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2, 2), padding='same')(x)
encoded = x

# Decoder: three (3x3 conv -> 2x upsampling) stages with 8, 16 and 32 filters.
# The last convolution uses 'valid' padding (Conv2D's default), which trims
# the feature map to 126x126 before it is upsampled to 252x252.
x = encoded
for filters, padding in ((8, 'same'), (16, 'same'), (32, 'valid')):
    x = layers.Conv2D(filters, (3, 3), activation='relu', padding=padding)(x)
    x = layers.UpSampling2D((2, 2))(x)

# Two 'valid' 3x3 transposed convolutions each add 2 pixels per side pair
# (252 -> 254 -> 256), restoring the input resolution with 3 output channels.
x = tf.keras.layers.Conv2DTranspose(8, (3, 3), padding='valid')(x)
decoder_output = tf.keras.layers.Conv2DTranspose(3, (3, 3), padding='valid')(x)

autoencoder = keras.Model(input_img, decoder_output)
autoencoder.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='mean_absolute_error',
    metrics=['acc'],
)

# Notebook-style echo of the output tensor shape.
decoder_output.shape
看一下网络的整体结构
# Print the layer-by-layer structure of the model together with its
# parameter counts.
autoencoder.summary()
输出(节选):Total params: 15,603;Trainable params: 15,603;Non-trainable params: 0。
开始进行网络训练,代码也较为简单
from keras.callbacks import TensorBoard

# Train the photo -> sketch mapping: photos are the inputs, sketches the targets.
train_image.shape
hist = autoencoder.fit(
    x=train_image,
    y=train_sketch_image,
    epochs=300,
    batch_size=128,
    shuffle=True,
    validation_data=(test_image, test_sketch_image),
)

import pandas as pd
import matplotlib.pyplot as plt

# Plot every recorded training/validation curve on one axes, clipped to [0, 1].
history_frame = pd.DataFrame(hist.history)
curve_axes = history_frame.plot(figsize=(8, 5))
curve_axes.grid(True)
curve_axes.set_ylim(0, 1)
plt.show()
开始预测
# Run the trained model on the held-out photos and show a 2 x n grid:
# top row = input photo, bottom row = model output for that photo.
decoded_imgs = autoencoder.predict(test_image)

n = 10
plt.figure(figsize=(20, 4))
for i in range(1, n + 1):
    # Subplot positions are 1-based; the same i also indexes the sample,
    # so samples 1..n are shown.
    for row_offset, batch in ((0, test_image), (n, decoded_imgs)):
        ax = plt.subplot(2, n, i + row_offset)
        plt.imshow(batch[i].reshape(256, 256, 3))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
plt.show()
面部草图到人脸图像训练
from keras.callbacks import TensorBoard

train_image.shape

# Train the reverse mapping: sketches are the inputs, photos the targets.
# NOTE(review): this calls fit() on the same `autoencoder` object that was
# already trained in the other direction, so training continues from those
# weights rather than from a fresh model - confirm that this is intended.
fit_options = dict(
    epochs=1500,
    batch_size=128,
    shuffle=True,
    validation_data=(test_sketch_image, test_image),
)
hist = autoencoder.fit(train_sketch_image, train_image, **fit_options)

# Plot the recorded training/validation curves, clipped to [0, 1].
pd.DataFrame(hist.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0, 1)
plt.show()
预测并可视化
# Run the model on the held-out sketches and show a 2 x n grid:
# top row = input sketch, bottom row = model output for that sketch.
decoded_imgs = autoencoder.predict(test_sketch_image)

n = 10
plt.figure(figsize=(20, 4))
for i in range(1, n + 1):
    panels = (
        (i, test_sketch_image[i]),   # top row
        (i + n, decoded_imgs[i]),    # bottom row
    )
    for position, picture in panels:
        ax = plt.subplot(2, n, position)
        plt.imshow(picture.reshape(256, 256, 3))
        plt.gray()
        # Hide both axes so only the image is visible.
        for axis in (ax.get_xaxis(), ax.get_yaxis()):
            axis.set_visible(False)
plt.show()
此外,为进一步提升生成图像的质量,还可以进行超参数调优(Hyperparameter Tuning)。keras 有个第三方调参模块 keras-tuner,安装命令:pip install keras-tuner
from kerastuner import RandomSearchfrom kerastuner.engine.hyperparameters import HyperParameters
给个微调的简单的例子
# Random hyperparameter search for the sketch -> photo model, followed by a
# visual check of the best model found.
# NOTE(review): build_model is not defined in the visible part of this file.
# RandomSearch expects a callable that takes a HyperParameters object and
# returns a compiled model - confirm it is defined before this runs.
tuner_search = RandomSearch(
    build_model,
    objective='val_acc',
    max_trials=5,
    directory='/content/output3',
    project_name="FaceToSketch",
)

# Only validation_data is passed. The previous call also passed
# validation_split=0.1, but Keras uses validation_data whenever it is
# supplied, so the split argument had no effect and only obscured which
# data was used for validation.
tuner_search.search(
    train_sketch_image,
    train_image,
    epochs=500,
    validation_data=(test_sketch_image, test_image),
)
model = tuner_search.get_best_models(num_models=1)[0]

decoded_imgs = model.predict(test_sketch_image)

n = 10
plt.figure(figsize=(20, 4))
for i in range(1, n + 1):
    # Display original (top row)
    ax = plt.subplot(2, n, i)
    plt.imshow(test_sketch_image[i].reshape(256, 256, 3))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    # Display reconstruction (bottom row)
    ax = plt.subplot(2, n, i + n)
    plt.imshow(decoded_imgs[i].reshape(256, 256, 3))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()
详细的参数微调算法,以后再详细讲一下,毕竟这篇只是个hello world
版权声明:本文基于CC协议,以分享为目的,转载于今日头条,版权归原作者,如侵权请联系我们删除,内容仅供参考,不代表本站建议!