验证码识别实战

it2022-05-05 147

验证码识别实战

（1）数据集

若想正确识别，有两种思路：分割识别？—— 不具备通用性；整体识别？—— 把整个标签一起编码，NZPP ——> [13, 25, 15, 15]，再用独热码（one-hot）的形式表示，即 NZPP -> [13, 25, 15, 15] -> 形状为 [4, 26] 的矩阵：[[0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0], [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1], [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0], [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0]]

（2）对数据集中的特征值、目标值怎么使用

（3）如何分类

如何衡量损失？mnist 分类用的是 softmax 映射 + 交叉熵；这里一张图片要同时预测 4 个字符（目标值中有 4 个位置为 1），因此应用 sigmoid 交叉熵。

（4）流程分析 1）读取图片数据 filename -> 标签值 2）解析csv文件，将标签值NZPP->[13, 25, 15, 15] 3）将filename和标签值联系起来 4）构建卷积神经网络->y_predict 5）构造损失函数 6）优化损失 7）计算准确率 8）开启会话、开启线程

最后代码如下并输入了一张图片进行测试

import tensorflow as tf
import glob
import pandas as pd  # used to read the CSV label file
import numpy as np
import os

tf.app.flags.DEFINE_integer("is_train", 2, "指定是否是训练模型，还是拿数据去预测")
FLAGS = tf.app.flags.FLAGS

# Locations used by the script. The training directory is shared by the image
# glob and the label CSV so both always agree on the directory's spelling.
TRAIN_DIR = "./GenPics"
TEST_DIR = "./test"
SUMMARY_DIR = "./tmp/summary2/"
CKPT_DIR = "./tmp/modelckpt2"
CKPT_PATH = CKPT_DIR + "/cnn_model"

# Every captcha label is encoded as NUM_CHARS letters out of NUM_CLASSES (A-Z).
NUM_CHARS = 4
NUM_CLASSES = 26


def _image_batch(pattern, batch_size):
    """Build a (filename_batch, image_batch) pair for the files matching *pattern*.

    Files are read through a TF1 filename queue, decoded as JPEG, pinned to the
    static shape [20, 80, 3] (height, width, channels) and cast to float32,
    because the decoded images are uint8 while the convolution layers need
    floating-point input.
    """
    file_queue = tf.train.string_input_producer(glob.glob(pattern))
    reader = tf.WholeFileReader()
    name, raw = reader.read(file_queue)
    decoded = tf.image.decode_jpeg(raw)
    # A fully defined static shape is required before the images can be batched.
    decoded.set_shape([20, 80, 3])
    image = tf.cast(decoded, tf.float32)
    return tf.train.batch([name, image], batch_size=batch_size,
                          num_threads=1, capacity=batch_size)


def read_pic():
    """Create the input pipelines for the training and the test images.

    Returns:
        (filename_batch, image_batch, filename_batch_test, image_batch_test):
        the training tensors are batched 100 at a time, the test tensors one
        at a time.
    """
    filename_batch, image_batch = _image_batch(TRAIN_DIR + "/*.jpg", 100)
    filename_batch_test, image_batch_test = _image_batch(TEST_DIR + "/*.jpg", 1)
    return filename_batch, image_batch, filename_batch_test, image_batch_test


def parse_csv():
    """Load the label CSV and add a "labels" column of per-letter indices.

    Each string in the "chars" column is mapped letter by letter to its offset
    from "A", e.g. NZPP -> [13, 25, 15, 15].

    Returns:
        A DataFrame indexed by "file_num" with the columns "chars" and "labels".
    """
    csv_data = pd.read_csv(TRAIN_DIR + "/labels.csv",
                           names=["file_num", "chars"], index_col="file_num")
    # NOTE(review): assumes every label consists of upper-case A-Z only;
    # other characters would produce indices outside 0..25.
    csv_data["labels"] = [
        [ord(char) - ord("A") for char in chars] for chars in csv_data["chars"]
    ]
    return csv_data


def filename2label(filename, csv_data):
    """Look up the numeric labels for a batch of image file names.

    Args:
        filename: iterable of file paths (bytes, as returned by ``sess.run`` on
            the filename batch, or str). The digits of the base name form the
            row key into *csv_data*.
        csv_data: DataFrame produced by ``parse_csv``.

    Returns:
        numpy array holding one label list per file.
    """
    labels = []
    for file_name in filename:
        if isinstance(file_name, bytes):
            file_name = file_name.decode("utf-8")
        # Only the base name is inspected so that digits occurring in a
        # directory name cannot leak into the lookup key.
        stem = os.path.splitext(os.path.basename(str(file_name)))[0]
        file_num = "".join(filter(str.isdigit, stem))
        labels.append(csv_data.loc[int(file_num), "labels"])
    return np.array(labels)


def _one_hot_labels(labels):
    """One-hot encode integer labels into a float32 [batch, 4 * 26] array.

    This is done with NumPy instead of ``tf.one_hot(...).eval()`` so that no
    new ops are added to the TensorFlow graph on every loop iteration.
    """
    eye = np.eye(NUM_CLASSES, dtype=np.float32)
    return eye[np.asarray(labels)].reshape(-1, NUM_CHARS * NUM_CLASSES)


def create_weights(shape):
    """Return a variable of the given shape, initialised from N(0, 0.01**2)."""
    return tf.Variable(initial_value=tf.random_normal(shape=shape, stddev=0.01))


def create_model(x):
    """Build the CNN and return the logits ``y_predict``.

    Args:
        x: float image tensor of shape [None, 20, 80, 3].

    Returns:
        Logits of shape [None, 4 * 26]: 26 scores for each of the 4 characters.
    """
    # 1) First convolution block: [None, 20, 80, 3] -> [None, 10, 40, 32]
    with tf.variable_scope("conv1"):
        conv1_weights = create_weights(shape=[5, 5, 3, 32])
        conv1_bias = create_weights(shape=[32])
        conv1_x = tf.nn.conv2d(input=x, filter=conv1_weights,
                               strides=[1, 1, 1, 1], padding="SAME") + conv1_bias
        relu1_x = tf.nn.relu(conv1_x)
        pool1_x = tf.nn.max_pool(value=relu1_x, ksize=[1, 2, 2, 1],
                                 strides=[1, 2, 2, 1], padding="SAME")

    # 2) Second convolution block: [None, 10, 40, 32] -> [None, 5, 20, 64]
    with tf.variable_scope("conv2"):
        conv2_weights = create_weights(shape=[5, 5, 32, 64])
        conv2_bias = create_weights(shape=[64])
        conv2_x = tf.nn.conv2d(input=pool1_x, filter=conv2_weights,
                               strides=[1, 1, 1, 1], padding="SAME") + conv2_bias
        relu2_x = tf.nn.relu(conv2_x)
        pool2_x = tf.nn.max_pool(value=relu2_x, ksize=[1, 2, 2, 1],
                                 strides=[1, 2, 2, 1], padding="SAME")

    # 3) Fully connected layer: [None, 5 * 20 * 64] x [5 * 20 * 64, 4 * 26]
    with tf.variable_scope("full_connection"):
        # tf.reshape does not accept None; -1 stands for the batch dimension.
        x_fc = tf.reshape(pool2_x, shape=[-1, 5 * 20 * 64])
        weights_fc = create_weights(shape=[5 * 20 * 64, NUM_CHARS * NUM_CLASSES])
        bias_fc = create_weights(shape=[NUM_CHARS * NUM_CLASSES])
        y_predict = tf.matmul(x_fc, weights_fc) + bias_fc

    return y_predict


def main():
    """Build the graph, then train (``--is_train=1``) or predict on test images."""
    filename, image, filename_test, image_test = read_pic()
    csv_data = parse_csv()

    # 1. Placeholders; the target is flattened to [None, 104] for the loss.
    x = tf.placeholder(tf.float32, shape=[None, 20, 80, 3])
    y_true = tf.placeholder(tf.float32, shape=[None, NUM_CHARS * NUM_CLASSES])

    # 2. Model
    y_predict = create_model(x)

    # 3. Loss: sigmoid cross-entropy, averaged over all outputs
    loss_list = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true,
                                                        logits=y_predict)
    loss = tf.reduce_mean(loss_list)

    # 4. Optimiser
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

    # 5. Accuracy: a sample counts as correct only if all 4 characters match.
    #    The prediction op is built once here and reused in test mode.
    predicted_chars = tf.argmax(
        tf.reshape(y_predict, shape=[-1, NUM_CHARS, NUM_CLASSES]), axis=2)
    true_chars = tf.argmax(
        tf.reshape(y_true, shape=[-1, NUM_CHARS, NUM_CLASSES]), axis=2)
    equal_list = tf.reduce_all(tf.equal(predicted_chars, true_chars), axis=1)
    accuracy = tf.reduce_mean(tf.cast(equal_list, tf.float32))

    init = tf.global_variables_initializer()

    # Summaries shown in TensorBoard
    tf.summary.scalar("losses", loss)
    tf.summary.scalar("accuracy", accuracy)
    merged = tf.summary.merge_all()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)

        # Start the queue-runner threads that feed the input pipelines.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        file_writer = tf.summary.FileWriter(SUMMARY_DIR, graph=sess.graph)

        try:
            # Resume from an existing checkpoint if there is one.
            if os.path.exists(CKPT_DIR + "/checkpoint"):
                saver.restore(sess, CKPT_PATH)

            if FLAGS.is_train == 1:
                # The checkpoint directory must exist before saver.save is called.
                os.makedirs(CKPT_DIR, exist_ok=True)
                for i in range(1000):
                    filename_value, image_value = sess.run([filename, image])
                    labels = filename2label(filename_value, csv_data)
                    labels_value = _one_hot_labels(labels)
                    # Train and collect the summary in a single run so that the
                    # printed and the logged values come from the same step.
                    _, error, accuracy_value, summary = sess.run(
                        [optimizer, loss, accuracy, merged],
                        feed_dict={x: image_value, y_true: labels_value})
                    print("第%d次训练后损失为%f，准确率为%f" % (i+1, error, accuracy_value))
                    file_writer.add_summary(summary, i)
                    if i % 100 == 0:
                        saver.save(sess, CKPT_PATH)
                # Persist the steps taken after the last periodic checkpoint.
                saver.save(sess, CKPT_PATH)
            else:
                # Not training: run the model on images from the test directory.
                for i in range(2):
                    filename_value_test, image_value_test = sess.run(
                        [filename_test, image_test])
                    labels_test = filename2label(filename_value_test, csv_data)
                    print("真实值为\n" , labels_test )
                    print("预测值为\n",sess.run(predicted_chars,
                                             feed_dict={x: image_value_test}))
        finally:
            # Always release the writer and the reader threads, even on error.
            file_writer.close()
            coord.request_stop()
            coord.join(threads)


if __name__ == "__main__":
    main()

专利

最新回复(0)