        第一卷 第二十一章 案例:使用CNN破解验证码

Breaking captchas with deep learning, Keras, and TensorFlow - PyImageSearch: https://www.pyimagesearch.com/2021/07/14/breaking-captchas-with-deep-learning-keras-and-tensorflow/











# import the necessary packages
import argparse
import requests
import time
import os
# construct the argument parser and parse the command-line arguments:
#   --output      directory the downloaded captcha images are written to
#   --num-images  how many download attempts to make (default 500)
ap = argparse.ArgumentParser()
ap.add_argument("-o", "--output", required=True,
	help="path to output directory of images")
ap.add_argument("-n", "--num-images", type=int,
	default=500, help="# of images to download")
args = vars(ap.parse_args())

# initialize the URL that contains the captcha images that we will
# be downloading along with the total number of images downloaded
# thus far
url = "https://www.e-zpassny.com/vector/jcaptcha.do"
total = 0

# make sure the output directory exists, otherwise every single write
# below would fail and be reported as a download error
os.makedirs(args["output"], exist_ok=True)

# loop over the number of images to download
for _ in range(args["num_images"]):
	try:
		# try to grab a new captcha image; treat HTTP error statuses
		# as failures so an error page is never saved as an image
		r = requests.get(url, timeout=60)
		r.raise_for_status()

		# save the image to disk, naming it after the running count
		# of successful downloads (zero-padded to five digits)
		p = os.path.sep.join([args["output"], "{}.jpg".format(
			str(total).zfill(5))])
		with open(p, "wb") as f:
			f.write(r.content)

		# update the counter
		print("[INFO] downloaded: {}".format(p))
		total += 1

	# handle network and file-system errors raised during the
	# download; a failed attempt is reported and the loop carries on
	except (requests.exceptions.RequestException, OSError):
		print("[INFO] error downloading image...")

	# insert a small sleep to be courteous to the server
	time.sleep(0.1)


# import the necessary packages
from imutils import paths
import argparse
import imutils
import cv2
import os
# construct the argument parser and parse the command-line arguments:
#   --input  directory of raw captcha images to label
#   --annot  directory that receives one sub-directory per typed label
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--input", required=True,
	help="path to input directory of images")
ap.add_argument("-a", "--annot", required=True,
	help="path to output directory of annotations")
args = vars(ap.parse_args())

# grab the image paths then initialize the dictionary of character
# counts (label -> next file index for that label)
imagePaths = list(paths.list_images(args["input"]))
counts = {}

# loop over the image paths
for (i, imagePath) in enumerate(imagePaths):
	# display an update to the user
	print("[INFO] processing image {}/{}".format(i + 1,
		len(imagePaths)))

	try:
		# load the image and convert it to grayscale, then pad the
		# image to ensure digits caught on the border of the image
		# are retained
		image = cv2.imread(imagePath)
		gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
		gray = cv2.copyMakeBorder(gray, 8, 8, 8, 8,
			cv2.BORDER_REPLICATE)

		# threshold the image to reveal the digits
		thresh = cv2.threshold(gray, 0, 255,
			cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

		# find contours in the image, keeping only the four largest
		# ones; grab_contours picks the contour list out of the
		# findContours return tuple regardless of the OpenCV version
		cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
			cv2.CHAIN_APPROX_SIMPLE)
		cnts = imutils.grab_contours(cnts)
		cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:4]

		# loop over the contours
		for c in cnts:
			# compute the bounding box for the contour then extract
			# the digit with a 5 pixel margin; the start indexes are
			# clamped at 0 because a negative slice start would wrap
			# around and yield an empty ROI
			(x, y, w, h) = cv2.boundingRect(c)
			roi = gray[max(y - 5, 0):y + h + 5,
				max(x - 5, 0):x + w + 5]

			# display the character, making it large enough for us
			# to see, then wait for a keypress
			cv2.imshow("ROI", imutils.resize(roi, width=28))
			key = cv2.waitKey(0)

			# if the '`' key is pressed, then ignore the character
			if key == ord("`"):
				print("[INFO] ignoring character")
				continue

			# grab the key that was pressed and construct the path
			# to the output directory for that label
			key = chr(key).upper()
			dirPath = os.path.sep.join([args["annot"], key])

			# if the output directory does not exist, create it
			os.makedirs(dirPath, exist_ok=True)

			# write the labeled character to file, numbering files
			# per label starting at 1 (zero-padded to six digits)
			count = counts.get(key, 1)
			p = os.path.sep.join([dirPath, "{}.png".format(
				str(count).zfill(6))])
			cv2.imwrite(p, roi)

			# increment the count for the current key
			counts[key] = count + 1

	# we are trying to control-c out of the script, so break from the
	# loop (you still need to press a key for the active window to
	# trigger this)
	except KeyboardInterrupt:
		print("[INFO] manually leaving script")
		break

	# an unknown error has occurred for this particular image (e.g. an
	# unreadable file or an unmappable key code); skip it on purpose
	# so one bad image does not end the labeling session
	except Exception:
		print("[INFO] skipping image...")


# import the necessary packages
import imutils
import cv2
def preprocess(image, width, height):
	"""Resize and pad an image to exactly ``width`` x ``height``.

	The image is first resized along its larger dimension (keeping the
	aspect ratio), then padded on the shorter dimension by replicating
	its border pixels, and finally resized once more to absorb any
	rounding error in the padding.

	Args:
		image: image array whose first two shape entries are
			(height, width).
		width: target width in pixels.
		height: target height in pixels.

	Returns:
		The pre-processed image with spatial size (height, width).
	"""
	# grab the dimensions of the image
	(h, w) = image.shape[:2]

	# if the width is greater than the height then resize along
	# the width
	if w > h:
		image = imutils.resize(image, width=width)

	# otherwise, the height is greater than (or equal to) the width so
	# resize along the height
	else:
		image = imutils.resize(image, height=height)

	# determine the padding values for the width and height to
	# obtain the target dimensions; clamp at zero because
	# copyMakeBorder rejects negative border sizes (possible when the
	# target is not square)
	padW = max((width - image.shape[1]) // 2, 0)
	padH = max((height - image.shape[0]) // 2, 0)

	# pad the image then apply one more resizing to handle any
	# rounding issues
	image = cv2.copyMakeBorder(image, padH, padH, padW, padW,
		cv2.BORDER_REPLICATE)
	image = cv2.resize(image, (width, height))

	# return the pre-processed image
	return image

        在图像验证码数据集上训练 LeNet。 打开 train_model.py 文件并插入以下代码:

# import the necessary packages
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.optimizers import SGD
from pyimagesearch.nn.conv import LeNet
from pyimagesearch.utils.captchahelper import preprocess
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import cv2
import os

# construct the argument parser and parse the command-line arguments:
#   --dataset  directory of labeled digits, one sub-directory per class
#   --model    path the trained model is serialized to
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True,
	help="path to input dataset")
ap.add_argument("-m", "--model", required=True,
	help="path to output model")
args = vars(ap.parse_args())

# number of training epochs; also used for the x-axis of the plot so
# the two can never get out of sync
EPOCHS = 15

# initialize the data and labels
data = []
labels = []

# loop over the input images
for imagePath in paths.list_images(args["dataset"]):
	# load the image, pre-process it, and store it in the data list
	image = cv2.imread(imagePath)
	image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	image = preprocess(image, 28, 28)
	image = img_to_array(image)
	data.append(image)

	# extract the class label from the image path (the name of the
	# parent directory) and update the labels list
	label = imagePath.split(os.path.sep)[-2]
	labels.append(label)

# scale the raw pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)

# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data,
	labels, test_size=0.25, random_state=42)

# convert the labels from class names to one-hot vectors
lb = LabelBinarizer().fit(trainY)
trainY = lb.transform(trainY)
testY = lb.transform(testY)

# initialize the model; the number of outputs is taken from the
# classes actually present in the training labels instead of being
# hard-coded
print("[INFO] compiling model...")
model = LeNet.build(width=28, height=28, depth=1,
	classes=len(lb.classes_))
opt = SGD(learning_rate=0.01)
model.compile(loss="categorical_crossentropy", optimizer=opt,
	metrics=["accuracy"])

# train the network
print("[INFO] training network...")
H = model.fit(trainX, trainY, validation_data=(testX, testY),
	batch_size=32, epochs=EPOCHS, verbose=1)

# evaluate the network
print("[INFO] evaluating network...")
predictions = model.predict(testX, batch_size=32)
print(classification_report(testY.argmax(axis=1),
	predictions.argmax(axis=1), target_names=lb.classes_))

# save the model to disk
print("[INFO] serializing network...")
model.save(args["model"])

# plot the training + testing loss and accuracy
plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0, EPOCHS), H.history["loss"], label="train_loss")
plt.plot(np.arange(0, EPOCHS), H.history["val_loss"], label="val_loss")
plt.plot(np.arange(0, EPOCHS), H.history["accuracy"], label="acc")
plt.plot(np.arange(0, EPOCHS), H.history["val_accuracy"],
	label="val_acc")
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend()
plt.show()


$ python train_model.py --dataset dataset --model output/lenet.hdf5

[INFO] compiling model...
[INFO] training network...
Train on 1509 samples, validate on 503 samples
Epoch 1/15
0s - loss: 2.1606 - acc: 0.1895 - val_loss: 2.1553 - val_acc: 0.2266
Epoch 2/15
0s - loss: 2.0877 - acc: 0.3565 - val_loss: 2.0874 - val_acc: 0.1769
Epoch 3/15
0s - loss: 1.9540 - acc: 0.5003 - val_loss: 1.8878 - val_acc: 0.3917
Epoch 15/15
0s - loss: 0.0152 - acc: 0.9993 - val_loss: 0.0261 - val_acc: 0.9980
[INFO] evaluating network...
             precision    recall  f1-score   support
          1       1.00      1.00      1.00        45
          2       1.00      1.00      1.00        55
          3       1.00      1.00      1.00        63
          4       1.00      0.98      0.99        52
          5       0.98      1.00      0.99        51
          6       1.00      1.00      1.00        70
          7       1.00      1.00      1.00        50
          8       1.00      1.00      1.00        54
          9       1.00      1.00      1.00        63
avg / total       1.00      1.00      1.00       503
[INFO] serializing network...
仅训练 15 轮(epoch)就达到了约 100% 的分类准确率,而且这并不是过拟合:到第 5 轮时,验证集与训练集的损失/准确率已经基本一致。


# import the necessary packages
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
from pyimagesearch.utils.captchahelper import preprocess
from imutils import contours
from imutils import paths
import numpy as np
import argparse
import imutils
import cv2

# construct the argument parser and parse the command-line arguments:
#   --input  directory of captcha images to run the model on
#   --model  path to the serialized, pre-trained model
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--input", required=True,
	help="path to input directory of images")
ap.add_argument("-m", "--model", required=True,
	help="path to input model")
args = vars(ap.parse_args())

# load the pre-trained network
print("[INFO] loading pre-trained network...")
model = load_model(args["model"])

# randomly sample a few of the input images (at most ten; sampling
# without replacement fails if more items are requested than exist)
imagePaths = list(paths.list_images(args["input"]))
imagePaths = np.random.choice(imagePaths,
	size=(min(10, len(imagePaths)),), replace=False)

# loop over the image paths
for imagePath in imagePaths:
	# load the image and convert it to grayscale, then pad the image
	# to ensure digits caught near the border of the image are
	# retained
	image = cv2.imread(imagePath)
	gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	gray = cv2.copyMakeBorder(gray, 20, 20, 20, 20,
		cv2.BORDER_REPLICATE)

	# threshold the image to reveal the digits
	thresh = cv2.threshold(gray, 0, 255,
		cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

	# find contours in the image, keeping only the four largest ones,
	# then sort them from left-to-right; grab_contours picks the
	# contour list out of the findContours return tuple regardless of
	# the OpenCV version
	cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
		cv2.CHAIN_APPROX_SIMPLE)
	cnts = imutils.grab_contours(cnts)
	cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:4]

	# nothing to classify (and nothing for sort_contours to sort) if
	# the thresholded image contains no contours
	if not cnts:
		print("[INFO] no contours found, skipping image...")
		continue

	cnts = contours.sort_contours(cnts)[0]

	# initialize the output image as a "grayscale" image with 3
	# channels along with the output predictions
	output = cv2.merge([gray] * 3)
	predictions = []

	# loop over the contours
	for c in cnts:
		# compute the bounding box for the contour then extract the
		# digit with a 5 pixel margin; the start indexes are clamped
		# at 0 because a negative slice start would wrap around
		(x, y, w, h) = cv2.boundingRect(c)
		roi = gray[max(y - 5, 0):y + h + 5,
			max(x - 5, 0):x + w + 5]

		# pre-process the ROI and then classify it; the "+ 1" maps
		# the class index to a digit label
		# NOTE(review): this assumes the model's classes are the
		# digits 1-9 in order -- confirm against the training labels
		roi = preprocess(roi, 28, 28)
		roi = np.expand_dims(img_to_array(roi), axis=0) / 255.0
		pred = model.predict(roi).argmax(axis=1)[0] + 1
		predictions.append(str(pred))

		# draw the prediction on the output image
		cv2.rectangle(output, (x - 2, y - 2),
			(x + w + 4, y + h + 4), (0, 255, 0), 1)
		cv2.putText(output, str(pred), (x - 5, y - 5),
			cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 0), 2)

	# show the output image and wait for a keypress before moving on
	# to the next sample
	print("[INFO] captcha: {}".format("".join(predictions)))
	cv2.imshow("Output", output)
	cv2.waitKey()


$ python test_model.py --input downloads --model output/lenet.hdf5
Using TensorFlow backend.
[INFO] loading pre-trained network...
[INFO] captcha: 2696
[INFO] captcha: 2337
[INFO] captcha: 2571
[INFO] captcha: 8648


        1. 收集原始图像数据集。

        2. 标记和注释我们的训练图像。

        3. 在我们标记的数据集上训练一个自定义的卷积神经网络。

        4. 在示例图像上测试和评估我们的模型。

        本章(以及下一章使用深度学习进行微笑检测)利用计算机视觉和 OpenCV 库来促进构建完整的应用程序。 如果您打算成为一名认真的深度学习从业者,我强烈建议您学习图像处理和 OpenCV 库的基础知识——即使对这些概念有基本的了解,也将使您能够:

        1. 在更高层次上欣赏深度学习。

        2. 开发更强大的应用程序,使用深度学习进行图像分类

        3. 利用图像处理技术更快地实现您的目标。


原文地址: https://outofmemory.cn/zaji/5479870.html

