wordpress安装插件导致网站,网站更换主机注意,网站标题空格,免费服务器虚拟化
最近研究了一下图像识别，一直没找到很好的应用场景，今天我就发现可以用百度的旋转验证码来做一个实验。没想到效果还挺好，下面就是实际的识别效果。
1、效果演示 2、如何识别
2.1准备数据集
首先需要使用爬虫，对验证码图片进行采… 最近研究了一下图像识别，一直没找到很好的应用场景，今天我就发现可以用百度的旋转验证码来做一个实验。没想到效果还挺好，下面就是实际的识别效果。
1、效果演示 2、如何识别
2.1准备数据集
首先需要使用爬虫，对验证码图片进行采集，尽量每一种类型都要采集到。 2.2图像矫正
接下来对采集的数据进行人工校正 2.3数据清洗
1）对数据进行旋转，达到增加数据量的目的。
2）对数据进行灰度化处理，将三维图片降为二维。
3）对图片大小进行resize，可以提高训练速度。
# Excerpt from the article's data-cleaning step. `Picture`, `np`, `img_path`,
# `result_list` and `complete_list` are defined outside this excerpt.

# Image conversion: produce the sample x.
picture = Picture(path=img_path)
# Convert the image to grayscale.
temp_img = picture.gray()
# Resize to 50x50 with the LANCZOS filter.
temp_img = temp_img.resize((50, 50), Image.LANCZOS)
# Label y: the part of the file name before the first '-'.
# The path is split on a backslash, so Windows-style paths are expected.
word = img_path.split('\\')[-1].split('-')[0]
# Wrap x and y in one list so the pair stays together if samples are shuffled.
res = [np.array(temp_img), np.array(word)]
# Hand the result to the shared result list.
result_list.append(res)
# Record the image as completed.
complete_list.append(img_path)
2.4划分训练集与测试集
一般训练集占数据量的80%，测试集占总数据量的20%，当然也可以根据自己的情况调整比例。
2.5训练模型
这里可以使用CNN神经网络模型进行训练效果非常不错。 2.6实战测试
下面直接上代码。其中的滑动系数可能需要自行调整，这个变动不会太频繁，可能几个月某度变一次。
__author__ = "dengxinyan"

import os
import sys
import time
import base64
import random
import requests
from PIL import Image
from io import BytesIO
sys.path.append(os.path.abspath(os.path.dirname(os.path.abspath(os.path.dirname(__file__)))))
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By


# Save a PIL image as a base64-encoded string
def PIL_base64(img, coding='utf-8'):
    """Encode a PIL image as a ``data:image/...;base64,`` URI string.

    The image is re-encoded as PNG (PNG sources and RGBA images), GIF (GIF
    sources) or JPEG (everything else, including images with no recorded
    format). Palette ("P") images are converted to RGB before saving.

    The MIME subtype in the returned URI is the format the bytes were
    actually saved in, so a source such as WEBP that is re-encoded as JPEG
    is labelled ``image/jpeg`` and not ``image/webp``.

    Args:
        img: Image object exposing ``format``, ``mode``, ``convert`` and
            ``save`` (a ``PIL.Image.Image``).
        coding: Codec used to turn the base64 bytes into ``str``.

    Returns:
        The data URI as a string.
    """
    img_format = img.format
    if img_format is None:
        img_format = 'JPEG'

    source_format = img_format.lower()
    format_str = 'JPEG'
    if source_format == 'png':
        format_str = 'PNG'
    if source_format == 'gif':
        format_str = 'gif'

    if img.mode == 'P':
        img = img.convert('RGB')
    if img.mode == 'RGBA':
        # JPEG cannot store an alpha channel, so RGBA is always saved as PNG.
        format_str = 'PNG'

    output_buffer = BytesIO()
    img.save(output_buffer, quality=100, format=format_str)
    byte_data = output_buffer.getvalue()

    encoded = base64.b64encode(byte_data).decode(coding)
    return 'data:image/' + format_str.lower() + ';base64,' + encoded


# Download the rotated captcha image from a URL
# Referer sent with the image request: the captcha page that embeds the image.
_CAPTCHA_REFERER = (
    "https://wappass.baidu.com/static/captcha/tuxing.html"
    "?ak=c27bbc89afca0463650ac9bde68ebe06"
    "&backurl=https%3A%2F%2Fwww.baidu.com%2Fs%3Fcl%3D3%26tn%3Dbaidutop10%26fr%3Dtop1000"
    "%26wd%3D%25E6%25B6%2588%25E9%2598%25B2%25E6%2588%2598%25E5%25A3%25AB"
    "%25E8%25BF%259E%25E5%25A4%259C%25E7%25AD%2591%25E5%259D%259D"
    "%25E5%25BA%2594%25E5%25AF%25B9%25E6%25B4%25AA%25E5%25B3%25B0"
    "%25E8%25BF%2587%25E5%25A2%2583"
    "%26rsv_idx%3D2%26rsv_dl%3Dfyb_n_homepage%26hisfilter%3D1"
    "&logid=8309940529500911554"
    "&signature=4bce59041938b160b7c24423bde0b518"
    "&timestamp=1624535702"
)


def get_img(url):
    """Download the captcha image at *url* and return it as a base64 data URI.

    The request imitates a Firefox image fetch from the captcha page. The
    personal Baidu session cookie that used to be hard-coded here has been
    removed; to send a cookie, put the full ``Cookie`` header value in the
    ``BAIDU_COOKIE`` environment variable.
    NOTE(review): whether the image endpoint answers without a cookie has not
    been verified.

    Args:
        url: Address of the captcha image.

    Returns:
        The string produced by ``PIL_base64`` for the downloaded image.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Earlier code returned ``None`` silently for any non-200 status.
        requests.RequestException: On connection problems or timeout.
    """
    headers = {
        "Host": "passport.baidu.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0",
        "Accept": "image/webp,*/*",
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": _CAPTCHA_REFERER,
    }
    cookie = os.environ.get("BAIDU_COOKIE")
    if cookie:
        headers["Cookie"] = cookie

    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()

    img = Image.open(BytesIO(response.content))
    # Convert the image to a base64 string and return it.
    return PIL_base64(img)


# Captcha recognition API
# Page that shows the rotation captcha.
CAPTCHA_PAGE_URL = (
    "https://wappass.baidu.com/static/captcha/tuxing.html"
    "?ak=c27bbc89afca0463650ac9bde68ebe06"
    "&backurl=https%3A%2F%2Fwww.baidu.com%2Fs%3Fcl%3D3%26tn%3Dbaidutop10%26fr%3Dtop1000"
    "%26wd%3D%25E6%25B6%2588%25E9%2598%25B2%25E6%2588%2598%25E5%25A3%25AB"
    "%25E8%25BF%259E%25E5%25A4%259C%25E7%25AD%2591%25E5%259D%259D"
    "%25E5%25BA%2594%25E5%25AF%25B9%25E6%25B4%25AA%25E5%25B3%25B0"
    "%25E8%25BF%2587%25E5%25A2%2583"
    "%26rsv_idx%3D2%26rsv_dl%3Dfyb_n_homepage%26hisfilter%3D1"
    "&logid=8309940529500911554"
    "&signature=4bce59041938b160b7c24423bde0b518"
    "&timestamp=1624535702"
)
SLIDE_BUTTON_XPATH = '//div[contains(@class,"passMod_slide-btn")]'
SPIN_IMAGE_XPATH = '//img[contains(@class,"passMod_spin-background")]'
# Pixels of slider travel per degree of rotation. The article notes this
# value may need re-tuning when the captcha changes.
SLIDE_COEFFICIENT = 0.661
# The script solves the captcha this many times in a row.
SLIDE_ATTEMPTS = 2


def shibie(img_base64):
    """Ask the recognition API for the clockwise rotation angle of an image.

    Args:
        img_base64: The captcha image as a base64 data URI string.

    Returns:
        The angle in degrees as an ``int``, parsed from a ``res_str`` of the
        form ``顺时针旋转<N>度``.

    Raises:
        requests.RequestException: On connection problems or timeout.
        KeyError, ValueError: If the response has no ``data.res_str`` or it
            does not contain a plain integer.
    """
    url = "http://www.detayun.cn/openapi/verify_code_identify/"
    data = {
        # User key; DETAYUN_API_KEY overrides the key published with the article.
        "key": os.environ.get("DETAYUN_API_KEY", "JxSfP4E1yfcmJZB6ynOB"),
        # Captcha type.
        # NOTE(review): sent as the string "16"; confirm the API does not
        # require an integer.
        "verify_idf_id": "16",
        # Sample image.
        "img_base64": img_base64,
        "img_byte": None,
        # Text description used by click/semantic captcha types; left empty here.
        "words": "",
    }
    header = {"Content-Type": "application/json"}

    # Call the API; the timeout keeps a stalled request from blocking forever.
    response = requests.post(url=url, json=data, headers=header, timeout=30)
    print(response.text)

    res_str = str(response.json()['data']['res_str'])
    return int(res_str.replace('顺时针旋转', '').replace('度', ''))


def _slide_once(driver):
    """Solve the captcha currently shown: read the angle, then drag the slider."""
    # Wait for the slider button to appear.
    yzm_button = WebDriverWait(driver, 10).until(
        lambda d: d.find_element(By.XPATH, SLIDE_BUTTON_XPATH)
    )
    time.sleep(1)

    # Wait for the captcha image to appear.
    spin_img = WebDriverWait(driver, 10).until(
        lambda d: d.find_element(By.XPATH, SPIN_IMAGE_XPATH)
    )
    img_src = spin_img.get_attribute('src')

    # Download the image and convert it to base64.
    img_base64 = get_img(img_src)
    # Rotation angle * slide coefficient = slide distance.
    move_x = shibie(img_base64) * SLIDE_COEFFICIENT

    # Drag the slider.
    action = ActionChains(driver)
    action.click_and_hold(yzm_button).perform()    # press and hold the left button
    action.move_by_offset(move_x, 0).perform()
    action.release().perform()                     # release the mouse


if __name__ == '__main__':
    # Load the anti-detection script.
    with open(r'.\webdriver\stealth.min.js', encoding='utf-8') as f:
        js = f.read()

    options = webdriver.ChromeOptions()
    # NOTE(review): Selenium 4.10+ no longer accepts `executable_path`; pass
    # `service=Service(path)` there. Kept for the Selenium version the
    # article was written against.
    driver = webdriver.Chrome(executable_path=r'.\webdriver\chromedriver.exe', options=options)
    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": js})

    # Open the captcha page.
    driver.get(CAPTCHA_PAGE_URL)

    for attempt in range(SLIDE_ATTEMPTS):
        if attempt:
            # Pause before the next slide.
            time.sleep(2)
        _slide_once(driver)
3、总结
这个旋转验证码非常有特色，而且有很大的难度。特别是在标记训练图片的时候，非常耗费时间。
现在我也把识别模型封装成了接口，感兴趣的小伙伴可以免费使用：得塔云