国内视频培训网站建设,网站后台如何添加关键词,环江住房和城乡建设部网站,阿里建站系统
https://www.duitang.com/napi/blogv2/list/by_search/? 堆糖页面分析：使用Ajax加载，after_id是控制加载的图片和页面，从零开始，会提前加载下一页的Ajax数据。第一页的图片是after_id从0到120，会提前…
https://www.duitang.com/napi/blogv2/list/by_search/? 堆糖页面分析：使用Ajax加载，after_id是控制加载的图片和页面，从零开始，会提前加载下一页的Ajax数据。第一页的图片是after_id从0到120，会提前加载下一页的after_id:124。
import time
from urllib.parse import urlencode
import requests
import re
from threading import Thread
from queue import Queue
import json
import os
from queue import Empty


class ThreadFetchUrl(Thread):
    """Worker thread that downloads search-result pages and queues their images.

    Each worker repeatedly takes a URL from ``url_queue``, requests it, parses
    the JSON body and puts one ``(photo_id, photo_url)`` tuple per listed
    photo onto ``img_data_queue``. The worker exits once ``url_queue`` is
    empty.

    Args:
        url_queue: Queue of fully built request URLs (strings).
        img_data_queue: Queue receiving ``(photo_id, photo_url)`` tuples.
        headers: Mapping of HTTP headers sent with every page request.
        path: Optional directory. When given, the raw JSON of every fetched
            page is also written to ``<path>/<after_id>.txt``. Defaults to
            ``None`` (no dump), so existing three-argument callers are
            unaffected.
    """

    # Seconds to wait for the server before giving up on one page request.
    REQUEST_TIMEOUT = 10

    def __init__(self, url_queue, img_data_queue, headers, path=None):
        super().__init__()
        self.url_queue = url_queue
        self.headers = headers
        self.img_data_queue = img_data_queue
        self.path = path

    @staticmethod
    def _parse_photos(payload):
        """Return the ``(photo_id, photo_url)`` pairs listed in one JSON body.

        Raises:
            ValueError: ``payload`` is not valid JSON.
            KeyError, TypeError: the JSON lacks ``data.object_list[].photo``.
        """
        entries = json.loads(payload)["data"]["object_list"]
        return [(entry["photo"]["id"], entry["photo"]["path"]) for entry in entries]

    def _dump_page(self, url, payload):
        """Write the raw JSON ``payload`` to a text file named after ``after_id``."""
        match = re.search(r"after_id=(\d+)", url)
        page_name = match.group(1) if match else "unknown"
        target = os.path.join(self.path, page_name + ".txt")
        with open(target, "w", encoding="utf-8") as f:
            f.write(payload)

    def run(self):
        """Drain ``url_queue``, pushing every photo found onto ``img_data_queue``."""
        while True:
            # get_nowait() instead of empty()+get(): with several workers the
            # queue can be emptied between the check and a blocking get(),
            # which would leave this thread waiting forever.
            try:
                url = self.url_queue.get_nowait()
            except Empty:
                print("url_queue已空线程结束")
                break

            try:
                response = requests.get(
                    url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
                )
                response.raise_for_status()
            except requests.RequestException as exc:
                # One failed page must not take the whole worker down.
                print(f"页面请求失败: {url} ({exc})")
                continue
            payload = response.text

            if self.path is not None:
                self._dump_page(url, payload)

            try:
                photos = self._parse_photos(payload)
            except (ValueError, KeyError, TypeError) as exc:
                print(f"页面解析失败: {url} ({exc!r})")
                continue

            for photo in photos:
                self.img_data_queue.put(photo)


class ThreadSaveImg(Thread):
    """Worker thread that downloads queued images and stores them on disk.

    Args:
        img_data_queue: Queue yielding ``(photo_id, photo_url)`` tuples.
        path: Directory the image files are written into.
    """

    # Seconds to wait for a new queue item before the worker stops.
    QUEUE_TIMEOUT = 3
    # Seconds to wait for the server before giving up on one image request.
    REQUEST_TIMEOUT = 10

    def __init__(self, img_data_queue, path):
        super().__init__()
        self.path = path
        self.img_data_queue = img_data_queue

    def run(self):
        """Save images until no new item arrives within ``QUEUE_TIMEOUT`` seconds."""
        while True:
            try:
                photo_id, href = self.img_data_queue.get(timeout=self.QUEUE_TIMEOUT)
            except Empty:
                print("等待超时线程停止")
                break

            # The file extension is whatever follows the last dot of the URL.
            postfix = href.split(".")[-1]
            try:
                response = requests.get(href, timeout=self.REQUEST_TIMEOUT)
                response.raise_for_status()
            except requests.RequestException as exc:
                # Skip this image rather than saving an error page or dying.
                print(f"图片{photo_id}下载失败: {exc}")
                continue

            target = os.path.join(self.path, f"{photo_id}.{postfix}")
            with open(target, "wb") as f:
                f.write(response.content)
            print(f"图片{photo_id}保存成功")


# NOTE(review): the Thread base classes are kept only so the class hierarchy
# stays as it was. ImageDuitang never calls Thread.__init__ and is never
# started as a thread itself; it only creates and starts the worker threads.
class ImageDuitang(ThreadFetchUrl, ThreadSaveImg):
    """Command-line driver: builds the search URLs and starts the worker threads."""

    FETCH_THREADS = 10
    SAVE_THREADS = 30

    def __init__(self):
        self.url_prefix = "https://www.duitang.com/napi/blogv2/list/by_search/?"
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.69"
            ),
        }
        self.url_queue = Queue()
        self.img_data_queue = Queue()
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs("./duitang1", exist_ok=True)
        self.path = "./duitang1/"

    def urlenqueue(self, page_num, kwd):
        """Queue the request URLs for the first ``page_num`` result pages.

        Five requests are queued per page, with ``after_id`` advancing in
        steps of 24.

        Args:
            page_num: Number of result pages to cover.
            kwd: Search keyword.
        """
        for after_id in range(0, 24 * 5 * page_num, 24):
            params = {
                "kw": "{}".format(kwd),
                "after_id": f"{after_id}",
                "type": "feed",
                "include_fields": "top_comments,is_root, source_link, item, buyable, root_id, status, like_count, like_id, sender, album, reply_count, favorite_blog_id",
                "_type": "",
            }
            self.url_queue.put(self.url_prefix + urlencode(params))

    def main(self):
        """Prompt for keyword and page count, then start all worker threads."""
        kwd = input("请输入数据关键字")
        page_num = int(input("请输入要抓取前几页"))
        self.urlenqueue(page_num, kwd)
        for _ in range(self.FETCH_THREADS):
            ThreadFetchUrl(self.url_queue, self.img_data_queue, self.headers).start()
        for _ in range(self.SAVE_THREADS):
            ThreadSaveImg(self.img_data_queue, self.path).start()


if __name__ == "__main__":
    DT = ImageDuitang()
    DT.main()
    print("\n主线程已结束\n")