"""Python通过关键词下载百度图片.py 双击本程序,输入关键词,会下载一些图片,这些图片存放在以关键词为文件夹名称的目录中。""" import requests from tools import * def dump(filecontent,filename): """写入到文件里""" f = open(filename,mode='wb') f.write(filecontent) f.close() def download(keyword,link): """下载链接保存为图片,保存到名为keywords的文件夹中""" try: resp = requests.get(link,timeout=5, verify=True) img = resp.content dump(img, random_file_name(keyword)) except: pass if __name__ == "__main__": baidu_image_search = "http://image.baidu.com/search/index?tn=baiduimage&ie=utf-8&word=" keyword = input("请输入关键词:\n") search_url = baidu_image_search + keyword links = get_all_links(search_url) for link in links: print("当前下载:",link,"\n") download(keyword,link) # #风火轮少儿编程内部简易爬虫教学程序初稿 #下面是tools模块的代码,此模块不做教学,转载请注明出处。 import requests,re from random import choice from time import time import os def random_file_name(keywords): """以关键词为文件夹名,生成随机文件名""" folder = os.getcwd() + os.sep + keywords if not os.path.exists(folder) : os.mkdir(folder) 字符集 = 'abcdefghijklmnopqrstuvwxyz01234567890' filename="".join([ choice(字符集) for _ in range(3)]) s=str(time()) s=s.split(".")[-1] return folder + os.sep + s + filename + ".jpg" def get_all_links(link): """收集一个url页面的所有链接,返回到列表""" urls = set() headers = {'Accept': 'text/html, application/xhtml+xml, image/jxr, */*', 'Accept - Encoding':'gzip, deflate', 'Accept-Language':'zh-Hans-CN, zh-Hans; q=0.5', 'Connection':'Keep-Alive', 'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'} resp = requests.get(link,headers = headers,timeout=5, verify=True) html = resp.text for i in range(5): html = html.replace("http://img" + str(i) + ".imgtn.bdimg.com","https://ss1.bdstatic.com/70cFuXSh_Q1YnxGkpoWK1HF6hhy") regex = re.compile("https://ss[0-3].bdstatic.com/\w+/it/u=\d+,\d+&fm=\d+&gp=0\.jpg") links = re.findall(regex,html) return set(links) if __name__ == "__main__": keywords = "风火轮少儿编程" print(random_file_name(keywords))