"""Python通过关键词下载百度图片.py 双击本程序,输入关键词,会下载一些图片,这些图片存放在以关键词为文件夹名称的目录中。"""
import requests
from tools import *
def dump(filecontent, filename):
    """Write binary content to a file.

    Args:
        filecontent: bytes to write.
        filename: destination path.
    """
    # Context manager guarantees the handle is closed even if write() raises;
    # the original open()/close() pair leaked the handle on error.
    with open(filename, mode='wb') as f:
        f.write(filecontent)
def download(keyword, link):
    """Download one image URL and save it into the folder named after *keyword*.

    Failures are ignored so one bad link does not abort the whole crawl
    (best-effort, preserving the original intent).

    Args:
        keyword: search keyword; used as the destination folder name.
        link: image URL to fetch.
    """
    try:
        resp = requests.get(link, timeout=5, verify=True)
        dump(resp.content, random_file_name(keyword))
    except (requests.RequestException, OSError):
        # Narrowed from a bare `except:` — network errors and file-write
        # errors are still skipped, but real bugs (NameError, etc.) and
        # KeyboardInterrupt now propagate instead of being swallowed.
        pass
if __name__ == "__main__":
    # Build the Baidu image-search URL from the user's keyword, then
    # fetch every image link found on the result page.
    SEARCH_URL_PREFIX = "http://image.baidu.com/search/index?tn=baiduimage&ie=utf-8&word="
    keyword = input("请输入关键词:\n")
    for link in get_all_links(SEARCH_URL_PREFIX + keyword):
        print("当前下载:", link, "\n")
        download(keyword, link)
#
#风火轮少儿编程内部简易爬虫教学程序初稿
#下面是tools模块的代码,此模块不做教学,转载请注明出处。
import requests,re
from random import choice
from time import time
import os
def random_file_name(keywords):
    """Return a fresh, unlikely-to-collide ``.jpg`` path inside ``./<keywords>/``.

    Creates the folder on first use. The file name combines the fractional
    part of the current timestamp with three random characters.

    Args:
        keywords: search keyword, used as the folder name under the cwd.

    Returns:
        Path string ending in ".jpg" inside the keyword folder.
    """
    folder = os.path.join(os.getcwd(), keywords)
    # exist_ok avoids the race between an exists() check and mkdir().
    os.makedirs(folder, exist_ok=True)
    # Original charset had a duplicated '0' ('...01234567890'); deduplicated.
    charset = 'abcdefghijklmnopqrstuvwxyz0123456789'
    rand_part = "".join(choice(charset) for _ in range(3))
    frac_part = str(time()).split(".")[-1]
    return os.path.join(folder, frac_part + rand_part + ".jpg")
def get_all_links(link):
    """Fetch a Baidu image-search page and return the image URLs found in it.

    Args:
        link: URL of the search result page.

    Returns:
        A set of https thumbnail image URLs.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    headers = {
        'Accept': 'text/html, application/xhtml+xml, image/jxr, */*',
        # Fixed header name: was 'Accept - Encoding' (with spaces), which is
        # an invalid header name that servers ignore.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-Hans-CN, zh-Hans; q=0.5',
        'Connection': 'Keep-Alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'}
    resp = requests.get(link, headers=headers, timeout=5, verify=True)
    html = resp.text
    # Rewrite the per-shard thumbnail CDN hosts to the stable https mirror
    # so the regex below matches them as well.
    for i in range(5):
        html = html.replace("http://img" + str(i) + ".imgtn.bdimg.com",
                            "https://ss1.bdstatic.com/70cFuXSh_Q1YnxGkpoWK1HF6hhy")
    # Raw string ('\w', '\d' in a plain literal is a DeprecationWarning) and
    # the literal dots in the hostname are now escaped.
    regex = re.compile(r"https://ss[0-3]\.bdstatic\.com/\w+/it/u=\d+,\d+&fm=\d+&gp=0\.jpg")
    # (Removed the unused `urls = set()` local from the original.)
    return set(regex.findall(html))
if __name__ == "__main__":
    # Manual smoke check for the tools module: print one generated path.
    demo_keyword = "风火轮少儿编程"
    print(random_file_name(demo_keyword))