# for loops, formatted output
import urllib.request as ur
import bs4

# Placeholder values carried over verbatim from the original script.
# NOTE(review): as written, neither URL template contains a %-conversion
# (e.g. "%d"), so `template % number` raises TypeError. Substitute the real
# URL patterns before running.
URL_S_TEMPLATE = "url_s"
URL_D_TEMPLATE = "url_d"
USER_AGENT = "------"
OUTPUT_PATH = 'ppt-HS.txt'


def fetch_titles(url):
    """Download *url* and return all of its ``<div class="title">`` elements.

    The response body is decoded as UTF-8 and parsed with Beautiful Soup's
    ``html.parser`` backend. Errors raised by ``urlopen`` or ``decode``
    propagate to the caller.
    """
    request = ur.Request(url, headers={"User-Agent": USER_AGENT})
    with ur.urlopen(request) as response:
        html = response.read().decode('UTF-8')
    return bs4.BeautifulSoup(html, 'html.parser').find_all("div", class_="title")


def link_texts(title_divs):
    """Return the ``.a.string`` of every div in *title_divs* that has one.

    Divs without an ``<a>`` child are skipped. Anchors whose ``.string`` is
    None are skipped as well, because concatenating None into the output
    line would raise TypeError.
    """
    texts = []
    for div in title_divs:
        anchor = div.a
        if anchor is None or anchor.string is None:
            continue
        texts.append(anchor.string)
    return texts


# Open the output file once. Reopening it with mode='w' for every (i, j) pair
# would truncate it each time and keep only the last pair's titles.
with open(OUTPUT_PATH, mode='w', encoding='utf-8') as file:
    for i in range(1, 10):
        # The "s" page depends only on i, so fetch it once per outer pass
        # instead of once per (i, j) pair.
        texts_s = link_texts(fetch_titles(URL_S_TEMPLATE % i))
        # NOTE(review): `n` is not defined anywhere in the visible source;
        # it must be bound to the exclusive upper page bound before this runs.
        for j in range(10, n):
            texts_d = link_texts(fetch_titles(URL_D_TEMPLATE % j))
            for text_s in texts_s:
                for text_d in texts_d:
                    # Terminate every record with a newline so consecutive
                    # records do not run together on one line.
                    file.write(text_s + '\n' + text_d + '\n')
Direct link: https://paste.plurk.com/show/8368wN3Xep1jPVdH0iPV