# for loops and formatted output
import urllib.request as ur
import bs4
# --- Configuration ----------------------------------------------------------
# NOTE(review): "url_s", "url_d" and "------" are placeholders carried over
# from the original script. The original built the URLs with `"url_s" % i`,
# which raises TypeError because the string contains no conversion specifier;
# a `%d` slot for the page number has been added so the formatting works.
# Replace both templates with the real URLs (keeping exactly one `%d` each)
# and set a real User-Agent before running.
URL_S_TEMPLATE = "url_s%d"
URL_D_TEMPLATE = "url_d%d"
USER_AGENT = "------"
OUTPUT_PATH = 'ppt-HS.txt'

# Page numbers substituted into URL_S_TEMPLATE (1..9, as in the original).
S_PAGES = range(1, 10)

# NOTE(review): the original inner loop was `range(10, n)`, but `n` was never
# defined, so the script stopped with NameError. The value below is only a
# placeholder that yields a single page (j == 10); set it to one past the last
# "d" page that should be fetched.
D_PAGE_STOP = 11
D_PAGES = range(10, D_PAGE_STOP)


def _fetch_title_texts(url):
    """Download *url* and return the link texts of its title blocks.

    The page is requested with the configured User-Agent header, decoded as
    UTF-8 and parsed with BeautifulSoup's 'html.parser'. Every
    ``<div class="title">`` is inspected; entries without an ``<a>`` child, or
    whose ``<a>`` has no single string (``.string`` is None), are skipped so
    that callers only ever receive real strings.

    Args:
        url: Address of the page to download.

    Returns:
        A list of strings, in document order.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    request = ur.Request(url, headers={"User-Agent": USER_AGENT})
    with ur.urlopen(request) as response:
        html = response.read().decode('UTF-8')

    root = bs4.BeautifulSoup(html, 'html.parser')
    texts = []
    for block in root.find_all("div", class_="title"):
        link = block.a
        # The original test `titles_s.a and titles_d.a != None` relied on the
        # truthiness of the first tag and compared only the second one to
        # None; both the tag and its text are checked explicitly here.
        if link is not None and link.string is not None:
            texts.append(link.string)
    return texts


def main():
    """Write every (s-title, d-title) combination to OUTPUT_PATH.

    For each "s" page and each "d" page, every pairing of their titles is
    written as two lines: the "s" title, then the "d" title. Records appear in
    the same order as in the original nested loops (s page, d page, s title,
    d title).
    """
    # Each "d" page is the same for every "s" page, so download it only once
    # instead of once per (i, j) combination.
    d_title_pages = [_fetch_title_texts(URL_D_TEMPLATE % j) for j in D_PAGES]

    # Open the output once. The original reopened it with mode='w' inside the
    # loops, so every (i, j) iteration erased what the previous ones wrote.
    with open(OUTPUT_PATH, mode='w', encoding='utf-8') as file:
        for i in S_PAGES:
            # The "s" page does not depend on j; fetch it once per i.
            s_titles = _fetch_title_texts(URL_S_TEMPLATE % i)
            for d_titles in d_title_pages:
                # A trailing newline terminates each record; without it the
                # next record's first title was glued to this record's last.
                file.writelines(
                    s_text + '\n' + d_text + '\n'
                    for s_text in s_titles
                    for d_text in d_titles
                )


if __name__ == "__main__":
    main()