# Fetches one web page and prints the link text inside each <div class="title"> element.
# Simple web crawler
import urllib.request as ur

import bs4  # third-party HTML parsing module (Beautiful Soup)

# Placeholder literal ("any URL"): replace it with the address of the page to crawl.
url = '任意網址'

# Build a Request object with request headers attached. Per the original note,
# the header values are taken from the web page / browser; "------" is a
# placeholder for the real User-Agent string.
request = ur.Request(url, headers={
    "User-Agent": "------",
})

# Fetch the page and decode the response body as UTF-8 text.
with ur.urlopen(request) as response:
    data = response.read().decode('utf-8')
# print(data)

# Let BeautifulSoup parse the HTML document.
root = bs4.BeautifulSoup(data, "html.parser")
# print(root.title.string)

# Collect every <div> tag whose class is "title".
# find_all() returns all matches, whereas find() would return only one.
titles = root.find_all("div", class_="title")
# print(titles)

for title in titles:
    # Look the <a> child up once; it is None when the div contains no link,
    # so such entries are skipped.
    link = title.a
    if link is not None:
        print(link.string)