Python スクレイピング（requests, BeautifulSoup）メモ 01

記事

IT・テクノロジー

import requests

from bs4 import BeautifulSoup

import tkinter

# Placeholder: replace this string with the URL of the page to scrape.
url = 'ターゲットURLをにゅうりょく [夏目智徹]'

# Fetch the page. requests applies no timeout by default, so pass one
# explicitly to keep the script from hanging on an unresponsive server.
site = requests.get(url, timeout=10)

# Decode using the encoding detected from the body to avoid garbled text.
site.encoding = site.apparent_encoding

# Read the decoded body once and reuse it for every parse.
html = site.text

# Three independent parse trees of the same document. data_02 is consumed
# by the image loop later in the script; data_03 is only referenced by a
# disabled snippet further down.
data = BeautifulSoup(html, 'html.parser')
data_02 = BeautifulSoup(html, 'html.parser')
data_03 = BeautifulSoup(html, 'html.parser')

# Print the <title> element, then only its text.
title = data.title
print(title)
# A document without a <title> yields None, which has no .text attribute.
if title is not None:
    print(title.text)

# NOTE: to print only the text of the first matching tag, use e.g.
#   data.find('a').text  or  data.find('p').text

print(data.find_all('img'))  # every <img> tag
print(data.find_all('p'))  # every <p> tag
print(data.find_all(id='sysDescription'))  # every tag whose id is "sysDescription"

# NOTE: other lookups that were tried here:
#   data.find(id='sysDescription')  -> first tag with that id
#   data.find(class_='hpb-nav')     -> first tag with that class

# NOTE: tkinter (window-creation module) self-test, currently disabled:
#   tkinter._test()

# Collect the src attribute of every <img> tag whose src ends with one of
# the image extensions below (this reads <img> tags, not <a> tags).
srcs = []

for item in data_02.find_all('img'):
    src = item.get('src')
    # get() returns None for an <img> without a src attribute; skip those
    # instead of crashing on None.endswith(...).
    # str.endswith accepts a tuple, so one call covers all extensions.
    if src is not None and src.endswith(('.jpg', '.png', '.jpeg')):
        srcs.append(src)

# Print each collected image source on its own line.
for i in srcs:
    print(i)

# Disabled snippet kept as a bare string literal, so it is never executed.
# Its text is a loop over data_03.find_all(id='sysDescription') that prints
# each match's .text; the loop body is not indented, so the indentation
# would have to be restored before the snippet could be re-enabled.
"""

for itme_text in data_03.find_all(id='sysDescription') :

print(itme_text.text)

"""

Python スクレイピング（requests, BeautifulSoup） メモ 01