python - Figuring out how to web scrape with BeautifulSoup -
I am trying to scrape the data in the "periods" and "percent per annum" columns of a table (table 4) at this URL:
My code follows. I think I am getting confused about how to refer to the row above the first date and the corresponding number, hence the error `AttributeError: 'NoneType' object has no attribute 'getText'`
in the line `row_name = row.findNext('td.header_units').getText()`.
# Question script (Python 2: uses urllib2). Downloads the ECB page and tries
# to print selected columns from the fifth <table> element on it.
from bs4 import BeautifulSoup
import urllib2

# NOTE(review): the URL looks lowercased by text extraction -- confirm the
# original casing of the path and query string before running.
url = "http://sdw.ecb.europa.eu/browsetable.do?node=qview&series_key=165.yc.b.u2.eur.4f.g_n_a.sv_c_ym.sr_30y"
content = urllib2.urlopen(url).read()
soup = BeautifulSoup(content)

# Index 4 selects the fifth <table> found in the document.
desired_table = soup.findAll('table')[4]

# Find the columns we want data from.
# NOTE: findAll/findNext take a tag *name*, not a CSS selector, so
# 'td.header_units' and 'td.header' match no element here.
headers1 = desired_table.findAll('td.header_units')
headers2 = desired_table.findAll('td.header')

desired_columns = []
for th in headers1:  # I'm working with `headers1` to see if I have the right idea
    desired_columns.append([headers1.index(th), th.getText()])

# Iterate through each row, grabbing the data from the desired columns.
rows = desired_table.findAll('tr')

for row in rows[1:]:
    cells = row.findAll('td')
    # This is the line that raises the reported AttributeError: findNext()
    # finds no tag named 'td.header_units' and returns None.
    row_name = row.findNext('td.header_units').getText()
    for column in desired_columns:
        print(cells[column[0]].text.encode('ascii', 'ignore'), row_name.encode('ascii', 'ignore'), column[1].encode('ascii', 'ignore'))
Thank you.
This will put the elements into tuples, as pairs:
# Answer script: fetch the page with requests and print each
# (period, value) pair from the statistics table.
from bs4 import BeautifulSoup
import requests

# NOTE(review): the URL and the "tablestats" class name look lowercased by
# text extraction -- confirm the original casing against the live page.
r = requests.get(
    "http://sdw.ecb.europa.eu/browsetable.do?node=qview&series_key=165.yc.b.u2.eur.4f.g_n_a.sv_c_ym.sr_30y")
soup = BeautifulSoup(r.content)

# Iterator over every <tr> that follows the first <td class="header"> cell
# inside the <table class="tablestats"> element.
data = iter(soup.find("table", {"class": "tablestats"}).find("td", {"class": "header"}).find_all_next("tr"))

# Consume the first two rows from the iterator (kept as `headers`) so that
# only the remaining rows are read below.
headers = (next(data).text, next(data).text)

# Each remaining row is unpacked as exactly two <td> cells; the single-item
# list lets the comprehension bind them to `a` and `b`.
table_items = [(a.text, b.text) for ele in data for a, b in [ele.find_all("td")]]

for a, b in table_items:
    # A blank value cell is reported as "null".
    print(u"period={}, percent per annum={}".format(a, b if b.strip() else "null"))
Output:
period=2015-06-09, percent per annum=1.842026
period=2015-06-08, percent per annum=1.741636
period=2015-06-07, percent per annum=null
period=2015-06-06, percent per annum=null
period=2015-06-05, percent per annum=1.700042
period=2015-06-04, percent per annum=1.667431
Comments
Post a Comment