You need to enable JavaScript to run this app.
最新活动
大模型
产品
解决方案
定价
生态与合作
支持与服务
开发者
了解我们

使用BeautifulSoup爬取世界政府债券网站收益率表格时返回空表的问题求助

使用BeautifulSoup爬取世界政府债券网站收益率表格时返回空表的问题求助

我正在尝试从一个网站爬取多个国家不同期限的收益率表格，但目前只得到空表：

enter image description here

而预期的表格应该是这样的:

enter image description here

我目前的实现代码如下:

import time 
import datetime as dt
import pandas as pd
from bs4 import BeautifulSoup
from dateutil.relativedelta import relativedelta
import requests
import re 
import os

# Current working directory, captured once at start-up; the script below
# uses it as the folder in which the Excel workbook is written.
path = os.getcwd()

def ZCCWord(Date,country):
    """Scrape the first HTML table of a country page into a yield-curve frame.

    The page ``http://www.worldgovernmentbonds.com/country/<country>`` is
    downloaded and its first ``<table>`` is parsed.  For every data row the
    second cell is read as a tenor (a number followed by a word such as
    ``months`` or ``years``) and the third cell as a rate in percent.

    Args:
        Date: Reference date; each tenor is added to it to obtain the
            ``DATES`` column.
        country: Country slug appended to the site URL (e.g. ``'spain'``).

    Returns:
        A ``pandas.DataFrame`` with the columns ``TENOR`` (int), ``PERIOD``
        (str), ``DATES`` (``Date`` shifted by the tenor) and ``RATES``
        (rate divided by 100).  The frame is empty when the page holds no
        table or no usable rows.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or time-outs.
    """
    columns = ['TENOR', 'PERIOD', 'DATES', 'RATES']

    # Site URL
    url = "http://www.worldgovernmentbonds.com/country/" + country

    # A timeout keeps one unresponsive request from blocking the whole run,
    # and an error status is reported instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")

    tables = soup.find_all("table")
    if not tables:
        # NOTE(review): if the site fills its table client-side with
        # JavaScript, the static HTML contains no <table> - confirm.
        return pd.DataFrame(columns=columns)

    # Skip the header row; strip non-breaking spaces, newlines and commas
    # from every cell.
    all_rows = [
        [re.sub(r"\xa0|\n|,", "", cell.text) for cell in tr.find_all("td")]
        for tr in tables[0].find_all("tr")[1:]
    ]

    AAA = pd.DataFrame(all_rows)
    if AAA.shape[1] < 3:
        # Columns 1 and 2 are required below; without them nothing is usable.
        return pd.DataFrame(columns=columns)

    # Parse tenor and rate from the SAME source rows so that dropping an
    # unparsable row can never shift rates against tenors.
    tenor_parts = AAA[1].str.extract(r'([^a-zA-Z]+)([a-zA-Z]+)', expand=True)
    # The optional sign keeps negative rates negative.
    rate_part = AAA[2].str.extract(r'(-?\d+(?:\.\d+)?)', expand=False)

    ZCC = pd.DataFrame({
        'TENOR': tenor_parts[0],
        'PERIOD': tenor_parts[1],
        'RATES': rate_part,
    }).dropna()

    # Remove leading/trailing spaces, then keep only purely numeric tenors.
    ZCC['TENOR'] = ZCC['TENOR'].astype(str).str.strip()
    numeric_tenor = ZCC['TENOR'].str.isdigit().astype(bool)
    ZCC = ZCC[numeric_tenor].reset_index(drop=True)

    ZCC['TENOR'] = ZCC['TENOR'].astype(int)
    ZCC['RATES'] = ZCC['RATES'].astype(float) / 100

    # Shift the reference date by each tenor: months for "month(s)",
    # years for every other period label.
    dates = []
    for tenor, period in zip(ZCC['TENOR'], ZCC['PERIOD']):
        if period.lower() in ('month', 'months'):
            dates.append(Date + relativedelta(months=int(tenor)))
        else:
            dates.append(Date + relativedelta(years=int(tenor)))

    ZCC['DATES'] = dates
    ZCC = ZCC.reindex(columns, axis=1)
    return ZCC



# Country slugs, as they appear in the site's URLs, to download.
LitsCountries   =  ['spain','portugal','latvia','ireland','united-kingdom',
                'germany', 'france','italy','sweden','finland','greece',
                'poland','romania','hungary','netherlands']

# Time-stamped workbook in the working directory.  os.path.join keeps the
# path valid on every platform instead of hard-coding a backslash.
todays_date     = os.path.join(
    path,
    'WorldYields' + dt.datetime.now().strftime("%Y-%m-%d-%H-%M") + '.xlsx',
)
dictYield       = {}

# The context manager saves and closes the workbook even when one of the
# downloads raises, so no half-open file handle is left behind.
with pd.ExcelWriter(todays_date, engine='xlsxwriter',
                    engine_kwargs={'options': {'strings_to_urls': False}}) as writer:
    for i, country in enumerate(LitsCountries):
        Date            = pd.to_datetime('today').date()
        ZCC             = ZCCWord(Date, country)
        dictYield[i]    = ZCC
        # One sheet per country, named after its slug.
        ZCC.to_excel(writer, sheet_name=country)

time.sleep(60) # wait one minute

我也可以接受其他能提供类似输出的网站、解决方案或方法，请问有没有解决思路？

提前感谢!

备注：内容来源于 Stack Exchange，提问作者 Luca91

火山引擎 最新活动