Skip to content Skip to sidebar Skip to footer

Scraping A Complex Table Using Beautifulsoup And Python

<

Solution 1:

Given the HTML you provided, this should work:

# Extract the column headers and the per-user rows from the users table.
# Expects `soup` to be an already-parsed BeautifulSoup document.

# Look the header row up once and reuse it instead of searching twice.
header_row = soup.find('tr', attrs={'class': 'listHeader'})
if header_row:
    headers = [
        'none' if c is None else c.get_text(strip=True)
        for c in header_row.find_all('th')
    ]
else:
    headers = None

table = soup.find('table', attrs={'id': 'MainContent_grdUsers2'})
data = []

# Skip the first <tr> of the table; the remaining rows are treated as data.
for tr in table.find_all('tr')[1:]:
    td = tr.find_all('td')
    try:
        data.append([
            # Plain-text cell.
            td[0].getText(),
            # Drop-down cells: keep the text of the selected <option>.
            td[2].find('option', {'selected': 'selected'}).getText(),
            td[3].find('option', {'selected': 'selected'}).getText(),
            # Input cells: keep the value attribute of the <input>.
            td[4].find('input').get('value'),
            td[5].find('input').get('value'),
            td[6].find('option', {'selected': 'selected'}).getText(),
        ])
    except (IndexError, AttributeError) as ex:
        # A row with too few cells raises IndexError; a cell without the
        # expected <option>/<input> makes find() return None, which raises
        # AttributeError. Such rows are skipped.
        # print(ex)  # uncomment this line for debugging
        continue

for row in data:
    print(' '.join(str(r) for r in row))

Output:

user1 Supervisor Medium First1 Last1 user1@company.com Inactive
user2 Supervisor Medium First2 Last2 user2@company.com Active

Post a Comment for "Scraping A Complex Table Using Beautifulsoup And Python"