app.py
import requests
from bs4 import BeautifulSoup
import pandas as pd


def scrape_github_profile(username):
    url = f'https://github.com/{username}'
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)

    if response.status_code != 200:
        print(f"Failed to fetch user: {username}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract profile data
    name_tag = soup.find('span', class_='p-name')
    name = name_tag.text.strip() if name_tag else 'N/A'

    biography_tag = soup.find('div', class_='p-note user-profile-bio mb-3 js-user-profile-bio f4')
    biography = biography_tag.text.strip() if biography_tag else 'N/A'

    company_tag = soup.find('span', class_='p-org')
    company = company_tag.text.strip() if company_tag else 'N/A'

    location_tag = soup.find('span', class_='p-label')
    location = location_tag.text.strip() if location_tag else 'N/A'

    # Extract repository count (guard against the counter badge being absent)
    repo_tag = soup.find('a', href=f'/{username}?tab=repositories')
    counter_tag = repo_tag.find('span', class_='Counter') if repo_tag else None
    repos = counter_tag.text.strip() if counter_tag else 'N/A'

    # Extract avatar URL
    avatar_tag = soup.find('img', class_='avatar-user')
    avatar_url = avatar_tag['src'] if avatar_tag else 'N/A'

    # Return as dictionary
    return {
        'Username': username,
        'Biography': biography,
        'Name': name,
        'Company': company,
        'Location': location,
        'Public Repos': repos,
        'Avatar URL': avatar_url
    }


# Example usage
if __name__ == '__main__':
    username = 'bradtraversy'  # Replace with desired GitHub username
    profile_data = scrape_github_profile(username)
    if profile_data:
        df = pd.DataFrame([profile_data])
        df.to_excel(f'{username}_profile.xlsx', index=False)
        print(f'Data saved to {username}_profile.xlsx')
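A note on usage: writing the .xlsx output goes through pandas' Excel engine, so openpyxl needs to be installed alongside requests, beautifulsoup4, and pandas. As a minimal sketch (not part of the original script), the same function can also be looped over several accounts and collected into a single spreadsheet; the usernames, the 2-second delay, and the profiles.xlsx filename below are illustrative assumptions, and the import assumes the code above is saved as app.py.

batch_scrape.py

import time

import pandas as pd

from app import scrape_github_profile  # assumes the script above is saved as app.py

# Illustrative account list -- replace with the profiles you want to collect.
usernames = ['bradtraversy', 'torvalds', 'gvanrossum']

records = []
for user in usernames:
    data = scrape_github_profile(user)
    if data:
        records.append(data)
    time.sleep(2)  # brief pause between requests to keep the scraping polite

if records:
    pd.DataFrame(records).to_excel('profiles.xlsx', index=False)
    print(f'Saved {len(records)} profiles to profiles.xlsx')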