app.py
import requests
from bs4 import BeautifulSoup
import pandas as pd


def scrape_github_profile(username):
    url = f'https://github.com/{username}'
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)

    if response.status_code != 200:
        print(f"Failed to fetch user: {username}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract profile data
    name_tag = soup.find('span', class_='p-name')
    name = name_tag.text.strip() if name_tag else 'N/A'

    biography_tag = soup.find('div', class_='p-note user-profile-bio mb-3 js-user-profile-bio f4')
    biography = biography_tag.text.strip() if biography_tag else 'N/A'

    company_tag = soup.find('span', class_='p-org')
    company = company_tag.text.strip() if company_tag else 'N/A'

    location_tag = soup.find('span', class_='p-label')
    location = location_tag.text.strip() if location_tag else 'N/A'

    # Extract repository count (guard against the counter badge being absent)
    repo_tag = soup.find('a', href=f'/{username}?tab=repositories')
    counter_tag = repo_tag.find('span', class_='Counter') if repo_tag else None
    repos = counter_tag.text.strip() if counter_tag else 'N/A'

    # Extract avatar URL
    avatar_tag = soup.find('img', class_='avatar-user')
    avatar_url = avatar_tag['src'] if avatar_tag else 'N/A'

    # Return as dictionary
    return {
        'Username': username,
        'Biography': biography,
        'Name': name,
        'Company': company,
        'Location': location,
        'Public Repos': repos,
        'Avatar URL': avatar_url
    }


# Example usage
if __name__ == '__main__':
    username = 'bradtraversy'  # Replace with desired GitHub username
    profile_data = scrape_github_profile(username)
    if profile_data:
        df = pd.DataFrame([profile_data])
        df.to_excel(f'{username}_profile.xlsx', index=False)
        print(f'Data saved to {username}_profile.xlsx')
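A note on usage: writing the .xlsx output goes through pandas' Excel engine, so openpyxl needs to be installed alongside requests, beautifulsoup4, and pandas. As a minimal sketch (not part of the original script), the same function can also be looped over several accounts and collected into a single spreadsheet; the usernames, the 2-second delay, and the profiles.xlsx filename below are illustrative assumptions, and the import assumes the code above is saved as app.py.

batch_scrape.py

import time

import pandas as pd

from app import scrape_github_profile  # assumes the script above is saved as app.py

# Illustrative account list -- replace with the profiles you want to collect.
usernames = ['bradtraversy', 'torvalds', 'gvanrossum']

records = []
for user in usernames:
    data = scrape_github_profile(user)
    if data:
        records.append(data)
    time.sleep(2)  # brief pause between requests to keep the scraping polite

if records:
    pd.DataFrame(records).to_excel('profiles.xlsx', index=False)
    print(f'Saved {len(records)} profiles to profiles.xlsx')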