# app.py
import urllib.request
from html_table_parser.parser import HTMLTableParser
import pandas as pd
import os


def url_get_contents(url: str) -> bytes:
    """Fetch *url* over HTTP(S) and return the raw response body as bytes.

    Raises:
        urllib.error.URLError / HTTPError on network or HTTP failure.
    """
    req = urllib.request.Request(url=url)
    # Context manager guarantees the HTTP response is closed even if
    # read() raises — the original leaked the open connection.
    with urllib.request.urlopen(req) as resp:
        return resp.read()


# Create output directory (exist_ok makes reruns idempotent).
output_folder = "scraped_tables_excel"
os.makedirs(output_folder, exist_ok=True)

# URL to scrape
url = 'https://manuals.gfi.com/en/exinda/help/content/exos/sql-access/sql-urls.htm'
# NOTE(review): assumes the page is UTF-8 encoded — confirm against the
# response's Content-Type header if this ever garbles text.
xhtml = url_get_contents(url).decode('utf-8')

# Parse every <table> element out of the page; HTMLTableParser exposes
# them afterwards as p.tables (a list of row-lists).
p = HTMLTableParser()
p.feed(xhtml)

# Save each table as its own Excel workbook: table_1.xlsx, table_2.xlsx, ...
for idx, table in enumerate(p.tables, start=1):
    df = pd.DataFrame(table)
    excel_filename = os.path.join(output_folder, f'table_{idx}.xlsx')
    df.to_excel(excel_filename, index=False)
    print(f"Saved: {excel_filename}")