Python 3 Tabula Script to Extract Tables From PDF as Dataframes & Export as CSV File

geekygautam1997 · May 12, 2025 · No Comments

app.py

import tabula
import os

# Pull every table out of the PDF; the result is a list of DataFrames,
# one per detected table across all pages.
dfs = tabula.read_pdf("sample.pdf", pages='all', multiple_tables=True)

# Report how many tables were detected.
print(f"Total tables extracted: {len(dfs)}")

# Make sure the destination directory exists before any file is written.
output_folder = "tables_csv"
os.makedirs(output_folder, exist_ok=True)

# Write one CSV per table; file names are numbered starting from 1.
if not dfs:
    print("No tables found.")
else:
    for table_number, table in enumerate(dfs, start=1):
        csv_path = os.path.join(output_folder, f"table_{table_number}.csv")
        # index=False keeps the DataFrame's row index out of the CSV.
        table.to_csv(csv_path, index=False)
        print(f"Saved: {csv_path}")

import tabula

import os

# Extract all tables from the PDF (returns a list of DataFrames).
dfs = tabula.read_pdf("sample.pdf", pages='all', multiple_tables=True)

# Check how many tables were found.
print(f"Total tables extracted: {len(dfs)}")

# Create the output folder if it does not exist yet.
output_folder = "tables_csv"
os.makedirs(output_folder, exist_ok=True)

# Save each table to its own CSV file (table_1.csv, table_2.csv, ...).
# The bodies of `if`, `for` and `else` must be indented; without the
# indentation Python rejects the script with a SyntaxError.
if dfs:
    for i, df in enumerate(dfs):
        csv_path = os.path.join(output_folder, f"table_{i + 1}.csv")
        # index=False keeps the DataFrame's row index out of the CSV.
        df.to_csv(csv_path, index=False)
        print(f"Saved: {csv_path}")
else:
    print("No tables found.")

geekygautam1997

View All Posts

Comments

No comments yet. Why don’t you start the discussion?

Python 3 Tabula Script to Extract Tables From PDF as Dataframes & Export as CSV File

Comments

Leave a Reply Cancel reply

Archives