app.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
"""Extract every table from a PDF with tabula and save each one as a CSV file."""

import os

# Defaults reproduce the locations the script has always used.
DEFAULT_PDF_PATH = "sample.pdf"
DEFAULT_OUTPUT_FOLDER = "tables_csv"


def save_tables(dfs, output_folder=DEFAULT_OUTPUT_FOLDER):
    """Write each table in *dfs* to ``table_<n>.csv`` inside *output_folder*.

    Args:
        dfs: Sequence of tables exposing ``to_csv(path, index=False)``
            (the DataFrames returned by ``tabula.read_pdf``).
        output_folder: Directory that receives the CSV files. It is created
            when missing, even if there are no tables to write.

    Returns:
        The CSV paths that were written, in table order. Empty when *dfs*
        holds no tables.
    """
    os.makedirs(output_folder, exist_ok=True)

    if not dfs:
        print("No tables found.")
        return []

    saved_paths = []
    # Numbering starts at 1 so file names match human table counts.
    for number, df in enumerate(dfs, start=1):
        csv_path = os.path.join(output_folder, f"table_{number}.csv")
        df.to_csv(csv_path, index=False)
        print(f"Saved: {csv_path}")
        saved_paths.append(csv_path)
    return saved_paths


def extract_tables(pdf_path=DEFAULT_PDF_PATH, output_folder=DEFAULT_OUTPUT_FOLDER):
    """Read all tables from *pdf_path* and save them as CSV files.

    Args:
        pdf_path: PDF file to scan; every page is searched for tables.
        output_folder: Directory that receives one CSV per extracted table.

    Returns:
        The list of CSV paths written by :func:`save_tables`.
    """
    # Imported here so save_tables stays importable where tabula is not
    # installed; the script entry point still needs it.
    import tabula

    dfs = tabula.read_pdf(pdf_path, pages="all", multiple_tables=True)
    print(f"Total tables extracted: {len(dfs)}")
    return save_tables(dfs, output_folder)


if __name__ == "__main__":
    extract_tables()