app.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
import fitz import io from PIL import Image file = "sample.pdf" pdf_file = fitz.open(file) for page_index in range(len(pdf_file)): page = pdf_file.load_page(page_index) # load the page image_list = page.get_images(full=True) # get images on the page if image_list: print(f"[+] Found a total of {len(image_list)} images on page {page_index}") else: print("[!] No images found on page", page_index) for image_index, img in enumerate(image_list, start=1): xref = img[0] base_image = pdf_file.extract_image(xref) image_bytes = base_image["image"] image_ext = base_image["ext"] image_name = f"image{page_index+1}_{image_index}.{image_ext}" with open(image_name, "wb") as image_file: image_file.write(image_bytes) print(f"[+] Image saved as {image_name}") |