Python 3 PyPDF2 Script to Add Bookmarks to PDF Document Using pdfbookmarker Library Full Project For Beginners



pip install pypdf2



pip install pdfbookmarker



After installing these libraries, create a new file and copy-paste the following code into it.



#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Add bookmarks to existing PDF files
  $ pdfbm [options] <FILE.pdf> [FILE.txt] [FILE-new.pdf]
  -h, --help    show this help
  $ pdfbm FILE.pdf  # will read FILE.pdf as PDF, FILE.txt as a
  bookmarks file and shall give the FILE-new.pdf as output.
Hence, parameters FILE.txt and FILE-new.pdf are optional, hah.

import codecs
import os
import re
import sys

from PyPDF2 import PdfFileMerger, PdfFileReader

# Package metadata for this script (version, author, contact, license).
__version__ = '0.6.0'
__author__ = 'RussellLuo'
__email__ = ''
__license__ = 'MIT'

def add_bookmarks(pdf_in_filename, bookmarks_tree, pdf_out_filename=None):
    """Add bookmarks to an existing PDF and write the result to a new file.

    The input PDF is appended to a fresh ``PdfFileMerger`` without importing
    its existing bookmarks, its document info is copied over, and one
    bookmark is then added for every node of ``bookmarks_tree``.

    Args:
        pdf_in_filename: Path of the PDF file to read.
        bookmarks_tree: List of ``(title, page_num, subtree)`` tuples, where
            ``page_num`` is the zero-based page index handed to
            ``addBookmark`` and ``subtree`` is a (possibly empty) list of
            the same shape holding the child bookmarks.
        pdf_out_filename: Path of the PDF file to write. When omitted or
            empty, ``-new`` is inserted before the input file's extension
            (``book.pdf`` -> ``book-new.pdf``).

    Returns:
        The path of the PDF file that was written.

    NOTE: ``PdfFileReader``, ``PdfFileMerger``, ``addBookmark``,
    ``addMetadata`` and ``getDocumentInfo`` are the legacy PyPDF2 names;
    PyPDF2 3.0 removed them, so this function needs a pre-3.0 release.
    """
    pdf_in = PdfFileReader(pdf_in_filename)

    # merge `pdf_in` into `pdf_out`, using PyPDF2.PdfFileMerger()
    pdf_out = PdfFileMerger()
    try:
        pdf_out.append(pdf_in, import_bookmarks=False)

        # copy/preserve existing document info
        doc_info = pdf_in.getDocumentInfo()
        if doc_info:
            pdf_out.addMetadata(doc_info)

        def crawl_tree(tree, parent):
            """Add every node of `tree` as a child bookmark of `parent`."""
            for title, page_num, subtree in tree:
                # the returned bookmark is the parent of this node's subtree
                current = pdf_out.addBookmark(title, page_num, parent)
                if subtree:
                    crawl_tree(subtree, current)

        # add bookmarks into `pdf_out` by crawling `bookmarks_tree`
        crawl_tree(bookmarks_tree, None)

        # get `pdf_out_filename` if it's not specified
        if not pdf_out_filename:
            name_parts = os.path.splitext(pdf_in_filename)
            pdf_out_filename = name_parts[0] + '-new' + name_parts[1]

        # write all data to the given file
        pdf_out.write(pdf_out_filename)
    finally:
        # release the merger's file handles even if merging/writing failed
        pdf_out.close()

    return pdf_out_filename

def get_bookmarks_tree(bookmarks_filename):
    """Build a bookmarks tree from a UTF-8 text file.

    File format, one entry per line:

        +"Chapter 1: Introduction"|2
        ++"1.1 Python"|2
        //10

    * The number of leading ``+`` signs is the nesting level (1 = top level).
      A line may be at most one level deeper than the previous entry.
    * The quoted text is the bookmark title; the number after ``|`` is the
      1-based page number.
    * A line ``//N`` (N an integer) sets an offset that is added to the page
      number of every following entry. Any other ``//`` line is a comment.
    * Lines that match none of the above are ignored.

    Bookmarks tree structure (a list of ``(title, page_index, children)``
    tuples, where ``page_index`` is zero-based):

        >>> get_bookmarks_tree('sample_bookmarks.txt')  # doctest: +SKIP
        [
            ('Foreword', 0, []),
            ('Chapter 1: Introduction', 1,
                [
                    ('1.1 Python', 1,
                        [
                            ('1.1.1 Basic syntax', 1, []),
                            ('1.1.2 Hello world', 2, [])
                        ]
                    ),
                    ('1.2 Exercises', 3, [])
                ]
            ),
            ('Chapter 2: Conclusion', 4, [])
        ]

    Thanks to Stefan, who shared a neat solution for trees in Python; since
    dictionaries were unordered, lists are used here instead.
    Also thanks to Caicono, who inspired the idea of recording bookmark
    titles and page numbers by hand -- probably the only workable approach
    for scanned PDFs.

    Args:
        bookmarks_filename: Path of the bookmarks text file.

    Returns:
        The list of top-level ``(title, page_index, children)`` tuples.

    Raises:
        ValueError: If an entry has no ``+`` at all, or is more than one
            level deeper than the previous entry.
    """
    # (pluses)"title"|page -- compiled once instead of once per line
    entry_pattern = re.compile(r'(\+*)\s*?"([^"]+)"\s*\|\s*(\d+)')

    # bookmarks tree
    tree = []

    # the latest nodes (the old node will be replaced by a new one if they have the same level)
    # each item (key, value) in dictionary represents a node
    # `key`: the level of the node
    # `value`: the children list of the node
    latest_nodes = {0: tree}

    offset = 0
    prev_level = 0
    with codecs.open(bookmarks_filename, 'r', encoding='utf-8') as bookmarks_file:
        for line in bookmarks_file:
            line = line.strip()
            if line.startswith('//'):
                try:
                    offset = int(line[2:])
                except ValueError:
                    pass  # a plain comment, not an offset directive
                continue

            res = entry_pattern.match(line)
            if not res:
                continue

            pluses, title, page_num = res.groups()
            cur_level = len(pluses)  # plus count stands for level
            cur_node = (title, int(page_num) - 1 + offset, [])

            if not (0 < cur_level <= prev_level + 1):
                raise ValueError('plus (+) count is invalid here: %s' % line)

            # append the current node into its parent node (with the level `cur_level` - 1)
            latest_nodes[cur_level - 1].append(cur_node)

            latest_nodes[cur_level] = cur_node[2]
            prev_level = cur_level

    return tree

# run as a script
def run_script(pdf_in_filename, bookmarks_filename, pdf_out_filename=None):
    """Read the bookmarks file, add its bookmarks to the PDF, report on stderr.

    Args:
        pdf_in_filename: Path of the PDF file to read.
        bookmarks_filename: Path of the bookmarks text file.
        pdf_out_filename: Path of the PDF file to write; passed through to
            `add_bookmarks`, which picks a default when it is None.

    Any exception raised while parsing or writing is caught here (this is the
    command-line boundary) and reported on stderr instead of propagating.
    """
    sys.stderr.write('In processing, please wait...\n')
    try:
        bookmarks_tree = get_bookmarks_tree(bookmarks_filename)
        pdf_out_filename = add_bookmarks(pdf_in_filename, bookmarks_tree, pdf_out_filename)
    except Exception as exc:
        sys.stderr.write("error:\n%s\n" % str(exc))
    else:
        # only announce the output file when it was actually produced
        sys.stderr.write("New PDF generated: %s\n" % pdf_out_filename)

# documentation test
def doc_test():
    """Run the doctest examples found in this module's docstrings."""
    import doctest
    doctest.testmod()

# test and, or execute
def main():
    """Command-line entry point.

    Accepted forms (``sys.argv[1:]``):
        -h | --help                      print the usage text and exit
        -t | --test                      run the documentation tests
        FILE.pdf                         bookmarks are read from FILE.txt
        FILE.pdf FILE.txt [FILE-new.pdf] explicit bookmarks/output files

    Any other argument count prints the usage text and exits with status 1.
    """
    if len(sys.argv) not in (2, 3, 4) or sys.argv[1] in ('-h', '--help'):
        # `__doc__` is None when the module has no docstring
        sys.stderr.write(__doc__ or '')
        sys.exit(1)

    if sys.argv[1] in ('-t', '--test'):
        doc_test()
    elif len(sys.argv) == 2:
        # only the PDF was given: look for a bookmarks file with the same stem
        name_parts = os.path.splitext(sys.argv[1])
        run_script(sys.argv[1], name_parts[0] + '.txt', pdf_out_filename=None)
    else:
        run_script(*sys.argv[1:])

# execute only when run as a script, not when imported
if __name__ == '__main__':
    main()


See also  Python 3 Tkinter Mini Finance Expense Money Budget Tracker or Calculator GUI Desktop App Full Project For Beginners






+"Chapter 1: Introduction"|2
++"1.1 Python"|2
+++"1.1.1 Basic syntax"|2
+++"1.1.2 Hello world"|3
++"1.2 Exercises"|4
+"Chapter 2: Conclusion"|5

Leave a Reply