Python 3 OpenCV & Tesseract OCR Example to Extract Text From an Image & Save It as a TXT File From the Terminal

app.py

import cv2
import pytesseract

# Path to tesseract.exe (Windows-style location; adjust to match your install)
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load and preprocess image
IMAGE_PATH = 'code.png'
image = cv2.imread(IMAGE_PATH)
if image is None:
    # cv2.imread does not raise on failure; it returns None, which would
    # otherwise surface later as an opaque OpenCV error inside cvtColor.
    raise SystemExit(
        "Could not read image {!r}: file is missing or not a supported "
        "image format".format(IMAGE_PATH)
    )
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Light blur to smooth pixel noise before thresholding.
blur = cv2.GaussianBlur(gray, (3, 3), 0)
# Otsu picks the threshold automatically (the 0 passed here is ignored);
# BINARY_INV flips the result so pixels below the threshold become white.
# NOTE(review): presumably the input is dark text on a light background,
# which makes the text the white foreground here - confirm for your images.
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Remove noise and invert
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
# Morphological opening (erode then dilate) drops small white specks.
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
# Flip the binary image back before handing it to Tesseract.
invert = 255 - opening

# OCR
# --psm 6 tells Tesseract to assume a single uniform block of text.
data = pytesseract.image_to_string(invert, lang='eng', config='--psm 6')
print(data)

# Save text to a file
with open("extracted_text.txt", "w", encoding="utf-8") as f:
    f.write(data)

# Display intermediate images (blocks until a key is pressed in a window)
cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.imshow('invert', invert)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2

import pytesseract

# Path to tesseract.exe (Windows-style location; adjust to match your install)
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load and preprocess image
IMAGE_PATH = 'code.png'
image = cv2.imread(IMAGE_PATH)
if image is None:
    # cv2.imread does not raise on failure; it returns None, which would
    # otherwise surface later as an opaque OpenCV error inside cvtColor.
    raise SystemExit(
        "Could not read image {!r}: file is missing or not a supported "
        "image format".format(IMAGE_PATH)
    )
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Light blur to smooth pixel noise before thresholding.
blur = cv2.GaussianBlur(gray, (3, 3), 0)
# Otsu picks the threshold automatically (the 0 passed here is ignored);
# BINARY_INV flips the result so pixels below the threshold become white.
# NOTE(review): presumably the input is dark text on a light background,
# which makes the text the white foreground here - confirm for your images.
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Remove noise and invert
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
# Morphological opening (erode then dilate) drops small white specks.
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
# Flip the binary image back before handing it to Tesseract.
invert = 255 - opening

# OCR
# --psm 6 tells Tesseract to assume a single uniform block of text.
data = pytesseract.image_to_string(invert, lang='eng', config='--psm 6')
print(data)

# Save text to a file
with open("extracted_text.txt", "w", encoding="utf-8") as f:
    # The write must be indented under the `with` so the file is open here.
    f.write(data)

# Display intermediate images (blocks until a key is pressed in a window)
cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.imshow('invert', invert)
cv2.waitKey(0)
cv2.destroyAllWindows()

Python 3 OpenCV & Tesseract OCR Example to Extract Text From an Image & Save It as a TXT File From the Terminal

Comments

Leave a Reply Cancel reply

Archives