app.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
# OCR script: preprocess an image with OpenCV, extract its text with
# Tesseract, print the text, save it to extracted_text.txt, and show the
# intermediate images for visual inspection.
import cv2
import pytesseract

# Path to tesseract.exe
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load and preprocess image
image_path = 'code.png'
image = cv2.imread(image_path)
# cv2.imread signals failure by returning None instead of raising; fail here
# with a clear message rather than with an opaque error inside cvtColor.
if image is None:
    raise FileNotFoundError(
        f"Could not read image {image_path!r}: file is missing or not a supported image format"
    )
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3, 3), 0)
# Otsu picks the threshold automatically (the 0 passed here is ignored);
# cv2.threshold returns (threshold_value, image), so [1] keeps the image.
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Remove noise and invert
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
# Undo the inversion applied by THRESH_BINARY_INV before handing the image
# to Tesseract.
invert = 255 - opening

# OCR
# --psm 6 is Tesseract's "single uniform block of text" page segmentation mode.
data = pytesseract.image_to_string(invert, lang='eng', config='--psm 6')
print(data)

# Save text to a file
with open("extracted_text.txt", "w", encoding="utf-8") as f:
    f.write(data)

# Display intermediate images
cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.imshow('invert', invert)
# Block until a key is pressed, then close all windows.
cv2.waitKey(0)
cv2.destroyAllWindows()