diff --git a/.gitignore b/.gitignore index 79e3b1b5455702cd2d86be384f3da8c7e0d69f41..42de8ba8a80be8ea44b9e0ade74f8dd2a5a0490b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.idea .ipynb_checkpoints __pycache__ .venv diff --git a/output/Laufon_009_preprocessed.pdf b/output/Laufon_009_preprocessed.pdf new file mode 100644 index 0000000000000000000000000000000000000000..1e11809e2e53e335cee07e9f381513417bf44a59 Binary files /dev/null and b/output/Laufon_009_preprocessed.pdf differ diff --git a/output/Zeitungsausschnitt_preprocessed.pdf b/output/Zeitungsausschnitt_preprocessed.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f82b8d7c84f95ffdcce2170b787537156db84659 Binary files /dev/null and b/output/Zeitungsausschnitt_preprocessed.pdf differ diff --git a/preprocessing.py b/preprocessing.py index a3344e138ec2c4681622ed2653326ad5b5385aff..4864a74386fe3e0f751400c57e8f15ec8ce99024 100644 --- a/preprocessing.py +++ b/preprocessing.py @@ -3,14 +3,12 @@ # desc: Provides functions to preprocess image to enhcance tesseract reults, currently applied: # - deskew image (code adapted from https://docs.opencv.org/4.7.0/d1/dee/tutorial_introduction_to_pca.html) # - crop image border regions -# - upscale image import os import cv2 as cv import numpy as np import matplotlib.pyplot as plt from math import atan2, cos, sin, sqrt, pi -# import editdistance from IPython.display import HTML, Image, display @@ -302,9 +300,10 @@ def preprocess_pipeline(image, debug=False, debug_path=None): img_rot, img_angle = deskew_image_pca(image, debug=debug, debug_path=debug_path) img_bb = get_cropping_corner_points(img_rot, threshold_border=0.05, debug=debug, debug_path=debug_path) img_crop = crop_bb_from_img(img_rot, img_bb, margin=15) + # Exclude upscaling for this demo # img_upscld = upscale_image(img_crop, scaling=2) img_res = cv.cvtColor(img_crop, cv.COLOR_BGR2GRAY) - # Following makes things gloablly worse, so I excluded these steps + # Optional other preprocessing steps # img_erode = cv.erode(img_gray, cv.getStructuringElement(cv.MORPH_CROSS, (3, 3)), iterations=1) # img_blur = cv.GaussianBlur(img_bright_contrast, (3, 3), 2) # img_sharp = unsharp_mask(img_gray, kernel_size=(3, 3), sigma=1.5, amount=5.0, threshold=50) @@ -314,67 +313,16 @@ def preprocess_pipeline(image, debug=False, debug_path=None): if __name__ == '__main__': images = [ - '/home/simon/Downloads/Ruthenica_sample/tesseract_output/00000001.jpg', - '/home/simon/Downloads/Ruthenica_sample/tesseract_output/00000003.jpg', - '/home/simon/Downloads/Ruthenica_sample/tesseract_output/00000004.jpg' + 'img/Laufon_009.jpg', + 'img/Zeitungsausschnitt.jpg' ] - # images = [ - # 'img/test.jpg', - # 'img/Laufon_009.jpg', - # 'img/Laufon_012.jpg', - # 'img/Laufon_013.jpg', - # 'img/Laufon_055.jpg', - # 'img/Laufon_094.jpg', - # 'img/Laufon_110.jpg', - # 'img/Dresden_008.jpg', - # 'img/Dresden_020.jpg', - # 'img/Dresden_037.jpg', - # 'img/Dresden_063.jpg', - # 'img/Dresden_094.jpg', - # 'img/Dresden_132.jpg', - # 'img/00002_1003102D_00000021.jpg', - # 'img/00005_10026184_00000014.jpg', - # 'img/00063_10029F7D_00000022.jpg' - # ] - # measured_angles = [-0.65, 0.1, -1.3, -0.4, 0.85, 1.2, - # -0.1, -0.7, 0.5, -0.4, -0.7, -1.3, - # 0.5, 0.3, -0.7] - # analyse_deskew_method(images, measured_angles, debug=False) - # ground_truth_fps = [ - # 'ground_truths/GT_Laufon_009.txt', - # 'ground_truths/GT_Laufon_012.txt', - # 'ground_truths/GT_Laufon_013.txt', - # 'ground_truths/GT_Laufon_055.txt', - # 'ground_truths/GT_Laufon_094.txt', - # 'ground_truths/GT_Laufon_110.txt', - # 'ground_truths/GT_Dresden_008.txt', - # 'ground_truths/GT_Dresden_020.txt', - # 'ground_truths/GT_Dresden_037.txt', - # 'ground_truths/GT_Dresden_063.txt', - # 'ground_truths/GT_Dresden_094.txt', - # 'ground_truths/GT_Dresden_132.txt', - # 'ground_truths/GT_00002_1003102D_00000021.txt', - # 'ground_truths/GT_00005_10026184_00000014.txt', - # 'ground_truths/GT_00063_10029F7D_00000022.txt' - # ] - - # ground_truths = [open(fp).read() for fp in ground_truth_fps] - # levenshtein_distances = [] - for img_path in images: img_path_preprocessed = f'{img_path.replace(".jpg", "")}_preprocessed.jpg' ocred_path_base_preprocessed = img_path_preprocessed.replace('.jpg', '') - print(f'Processing image {img_path}') input_image = cv.imread(img_path) - preprocessed_img = preprocess_pipeline(input_image, debug=True) - # cv.imshow('Preprocessed image, rotated, cropped and resized', preprocessed_img) - # cv.waitKey() - + preprocessed_img = preprocess_pipeline(input_image, debug=True) cv.imwrite(img_path_preprocessed, preprocessed_img) os.system(f'tesseract -l ukr --dpi 300 --psm 4 {img_path_preprocessed} {ocred_path_base_preprocessed} txt') - - # print('Levenshtein distances are:', levenshtein_distances) - # print('L2 norm is:', np.linalg.norm(levenshtein_distances))