#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Crop, binarize and deskew photographed or scanned documents.

Created on Sun May  1 09:37:53 2022

@author: elena
"""

from typing import Tuple

import cv2
import doxapy
import numpy as np
import scipy.ndimage as inter

# Pixels whose HSV saturation is at or below this value are treated as
# foreground when searching for the region to crop.
_SATURATION_THRESHOLD = 35
# Number of columns trimmed from the left and the right of the cropped region.
_SIDE_BORDER = 30
# Connected components taller than this many pixels are blanked out.
_MAX_COMPONENT_HEIGHT = 150


class BBox:
    """Axis-aligned rectangle described by its origin (x, y) and size (w, h)."""

    def __init__(self, x: int, y: int, w: int, h: int) -> None:
        self.x = x
        self.y = y
        self.w = w
        self.h = h

    def __repr__(self) -> str:
        return f"x: {self.x:d}, y: {self.y:d}, w: {self.w:d}, h: {self.h:d}"


def _read_image(path: str, flags: int) -> np.ndarray:
    """Load an image with OpenCV, raising instead of handing back ``None``."""
    image = cv2.imread(path, flags)
    if image is None:
        # cv2.imread signals a missing or undecodable file by returning None.
        raise OSError(f"Could not read image file: {path!r}")
    return image


def _write_image(path: str, image: np.ndarray) -> None:
    """Save an image with OpenCV, raising when the write is reported as failed."""
    if not cv2.imwrite(path, image):
        raise OSError(f"Could not write image file: {path!r}")


def _external_contours(binary: np.ndarray):
    """Return the external contours of a single-channel 8-bit image."""
    found = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns (contours, hierarchy) or (image, contours,
    # hierarchy) depending on the OpenCV version; contours is always at [-2].
    return found[-2]


def _crop_to_largest_region(img: np.ndarray) -> np.ndarray:
    """Crop a BGR image to the bounding box of its largest low-saturation area."""
    # (1) Convert to HSV and keep only the saturation channel.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    _, saturation, _ = cv2.split(hsv)

    # (2) Inverse threshold: low-saturation pixels become 255.
    _, mask = cv2.threshold(
        saturation, _SATURATION_THRESHOLD, 255, cv2.THRESH_BINARY_INV
    )

    # (3) Find the external contours and pick the one with the largest area.
    contours = _external_contours(mask)
    if len(contours) == 0:
        raise ValueError("No low-saturation region found to crop to")
    largest = sorted(contours, key=cv2.contourArea)[-1]

    # (4) Simplify the contour and take the extent of its corner points.
    perimeter = cv2.arcLength(largest, True)
    approx = cv2.approxPolyDP(largest, 0.02 * perimeter, True)
    xs = approx[:, :, 0]
    ys = approx[:, :, 1]
    crop = img[int(ys.min()):int(ys.max()), int(xs.min()):int(xs.max())]

    # (5) Trim a fixed border from the left and right edges.
    height, width = crop.shape[:2]
    if height == 0 or width <= 2 * _SIDE_BORDER:
        raise ValueError(
            f"Detected region ({width}x{height}) is too small to crop "
            f"a {_SIDE_BORDER}px border from each side"
        )
    return crop[:, _SIDE_BORDER:width - _SIDE_BORDER]


def _binarize_text(crop: np.ndarray) -> np.ndarray:
    """Turn a BGR crop into a black-on-white binary image with thickened strokes."""
    # (1) Convert to gray.
    gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)

    # (2) Gaussian blur followed by adaptive thresholding.
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    binary = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 17, 2
    )

    # (3) Work on the inverted image (text = 255) so that dilation thickens
    # the strokes and closes small interruptions in the letters.
    # NOTE(review): the 1x1 erosion kernel should leave the image unchanged;
    # kept as in the original pipeline.
    erode_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
    eroded = cv2.erode(cv2.bitwise_not(binary), erode_kernel, iterations=1)
    dilate_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))
    dilated = cv2.dilate(eroded, dilate_kernel, iterations=1)
    return cv2.bitwise_not(dilated)


def _blank_tall_components(out: np.ndarray) -> np.ndarray:
    """Paint over components taller than ``_MAX_COMPONENT_HEIGHT`` (mainly vertical lines).

    ``out`` is modified in place and also returned.
    """
    # Stretch the inverted image vertically (1 px wide, 3 px tall kernel).
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 3))
    stretched = cv2.dilate(cv2.bitwise_not(out), kernel, iterations=3)
    edges = cv2.Canny(stretched, 100, 250)

    boxes = (BBox(*cv2.boundingRect(c)) for c in _external_contours(edges))
    for box in boxes:
        if box.h > _MAX_COMPONENT_HEIGHT:
            # Fill the whole bounding box with white (255).
            out[box.y:box.y + box.h, box.x:box.x + box.w] = 255
    return out


def crop_binarize(inputfile: str, outputfile: str) -> None:
    '''
    Load inputfile from disk, crop it, apply binarization & thresholding
    and save the result as outputfile.
    Works best with pictures from a smartphone.

    Args:
        inputfile (str): Filename of the colored input picture
        outputfile (str): Filename of the binarized & cropped picture

    Returns:
        None

    Raises:
        OSError: if inputfile cannot be read or outputfile cannot be written
        ValueError: if no region to crop to is found, or it is too small
    '''
    img = _read_image(inputfile, cv2.IMREAD_COLOR)

    # Bring landscape pictures into portrait orientation.
    height, width = img.shape[:2]
    if height < width:
        img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)

    crop = _crop_to_largest_region(img)
    out = _binarize_text(crop)
    out = _blank_tall_components(out)

    _write_image(outputfile, out)


def correct_skew(
    image: np.ndarray, delta: float = 0.1, limit: int = 2
) -> Tuple[float, np.ndarray]:
    '''
    Projection Profile Method for skew angle estimation.

    The image is rotated by every angle in [-limit, limit] (step delta).
    For each rotation the pixel values are summed per row, and the rotation
    is scored by the sum of squared differences between neighbouring row
    sums. The angle with the highest score is used to rotate the image.

    Args:
        image (np.ndarray): image to apply skew correction to
        delta (float): increment step for the angle, must be > 0
        limit (int): maximal absolute angle to test, must be >= 0

    Returns:
        best_angle (float): Calculated correction angle
        corrected (np.ndarray): corrected image

    Raises:
        ValueError: if delta is not positive or limit is negative
    '''
    if delta <= 0:
        raise ValueError(f"delta must be positive, got {delta!r}")
    if limit < 0:
        raise ValueError(f"limit must be non-negative, got {limit!r}")

    def determine_score(arr: np.ndarray, angle: float) -> float:
        rotated = inter.rotate(arr, angle, reshape=False, order=0)
        profile = np.sum(rotated, axis=1, dtype=float)
        # Differences between consecutive rows of the profile.
        return np.sum(np.diff(profile, axis=0) ** 2, dtype=float)

    angles = np.arange(-limit, limit + delta, delta)
    scores = [determine_score(image, angle) for angle in angles]
    # argmax returns the first maximum, like list.index(max(...)).
    best_angle = angles[int(np.argmax(scores))]

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    rotation = cv2.getRotationMatrix2D(center, best_angle, 1.0)
    corrected = cv2.warpAffine(
        image,
        rotation,
        (w, h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )

    return best_angle, corrected


def crop_binarize_scanner(inputfile: str, outputfile: str) -> None:
    '''
    Load inputfile from disk, apply binarization & skew correction
    and save the result as outputfile.
    Works best with pictures from the scanner.

    Args:
        inputfile (str): Filename of the input picture
        outputfile (str): Filename of the binarized picture

    Returns:
        None

    Raises:
        OSError: if inputfile cannot be read or outputfile cannot be written
    '''
    img = _read_image(inputfile, cv2.IMREAD_GRAYSCALE)

    # doxapy writes its result into a pre-allocated array of the same
    # shape and dtype as the input.
    binary_image = np.empty(img.shape, img.dtype)
    sauvola = doxapy.Binarization(doxapy.Binarization.Algorithms.ISAUVOLA)
    sauvola.initialize(img)
    sauvola.to_binary(binary_image, {"window": 45, "k": 0.009})

    angle, out = correct_skew(binary_image)
    print(angle)

    _write_image(outputfile, out)