2023-11-09 18:47:11 +01:00

190 lines
6.6 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun May 1 09:37:53 2022
@author: elena
"""
import cv2
import doxapy
import numpy as np
import scipy.ndimage as inter
class BBox:
    """Rectangle described by an origin (x, y) and an extent (w, h)."""

    def __init__(self, x, y, w, h):
        # Values are stored exactly as given; no validation or conversion.
        self.x, self.y, self.w, self.h = x, y, w, h

    def __repr__(self):
        # The ``d`` format code means all four fields must be integer-like.
        return f"x: {self.x:d}, y: {self.y:d}, w: {self.w:d}, h: {self.h:d}"
def _crop_to_largest_low_saturation_region(img):
    """Crop ``img`` (BGR) to the bounding box of its largest low-saturation contour."""
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    saturation = cv2.split(hsv)[1]
    # Inverted fixed threshold: pixels with saturation <= 35 become 255 in the mask.
    _, mask = cv2.threshold(saturation, 35, 255, cv2.THRESH_BINARY_INV)
    # [-2] picks the contour list for both the 2-tuple and the 3-tuple return
    # layouts used by different OpenCV versions.
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        raise ValueError("No low-saturation region found in the image; cannot crop.")
    largest = sorted(contours, key=cv2.contourArea)[-1]
    # Simplify the outline to a few corner points (tolerance: 2 % of the perimeter).
    perimeter = cv2.arcLength(largest, True)
    corners = cv2.approxPolyDP(largest, 0.02 * perimeter, True)
    xs = corners[:, :, 0]
    ys = corners[:, :, 1]
    return img[int(ys.min()):int(ys.max()), int(xs.min()):int(xs.max())]


def _binarize_text(crop):
    """Turn a BGR crop into a black-on-white binary image with thickened strokes."""
    gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    binary = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 17, 2
    )
    # Morphology is applied to the inverted image so dark strokes are the
    # (white) foreground that gets grown.
    inverted = cv2.bitwise_not(binary)
    # NOTE(review): a 1x1 structuring element looks like a no-op placeholder;
    # kept so the pipeline stays identical to the previous behaviour.
    eroded = cv2.erode(
        inverted, cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1)), iterations=1
    )
    # Dilation closes small interruptions inside letters.
    thickened = cv2.dilate(
        eroded, cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4)), iterations=1
    )
    return cv2.bitwise_not(thickened)


def _blank_tall_artifacts(binary, max_height):
    """Paint white over every contour whose bounding box is taller than ``max_height``.

    ``binary`` is modified in place and also returned.
    """
    # A 1-wide, 3-tall kernel stretches foreground vertically so that broken
    # vertical lines merge into one tall contour.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 3))
    stretched = cv2.dilate(cv2.bitwise_not(binary), kernel, iterations=3)
    edges = cv2.Canny(stretched, 100, 250)
    contours = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    boxes = [BBox(*cv2.boundingRect(contour)) for contour in contours]
    for box in boxes:
        if box.h > max_height:
            binary[box.y:box.y + box.h, box.x:box.x + box.w] = 255
    return binary


def crop_binarize(inputfile: str, outputfile: str, *, border: int = 30,
                  max_artifact_height: int = 150) -> None:
    '''
    Load inputfile from disk, crop it, binarize it and save the result as outputfile.
    Works best with pictures from a smartphone.

    Steps: rotate landscape pictures to portrait, crop to the largest
    low-saturation region, trim ``border`` pixels from the left and right,
    apply adaptive thresholding, thicken the strokes and finally blank out
    tall artefacts (mainly vertical lines).

    Args:
        inputfile (str): Filename of the colored input picture
        outputfile (str): Filename of the binarized & cropped picture
        border (int): Pixels removed from the left and right edge of the crop
        max_artifact_height (int): Contours whose bounding box is taller than
            this many pixels are painted white
    Returns:
        None
    Raises:
        ValueError: border is negative, no region to crop to was found, or
            the cropped area is empty
        OSError: inputfile could not be read or outputfile could not be written
    '''
    if border < 0:
        raise ValueError("border must be non-negative, got {}".format(border))

    img = cv2.imread(inputfile)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise OSError("Could not read image: {!r}".format(inputfile))

    # Bring landscape pictures into portrait orientation.
    height, width = img.shape[:2]
    if height < width:
        img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)

    crop = _crop_to_largest_low_saturation_region(img)
    crop = crop[:, border:crop.shape[1] - border]
    if crop.size == 0:
        raise ValueError(
            "Cropped region is empty (detected area too small for border={}).".format(border)
        )

    out = _blank_tall_artifacts(_binarize_text(crop), max_artifact_height)

    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(outputfile, out):
        raise OSError("Could not write image: {!r}".format(outputfile))
def correct_skew(image: np.ndarray, delta: float = 0.1, limit: int = 2) -> "tuple[float, np.ndarray]":
    '''
    Estimate and correct the skew of an image with the Projection Profile Method.

    The image is rotated by every candidate angle in [-limit, limit] (step
    delta). For each rotation the pixel values are summed per row, and the
    score is the sum of squared differences between neighbouring row sums.
    The angle with the highest score is taken as the skew angle, and the
    input image is rotated by it around its center.

    Args:
        image (np.ndarray): image to apply skew correction to
        delta (float): increment step between tested angles, must be > 0
        limit (int): maximal absolute angle to test, must be >= 0
    Returns:
        best_angle (float): calculated correction angle
        corrected (np.ndarray): corrected image, same size as the input
    Raises:
        ValueError: delta is not positive or limit is negative
    '''
    if delta <= 0:
        raise ValueError("delta must be positive, got {}".format(delta))
    if limit < 0:
        raise ValueError("limit must be non-negative, got {}".format(limit))

    def projection_score(angle):
        # Nearest-neighbour rotation (order=0) without changing the array shape.
        rotated = inter.rotate(image, angle, reshape=False, order=0)
        row_sums = np.sum(rotated, axis=1, dtype=float)
        # Differences are taken along the row axis.
        return np.sum(np.diff(row_sums, axis=0) ** 2, dtype=float)

    # Build the candidates from an integer step count instead of a float-step
    # np.arange: the latter has an unstable length, can overshoot ``limit``
    # and never yields an exact 0.0. The small epsilon absorbs rounding noise
    # in the division.
    step_count = int(np.floor(2 * limit / delta + 1e-9))
    angles = -limit + delta * np.arange(step_count + 1)

    scores = [projection_score(angle) for angle in angles]
    best_angle = angles[int(np.argmax(scores))]

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    rotation = cv2.getRotationMatrix2D(center, best_angle, 1.0)
    # Replicating the border avoids black wedges at the rotated corners.
    corrected = cv2.warpAffine(image, rotation, (w, h), flags=cv2.INTER_CUBIC,
                               borderMode=cv2.BORDER_REPLICATE)
    return best_angle, corrected
def crop_binarize_scanner(inputfile: str, outputfile: str) -> None:
    '''
    Load inputfile from disk, binarize it, correct its skew and save the result
    as outputfile. Works best with pictures from the scanner.

    The picture is read as grayscale, binarized with doxapy's ISAUVOLA
    algorithm (window 45, k 0.009) and deskewed with correct_skew. The
    detected skew angle is printed to stdout.

    Args:
        inputfile (str): Filename of the input picture
        outputfile (str): Filename of the binarized & deskewed picture
    Returns:
        None
    Raises:
        OSError: inputfile could not be read or outputfile could not be written
    '''
    img = cv2.imread(inputfile, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise OSError("Could not read image: {!r}".format(inputfile))

    # doxapy writes its result into a preallocated array of the same shape/dtype.
    binary_image = np.empty(img.shape, img.dtype)
    sauvola = doxapy.Binarization(doxapy.Binarization.Algorithms.ISAUVOLA)
    sauvola.initialize(img)
    sauvola.to_binary(binary_image, {"window": 45, "k": 0.009})

    angle, out = correct_skew(binary_image)
    print(angle)

    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(outputfile, out):
        raise OSError("Could not write image: {!r}".format(outputfile))