190 lines
6.6 KiB
Python
190 lines
6.6 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
Created on Sun May 1 09:37:53 2022
|
|
|
|
@author: elena
|
|
"""
|
|
|
|
import cv2
|
|
import doxapy
|
|
import numpy as np
|
|
import scipy.ndimage as inter
|
|
|
|
class BBox:
    '''
    Rectangle described by a position (x, y) and a size (w, h).

    The four values are stored unchanged as attributes of the same name.
    '''

    def __init__(self, x, y, w, h):
        self.x, self.y = x, y
        self.w, self.h = w, h

    def __repr__(self):
        # The ":d" format spec only accepts integer values.
        return f"x: {self.x:d}, y: {self.y:d}, w: {self.w:d}, h: {self.h:d}"
|
|
|
|
def crop_binarize(inputfile: str, outputfile: str) -> None:
    '''
    Load inputfile from disk, crop it to the largest low-saturation region,
    binarize it with adaptive thresholding and save the result as outputfile.
    Works best with pictures from a smartphone.

    Args:
        inputfile (str): Filename of the colored source picture
        outputfile (str): Filename of the binarized & cropped picture
    Returns:
        None
    Raises:
        OSError: if inputfile cannot be read or outputfile cannot be written
        ValueError: if no contour is found or the cropped region is empty
    '''
    img = cv2.imread(inputfile)
    # cv2.imread reports failure by returning None instead of raising
    if img is None:
        raise OSError(f"Could not read image file: {inputfile}")

    # Rotate image: landscape pictures are turned into portrait orientation
    (H, W) = img.shape[:2]
    if H < W:
        img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)

    #%% Start the cropping procedure

    ## (1) convert to hsv-space, then take the saturation channel
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    s = cv2.split(hsv)[1]

    ## (2) fixed threshold on the S channel: saturation <= 35 becomes 255
    threshed = cv2.threshold(s, 35, 255, cv2.THRESH_BINARY_INV)[1]

    ## (3) find all the external contours on the threshed S
    # [-2] selects the contour list for both the 2- and 3-value return forms
    cnts = cv2.findContours(threshed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(cnts) == 0:
        raise ValueError(f"No contour found in image: {inputfile}")

    ## (4) sort by area and choose the largest contour
    cnt = sorted(cnts, key=cv2.contourArea)[-1]

    ## (5) approximate the contour to get the corner points
    arclen = cv2.arcLength(cnt, True)
    approx = cv2.approxPolyDP(cnt, 0.02 * arclen, True)
    xcoords = approx[:, 0, 0]
    ycoords = approx[:, 0, 1]

    ## (6) Crop the picture to the bounding box of the corner points
    crop = img[int(ycoords.min()):int(ycoords.max()),
               int(xcoords.min()):int(xcoords.max())]
    # Additionally cut a fixed margin from the left and the right side
    border = 30
    W = crop.shape[1]
    crop = crop[:, border:W - border]
    if crop.size == 0:
        raise ValueError(f"Cropped region is empty for image: {inputfile}")

    #%% Start the binarization

    ## (1) Convert to gray
    crop = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)

    ## (2) Gaussian blur followed by adaptive thresholding
    crop = cv2.GaussianBlur(crop, (5, 5), 0)
    thresh1 = cv2.adaptiveThreshold(crop, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY, 17, 2)

    #%% Apply improvements for text

    # Erode and dilate the inverted image (text is white there) to get
    # better letters without interruptions, then invert back.
    # NOTE(review): the 1x1 erosion kernel looks like a no-op; kept so the
    # pipeline stays exactly as before.
    rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
    erose = cv2.erode(cv2.bitwise_not(thresh1), rect_kernel, iterations=1)
    rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))
    dilation = cv2.dilate(erose, rect_kernel, iterations=1)
    out = cv2.bitwise_not(dilation)

    ##% Remove some artefacts from the previous steps
    # Mainly vertical lines
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 3))
    dilate = cv2.dilate(cv2.bitwise_not(out), kernel, iterations=3)
    edge = cv2.Canny(dilate, 100, 250)
    cnts = cv2.findContours(edge, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # The contour list is element 0 of a 2-tuple, otherwise element 1
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    # Collect every contour whose bounding box is taller than 150 pixels
    badboxes = []
    for c in cnts:
        x, y, w, h = cv2.boundingRect(c)
        if h > 150:
            badboxes.append(BBox(x, y, w, h))

    # Paint the collected boxes white in the output image
    for element in badboxes:
        out[element.y:element.y + element.h, element.x:element.x + element.w] = 255

    # cv2.imwrite returns False when the file could not be written
    if not cv2.imwrite(outputfile, out):
        raise OSError(f"Could not write image file: {outputfile}")
|
|
|
|
|
|
def correct_skew(image: np.array, delta:float = 0.1, limit: int = 2) -> (float, np.array):
    '''
    Estimate the skew angle of an image with the Projection Profile Method
    and return the image rotated by that angle.

    The image is rotated by every candidate angle; for each rotation the
    pixel values are summed per row, and the rotation is scored by the sum
    of squared differences between consecutive row sums. The candidate with
    the highest score (the first one on ties) is used for the correction.

    Args:
        image (np.array): image to apply skew correction to
        delta (float): increment step between candidate angles
        limit (int): candidates run from -limit to about +limit; they come
            from np.arange with a float step, so the exact upper end is
            subject to floating-point rounding
    Returns:
        best_angle (float): Calculated correction angle
        corrected (np.array): corrected image
    '''
    def projection_score(arr, angle):
        # Rotate without changing the shape, nearest-neighbour (order=0)
        rotated = inter.rotate(arr, angle, reshape=False, order=0)
        row_sums = np.sum(rotated, axis=1, dtype=float)
        # Squared differences between consecutive row sums
        return np.sum(np.diff(row_sums, axis=0) ** 2, dtype=float)

    candidates = np.arange(-limit, limit + delta, delta)
    scores = [projection_score(image, candidate) for candidate in candidates]
    best_angle = candidates[scores.index(max(scores))]

    # Rotate the input around its (integer) center by the chosen angle
    height, width = image.shape[:2]
    rotation = cv2.getRotationMatrix2D((width // 2, height // 2), best_angle, 1.0)
    corrected = cv2.warpAffine(
        image,
        rotation,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )

    return best_angle, corrected
|
|
|
|
def crop_binarize_scanner(inputfile: str, outputfile: str) -> None:
    '''
    Load inputfile from disk as grayscale, binarize it with doxapy
    (ISAUVOLA algorithm), correct the skew and save the result as
    outputfile. Works best with pictures from the scanner.

    The detected skew angle is printed to stdout.

    Args:
        inputfile (str): Filename of the source picture
        outputfile (str): Filename of the binarized & deskewed picture
    Returns:
        None
    Raises:
        OSError: if inputfile cannot be read or outputfile cannot be written
    '''
    img = cv2.imread(inputfile, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None instead of raising
    if img is None:
        raise OSError(f"Could not read image file: {inputfile}")

    # Buffer of the same shape and dtype as the input, handed to doxapy as
    # the destination of the binarization
    binary_image = np.empty(img.shape, img.dtype)
    sauvola = doxapy.Binarization(doxapy.Binarization.Algorithms.ISAUVOLA)
    sauvola.initialize(img)
    sauvola.to_binary(binary_image, {"window": 45, "k": 0.009})

    # Deskew with the default search range of correct_skew
    angle, out = correct_skew(binary_image)
    print(angle)

    # cv2.imwrite returns False when the file could not be written
    if not cv2.imwrite(outputfile, out):
        raise OSError(f"Could not write image file: {outputfile}")
|