scanproc/scanproc.py

199 lines
6.2 KiB
Python
Executable File

#!/usr/bin/env python3
import easyocr
import cv2 as cv
import numpy as np
import warnings
from tqdm import tqdm
from deskew import determine_skew
from PIL import Image, ImageOps, ImageEnhance
from entrypoint2 import entrypoint
from pathlib import Path
from tempfile import TemporaryDirectory
from subprocess import run
from pdfutil import mkpdf
# Suppress every Python warning process-wide for the rest of the run.
warnings.filterwarnings("ignore")
def rotate(img, angle: float):
    """Rotate *img* by *angle* about its centre, keeping the original size.

    The output canvas has the same width and height as the input; pixels
    that fall outside the source are filled by replicating the border, and
    resampling uses cubic interpolation.
    """
    height, width = img.shape[:2]
    pivot = (width // 2, height // 2)
    # Scale factor 1.0: pure rotation, no zoom.
    matrix = cv.getRotationMatrix2D(pivot, angle, 1.0)
    return cv.warpAffine(
        img,
        matrix,
        (width, height),
        flags=cv.INTER_CUBIC,
        borderMode=cv.BORDER_REPLICATE,
    )
def getRot(mask):
    """Estimate the skew angle of *mask* on a downscaled copy.

    The image is shrunk to at most 500 pixels wide (aspect ratio kept)
    before being handed to ``determine_skew``, which keeps the estimate
    cheap on high-resolution scans.

    Returns:
        The detected skew angle, or ``0.0`` when no angle could be
        determined, so the result can always be passed to ``rotate``.
    """
    h, w = mask.shape[:2]
    nw = min(w, 500)
    # Clamp to 1: for very wide, short inputs the scaled height would
    # truncate to 0 and cv.resize rejects an empty target size.
    nh = max(1, int(h * (nw / w)))
    sm = cv.resize(mask, (nw, nh))
    angle = determine_skew(sm)
    # determine_skew is documented as returning an Optional value; treat
    # "no angle found" (e.g. a blank page) as "no rotation needed".
    return 0.0 if angle is None else angle
def getMono(img):
    """Return a black/white version of *img*.

    The image is converted to grayscale (input treated as BGR), binarised
    with an Otsu-selected threshold, and then morphologically closed with
    a 3x3 kernel.
    """
    grayscale = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    # The threshold argument (0) is ignored: THRESH_OTSU picks it.
    binary = cv.threshold(
        grayscale, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU
    )[1]
    closing_kernel = np.ones((3, 3), np.uint8)
    return cv.morphologyEx(binary, cv.MORPH_CLOSE, closing_kernel)
def getColorMask(img, dpi):
    """Build a binary mask (0/255) of the coloured regions of *img*.

    Pixels count as coloured when, in HSV space (input treated as BGR),
    their saturation is at least 40 and their value at least 10. The raw
    mask is eroded once, dilated five times, blurred, and re-binarised
    with Otsu's threshold. Kernel sizes scale with *dpi*.
    """
    hsv = cv.cvtColor(img, cv.COLOR_BGR2HSV)
    mask = cv.inRange(hsv, np.array([0, 40, 10]), np.array([255, 255, 255]))

    # Structuring element: 0.5% of the dpi, bumped to the next odd number.
    side = int(dpi * 0.005)
    if side % 2 == 0:
        side += 1
    element = np.ones((side, side), np.uint8)
    mask = cv.erode(mask, element)
    mask = cv.dilate(mask, element, iterations=5)

    # Blur kernel: 10% of the dpi, likewise forced odd (GaussianBlur
    # requires odd kernel dimensions).
    blur = int(dpi * .1)
    if blur % 2 == 0:
        blur += 1
    mask = cv.GaussianBlur(mask, (blur, blur), 0)

    return cv.threshold(mask, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)[1]
def autoColorContrast(img, mono, dpi):
    """Auto-contrast *img* using only the dark areas of *mono*, then brighten.

    The inverse of *mono* (dilated five times with a dpi-scaled kernel) is
    used as the histogram mask for Pillow's ``autocontrast``. Afterwards
    the lightness channel is multiplied by 1.3 and clipped to 0..255.

    Returns the result of the final ``COLOR_HLS2RGB`` conversion.
    """
    # Odd kernel side, 0.5% of the dpi.
    side = int(dpi * 0.005)
    if side % 2 == 0:
        side += 1
    element = np.ones((side, side), np.uint8)

    # White-on-black mono becomes a mask that selects the dark pixels.
    region = cv.dilate(cv.bitwise_not(mono), element, iterations=5)

    pil_image = Image.fromarray(cv.cvtColor(img, cv.COLOR_BGR2RGB))
    pil_mask = Image.fromarray(region)
    stretched = ImageOps.autocontrast(
        pil_image, (20, 30), mask=pil_mask, preserve_tone=True
    )

    # NOTE(review): the Pillow output is converted with a BGR code here and
    # back with an RGB code below; the pair is kept exactly as it was.
    hls = cv.cvtColor(np.asarray(stretched), cv.COLOR_BGR2HLS)
    hue, light, sat = cv.split(hls)
    light = np.clip(light.astype("float32") * 1.3, 0, 255).astype("uint8")
    return cv.cvtColor(cv.merge((hue, light, sat)), cv.COLOR_HLS2RGB)
def getColorSegments(img, mono, cmask):
    """Lazily yield one crop per contour found in *cmask*.

    Each item is ``((x1, y1, x2, y2), img_crop, mask_crop)`` where the
    rectangle is the contour's bounding box and the crops are slices of
    *img* and *cmask* over that box. *mono* is accepted but not used.

    This is a generator: contours are only searched once iteration starts.
    """
    contours, _ = cv.findContours(cmask, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE)
    for contour in contours:
        # boundingRect gives (x, y, width, height); convert to corners.
        left, top, width, height = cv.boundingRect(contour)
        right = left + width
        bottom = top + height
        box = (left, top, right, bottom)
        yield box, img[top:bottom, left:right], cmask[top:bottom, left:right]
def unsharpMask(image, kernel_size=(5, 5), sigma=1.0, amount=1.0, threshold=0):
    """Return a sharpened copy of *image* using an unsharp mask.

    Args:
        image: input array (8-bit data expected; the result is uint8).
        kernel_size: Gaussian kernel size passed to ``cv.GaussianBlur``.
        sigma: Gaussian standard deviation.
        amount: sharpening strength; the result is
            ``(amount + 1) * image - amount * blurred``.
        threshold: when > 0, pixels whose absolute difference from the
            blurred image is below this value keep their original value.

    Returns:
        The sharpened image as ``uint8``, clipped to 0..255.
    """
    blurred = cv.GaussianBlur(image, kernel_size, sigma)
    sharpened = float(amount + 1) * image - float(amount) * blurred
    sharpened = np.clip(sharpened, 0, 255).round().astype(np.uint8)
    if threshold > 0:
        # Subtract in floating point: on uint8 inputs `image - blurred`
        # wraps modulo 256, which made small negative differences look
        # like large positive ones and broke the low-contrast test.
        difference = image.astype(np.float64) - blurred
        low_contrast_mask = np.absolute(difference) < threshold
        np.copyto(sharpened, image, where=low_contrast_mask)
    return sharpened
def processImage(img, reader, dpi):
    """Run the per-page pipeline on one scanned image file.

    Steps: load the image, sharpen and smooth its lightness channel,
    auto-contrast, detect and remove skew, OCR the monochrome page, build
    the colour mask and set up the colour-segment generator.

    Args:
        img: path of the image file, passed to ``cv.imread``.
        reader: object providing ``readtext(image)``.
        dpi: scan resolution; kernel and filter sizes scale with it.

    Returns:
        ``(mono, csegs, text)``: the deskewed monochrome page with the
        colour regions painted white, the (lazy) generator returned by
        ``getColorSegments``, and the result of ``reader.readtext``.

    Raises:
        OSError: if the image cannot be loaded.
    """
    # Seven progress steps are reported below.
    with tqdm(total=7, leave=False) as t:
        t.set_description("Reading image")
        im = cv.imread(img)
        if im is None:
            # cv.imread signals failure by returning None instead of
            # raising; fail here with a message that names the file.
            raise OSError(f"cannot read image: {img}")
        t.update()

        t.set_description("Filter")
        mono = getMono(im)
        # imread yields BGR-ordered data, so use the BGR conversion codes;
        # the RGB codes computed lightness with red and blue swapped.
        im = cv.cvtColor(im, cv.COLOR_BGR2Lab)
        (L, a, b) = cv.split(im)
        ksiz = int(dpi*0.015)
        ksiz -= ksiz%2-1  # force an odd kernel size
        L = unsharpMask(L, kernel_size=(ksiz,ksiz), amount=2)
        L = cv.bilateralFilter(L, -1, 12, dpi*0.018)
        im = cv.cvtColor(cv.merge((L, a, b)), cv.COLOR_Lab2BGR)
        im = autoColorContrast(im, mono, dpi)
        t.update()

        t.set_description("Detect skew")
        angle = getRot(im)
        if angle is None:
            # No skew angle could be determined: leave the page as is.
            angle = 0.0
        t.update()

        t.set_description("Deskew")
        im = rotate(im, angle)
        mono = rotate(mono, angle)
        t.update()

        t.set_description("OCR")
        text = reader.readtext(mono)
        t.update()

        t.set_description("Color mask")
        cmask = getColorMask(im, dpi)
        t.update()

        t.set_description("Color segments")
        # Generator: the crops are produced when the caller iterates.
        csegs = getColorSegments(im, mono, cmask)
        t.update()

        # Blank out the colour regions in the monochrome layer.
        mono[cmask==255] = 255
        return mono, csegs, text
@entrypoint
def main(output, langs=["en"], dpi=600, *imgs):
    # Command-line entry point: process every image in `imgs`, JBIG2-compress
    # the monochrome pages and write the PDF built by mkpdf to `output`.
    #
    #   output: path of the PDF file to write
    #   langs:  language codes handed to easyocr.Reader
    #   dpi:    scan resolution, forwarded to processImage and mkpdf
    #   imgs:   input image paths, one per page
    #
    # NOTE(review): documented with comments rather than a docstring because
    # entrypoint2 may use the docstring for CLI help - confirm before changing.
    reader = easyocr.Reader(langs)
    with tqdm(total=3, leave=False) as t, TemporaryDirectory() as tmp:
        tp = Path(tmp)
        files = []
        colorimgs = []
        texts = []

        t.set_description("Process pages")
        with tqdm(total=len(imgs), leave=False) as pt:
            for pagen, img in enumerate(imgs):
                pt.set_description(f"Process {img}")
                mono, csegs, text = processImage(img, reader, dpi)

                # Monochrome page layer, later fed to jbig2.
                page_path = str(tp / f"p{pagen}.tif")
                files.append(page_path)
                cv.imwrite(page_path, mono)

                # One JPEG plus one bilevel PNG mask per colour segment.
                jpeg_options = [
                    cv.IMWRITE_JPEG_QUALITY, 90,
                    cv.IMWRITE_JPEG_OPTIMIZE, 1,
                    cv.IMWRITE_JPEG_PROGRESSIVE, 1]
                png_options = [
                    cv.IMWRITE_PNG_BILEVEL, 1,
                    cv.IMWRITE_PNG_COMPRESSION, 9]
                pimgs = []
                for i, (rect, simg, smask) in enumerate(csegs):
                    bp = tp / f"p{pagen}_{i}.jpg"
                    mp = tp / f"p{pagen}_{i}_m.png"
                    cv.imwrite(str(bp), simg, jpeg_options)
                    cv.imwrite(str(mp), smask, png_options)
                    pimgs.append((rect, bp, mp))

                colorimgs.append(pimgs)
                texts.append(text)
                pt.update()
        t.update()

        t.set_description("JBIG2 compress")
        # Runs inside the temp dir; the symbol table and per-page blobs
        # are then read from "output.sym" / "output.NNNN" there.
        jbig2_cmd = ["jbig2", "-s", "-d", "-a", "-p", *files]
        run(jbig2_cmd, capture_output=True, check=True, cwd=tp)
        symtab = tp / "output.sym"
        pageblobs = [tp / f"output.{p:04d}" for p in range(len(files))]
        t.update()

        t.set_description("Create PDF")
        with open(output, "wb") as outf:
            pdf_bytes = mkpdf(symtab, pageblobs, colorimgs, texts, dpi)
            outf.write(pdf_bytes)
        t.update()