2022-08-28 20:00:13 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
2022-08-28 20:31:50 +02:00
|
|
|
"""
|
|
|
|
Copyright © 2022 Mia Herkt
|
|
|
|
Licensed under the EUPL, Version 1.2 or - as soon as approved
|
|
|
|
by the European Commission - subsequent versions of the EUPL
|
|
|
|
(the "License");
|
|
|
|
You may not use this work except in compliance with the License.
|
|
|
|
You may obtain a copy of the license at:
|
|
|
|
|
|
|
|
https://joinup.ec.europa.eu/software/page/eupl
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing,
|
|
|
|
software distributed under the License is distributed on an
|
|
|
|
"AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
|
|
|
either express or implied.
|
|
|
|
See the License for the specific language governing permissions
|
|
|
|
and limitations under the License.
|
|
|
|
"""
|
|
|
|
|
2022-08-28 20:00:13 +02:00
|
|
|
import struct
|
|
|
|
import fitz
|
|
|
|
import cv2 as cv
|
|
|
|
|
|
|
|
class Dict:
    """Minimal PDF dictionary whose keys and values are raw ``bytes``.

    Keys are stored without the leading ``/``; values are stored exactly as
    they should appear in the serialised output.
    """

    def __init__(self, values=None):
        """Create the dictionary, optionally pre-filled from *values*.

        Args:
            values: Anything accepted by ``dict.update`` (a mapping or an
                iterable of key/value pairs), or ``None`` for an empty
                dictionary.  The entries are copied; the argument itself is
                never stored.
        """
        # ``None`` instead of ``{}`` as the default: a mutable default would
        # be a single object shared by every call.
        self.d = {}
        if values is not None:
            self.d.update(values)

    def __bytes__(self):
        """Serialise as ``<< /Key value\\n ... >>\\n``."""
        entries = [
            b"/" + key + b" " + value + b"\n"
            for key, value in self.d.items()
        ]
        return b"".join([b"<< ", *entries, b">>\n"])
|
|
|
|
|
|
|
|
class Obj:
    """A PDF indirect object: a dictionary plus an optional stream.

    Attributes are plain and public:
      * ``d``      -- the object's ``Dict``
      * ``stream`` -- raw stream ``bytes`` or ``None``
      * ``idx``    -- object number; 0 until something assigns a real one
    """

    def __init__(self, d=None, stream=None):
        """Build the object from dictionary entries and an optional stream.

        Args:
            d: Entries for the object's dictionary (bytes keys without the
                leading ``/``, bytes values), or ``None`` for none.
            stream: Stream payload as ``bytes``, or ``None`` when the object
                has no stream.  When given, a ``Length`` entry holding its
                byte length is added to the dictionary.
        """
        # Work on a private copy.  Writing ``Length`` straight into ``d``
        # would modify the caller's dict and, with a mutable default, leak
        # the entry into every later ``Obj()`` created without arguments.
        entries = {} if d is None else dict(d)
        if stream is not None:
            entries[b"Length"] = str(len(stream)).encode()

        self.d = Dict(entries)
        self.stream = stream
        self.idx = 0

    def __bytes__(self):
        """Serialise the dictionary, the stream (if any) and ``endobj``.

        The leading ``N 0 obj`` line is not part of the result.
        """
        parts = [bytes(self.d)]

        if self.stream is not None:
            parts += [b"stream\n", self.stream, b"\nendstream\n"]
        parts.append(b"endobj\n")

        return b"".join(parts)
|
|
|
|
|
|
|
|
class Doc:
    """Collects objects and serialises them as a PDF 1.4 file.

    Attributes:
      * ``objs``  -- every added object, in object-number order
      * ``pages`` -- the objects that were added through ``add_page``
    """

    def __init__(self):
        """Start with no objects and no pages."""
        self.objs = []
        self.pages = []

    def add_object(self, o):
        """Append *o*, give it the next 1-based object number, and return it."""
        o.idx = len(self.objs) + 1
        self.objs.append(o)
        return o

    def add_page(self, o):
        """Record *o* in ``pages`` and add it like any other object."""
        self.pages.append(o)
        return self.add_object(o)

    def __bytes__(self):
        """Serialise header, objects, cross-reference table and trailer.

        The trailer always names object 1 as ``/Root``.
        """
        chunks = [b'%PDF-1.4']
        # Chunks are joined with one newline byte each, so the byte offset of
        # the next chunk is the length of everything so far plus one per chunk.
        position = len(chunks[0]) + 1
        offsets = []

        for o in self.objs:
            offsets.append(position)
            for piece in (f"{o.idx} 0 obj".encode(), bytes(o)):
                chunks.append(piece)
                position += len(piece) + 1

        xrefstart = position
        size = len(offsets) + 1  # +1 for the mandatory free entry 0

        chunks.append(b'xref')
        chunks.append(f'0 {size}'.encode())
        # Every xref entry is exactly 20 bytes including its line ending,
        # hence the trailing space before the joining newline.
        chunks.append(b'0000000000 65535 f ')
        chunks.extend(f"{off:010d} 00000 n ".encode() for off in offsets)

        chunks.append(b'')
        chunks.append(b'trailer')
        chunks.append(f'<< /Size {size}\n/Root 1 0 R >>'.encode())
        chunks.append(b'startxref')
        chunks.append(str(xrefstart).encode())
        # The PDF end-of-file marker has two percent signs.
        chunks.append(b'%%EOF')

        return b'\n'.join(chunks)
|
|
|
|
|
|
|
|
def ref(x):
    """Return the indirect reference ``<x> 0 R`` for object number *x* as bytes."""
    reference = "{} 0 R".format(x)
    return reference.encode()
|
|
|
|
|
|
|
|
def mkpdf(symtab, pageblobs, colorimgs, texts, dpi):
    """Assemble a PDF from JBIG2 page streams, image overlays and hidden text.

    A skeleton PDF is serialised with ``Doc``/``Obj`` (one ``/JBIG2Decode``
    image XObject per page), reopened with ``fitz``, and then extended with
    masked images and invisible (render mode 3) text before being saved.

    Args:
        symtab: Path of the file whose bytes become the stream that every
            page image references through ``/JBIG2Globals``.
        pageblobs: Iterable of paths, one per page.  Bytes 11..26 of each
            file are unpacked as four big-endian unsigned 32-bit integers:
            width, height, x resolution and y resolution.  A resolution of
            0 is replaced by *dpi*.
        colorimgs: Per-page iterable of ``(rect, img, mask)`` tuples.
            ``rect`` holds four coordinates that are scaled by ``72 / dpi``;
            ``img`` is passed to ``insert_image`` as ``filename``; ``mask``
            is a path whose bytes are passed as ``mask``.
        texts: Per-page iterable of boxes where ``box[0]`` is a sequence of
            four ``(x, y)`` points and ``box[1]`` is the text.  The points
            are presumably ordered top-left, top-right, bottom-right,
            bottom-left -- confirm against the caller.
        dpi: Resolution used to convert overlay and text coordinates to
            points, and as the fallback page resolution.

    Returns:
        The bytes returned by
        ``pdf.tobytes(garbage=4, clean=True, deflate=True)``.
    """

    def to_pt(value):
        # Same expression as used for every coordinate: units at *dpi* per
        # inch converted to 1/72 inch.
        return (value * 72.0) / dpi

    doc = Doc()

    # The catalog must be object 1 because Doc's trailer names "1 0 R" as
    # /Root.  Its references are filled in once the targets have numbers.
    catalog = doc.add_object(Obj({b'Type': b'/Catalog'}))
    outlines = doc.add_object(Obj({
        b'Type': b'/Outlines',
        b'Count': b'0'
    }))
    pages = doc.add_object(Obj({b'Type': b'/Pages'}))
    catalog.d.d[b'Outlines'] = ref(outlines.idx)
    catalog.d.d[b'Pages'] = ref(pages.idx)

    with open(symtab, "rb") as stab:
        symd = doc.add_object(Obj({}, stab.read()))
    page_objs = []

    for f in pageblobs:
        with open(f, "rb") as pf:
            blob = pf.read()
        (w, h, xres, yres) = struct.unpack(">IIII", blob[11:27])

        if xres == 0:
            xres = dpi
        if yres == 0:
            yres = dpi

        xobj = doc.add_object(Obj({
            b'Type': b'/XObject',
            b'Subtype': b'/Image',
            b'Width': str(w).encode(),
            b'Height': str(h).encode(),
            b'ColorSpace': b'/DeviceGray',
            b'BitsPerComponent': b'1',
            b'Filter': b'/JBIG2Decode',
            b'DecodeParms': f' << /JBIG2Globals {symd.idx} 0 R >>'.encode()
        }, blob))

        # Page size in points.
        fw = float(w * 72) / xres
        fh = float(h * 72) / yres

        # Content stream: scale the unit square to the page and paint /Im1.
        contents = doc.add_object(
            Obj({}, f'q {fw:.1f} 0 0 {fh:.1f} 0 0 cm /Im1 Do Q'.encode())
        )

        resources = doc.add_object(Obj({
            b'ProcSet': b'[/PDF /ImageB]',
            b'XObject': f'<< /Im1 {xobj.idx} 0 R >>'.encode()
        }))

        page = Obj({
            b'Type': b'/Page',
            b'Parent': ref(pages.idx),
            b'MediaBox': f'[ 0 0 {fw:.1f} {fh:.1f} ]'.encode(),
            b'Contents': ref(contents.idx),
            b'Resources': ref(resources.idx)
        })
        doc.add_object(page)

        page_objs.append(page)

    # Written once, after all pages are known.
    pages.d.d[b'Count'] = str(len(page_objs)).encode()
    pages.d.d[b'Kids'] = b'[' + b' '.join([ref(x.idx) for x in page_objs]) + b']'

    pdf = fitz.open(stream=bytes(doc))

    for p, imgs in enumerate(colorimgs):
        for rect, img, mask in imgs:
            with open(mask, "rb") as mf:
                m = mf.read()
            pdf[p].insert_image(fitz.Rect(*[to_pt(c) for c in rect]), filename=img, mask=m)

    for p, txt in enumerate(texts):
        for box in txt:
            text = box[1]
            if not text:
                continue

            bbox = box[0]
            r = fitz.Rect(
                min(to_pt(bbox[0][0]), to_pt(bbox[3][0])),
                min(to_pt(bbox[0][1]), to_pt(bbox[1][1])),
                max(to_pt(bbox[1][0]), to_pt(bbox[2][0])),
                max(to_pt(bbox[2][1]), to_pt(bbox[3][1]))
            )
            fs = round(r.height)
            text_width = fitz.get_text_length(text, fontsize=fs)
            if not text_width:
                # A zero width (e.g. the box height rounded to font size 0)
                # would make the horizontal stretch factor below divide by
                # zero; there is nothing visible to place, so skip the box.
                continue

            # Stretch the text horizontally about the box's top-left corner
            # so that its measured width matches the box width.
            pivot = fitz.Point(r.x0, r.y0)
            matrix = fitz.Matrix(r.width / text_width, 1.0)
            pdf[p].insert_text(fitz.Point(r.x0, r.y0 + r.height * 0.8),
                               text, fontsize=fs,
                               morph=(pivot, matrix),
                               render_mode=3)

    return pdf.tobytes(garbage=4, clean=True, deflate=True)
|