Files
convertions/convertions-env/lib/python3.11/site-packages/pymupdf/__init__.py
2024-09-29 01:45:31 -04:00

21932 lines
772 KiB
Python

'''
PyMuPDF implemented on top of MuPDF Python bindings.
License:
SPDX-License-Identifier: GPL-3.0-only
'''
# To reduce startup times, we don't import everything we require here.
#
import atexit
import binascii
import collections
import inspect
import io
import math
import os
import pathlib
import re
import string
import sys
import tarfile
import typing
import warnings
import weakref
import zipfile
from . import extra
# Set up g_out_log and g_out_message from environment variables.
#
# PYMUPDF_MESSAGE controls the destination of user messages (the `message()`
# function).
#
# PYMUPDF_LOG controls the destination of internal development logging (the
# `log()` function).
#
# Each should be either `fd:<int>` to set to a file descriptor (e.g. `fd:1`
# for stdout, `fd:2` for stderr), `path:<string>` to write to a file or
# `path+:<string>` to append to a file. If not specified, the default is
# stdout.
#
def _set_stream(name, default):
    '''
    Picks the output stream selected by environment variable `name`.

    The variable's value must look like one of:

        `fd:<int>`          write to an existing file descriptor,
        `path:<string>`     write to a file (opened with mode 'w'),
        `path+:<string>`    append to a file (opened with mode 'a').

    Returns `default` if the variable is unset. Raises `Exception` for any
    other value.
    '''
    spec = os.environ.get(name)
    if spec is None:
        return default
    if spec.startswith('fd:'):
        # closefd=False: closing the returned stream leaves the underlying
        # descriptor open.
        return open(int(spec[3:]), mode='w', closefd=False)
    # The two path prefixes differ in their fifth character, so at most one
    # of them can match.
    for prefix, mode in (('path:', 'w'), ('path+:', 'a')):
        if spec.startswith(prefix):
            return open(spec[len(prefix):], mode)
    raise Exception(f'Unrecognised stream specification for {name!r} should match `fd:<int>`, `path:<string>` or `path+:<string>`: {spec!r}')
# Destination streams, chosen once at import time: `_g_out_log` is written by
# `log()` and `_g_out_message` by `message()`. Both default to stdout.
_g_out_log = _set_stream('PYMUPDF_LOG', sys.stdout)
_g_out_message = _set_stream('PYMUPDF_MESSAGE', sys.stdout)
def log( text='', caller=1):
    '''
    For development/debugging diagnostics.

    Writes `<filename>:<line>:<function>: <text>` to the log stream
    (`_g_out_log`) and flushes it.

    text:
        The message to write.
    caller:
        Index into the call stack of the frame to report; 1 is the function
        that called `log()`.
    '''
    frame_record = inspect.stack( context=0)[ caller]
    try:
        filename = os.path.relpath(frame_record.filename)
    except ValueError:
        # os.path.relpath() raises ValueError on Windows when the file and
        # the current directory are on different drives; a diagnostic must
        # not fail because of that, so fall back to the unmodified path.
        filename = frame_record.filename
    line = frame_record.lineno
    function = frame_record.function
    print( f'{filename}:{line}:{function}: {text}', file=_g_out_log)
    _g_out_log.flush()
def message(text=''):
    '''
    For user messages.

    Prints `text` to the message stream (`_g_out_message`) and flushes it
    straight away.
    '''
    print(text, file=_g_out_message, flush=True)
def exception_info():
    '''
    Writes a marker line followed by the traceback of the exception currently
    being handled to the log stream (`_g_out_log`).
    '''
    from traceback import print_exc
    log('exception_info:')
    print_exc(file=_g_out_log)
# PDF names must not contain these characters: any whitespace, the delimiters
# ()<>[]{}/% and the NUL character.
INVALID_NAME_CHARS = set(string.whitespace) | set("()<>[]{}/%") | {chr(0)}
def get_env_bool( name, default):
    '''
    Returns `True`, `False` or `default` depending on whether $<name> is '1',
    '0' or unset. Any other value raises AssertionError.

    Logs a diagnostic if the result differs from `default`.
    '''
    v = os.environ.get( name)
    if v is None:
        ret = default
    elif v == '1':
        ret = True
    elif v == '0':
        ret = False
    else:
        # Raised explicitly rather than with `assert 0`: asserts are removed
        # under `python -O`, which would leave `ret` unbound below.
        raise AssertionError(f'Unrecognised value for {name}: {v!r}')
    if ret != default:
        log(f'Using non-default setting from {name}: {v!r}')
    return ret
def get_env_int( name, default):
    '''
    Returns the integer value of $<name>, or `default` if it is unset.

    A value that is set is converted with `int()`, so a non-numeric value
    raises ValueError. Logs a diagnostic if the result differs from
    `default`.
    '''
    raw = os.environ.get( name)
    value = default if raw is None else int(raw)
    if value != default:
        log(f'Using non-default setting from {name}: {raw}')
    return value
# All our `except ...` blocks output diagnostics if `g_exceptions_verbose` is
# true. Read from $PYMUPDF_EXCEPTIONS_VERBOSE as an integer; defaults to 1.
g_exceptions_verbose = get_env_int( 'PYMUPDF_EXCEPTIONS_VERBOSE', 1)
# $PYMUPDF_USE_EXTRA overrides whether to use optimised C fns in `extra`.
# Must be '1' or '0' if set; defaults to True.
#
g_use_extra = get_env_bool( 'PYMUPDF_USE_EXTRA', True)
# Global switches
#
class _Globals:
    '''
    Holder for the global switches. Every switch starts out as 0.
    '''
    def __init__(self):
        for switch in (
                'no_device_caching',
                'small_glyph_heights',
                'subset_fontnames',
                'skip_quad_corrections',
                ):
            setattr(self, switch, 0)


# The single shared instance.
_globals = _Globals()
# Optionally use MuPDF via cppyy bindings; experimental and not tested recently
# as of 2023-01-20 11:51:40
#
# $MUPDF_CPPYY selects the cppyy bindings: an empty value imports the
# `mupdf_cppyy` module normally, any other value is taken as the path of the
# source file to load it from. If unset, the SWIG bindings are used.
mupdf_cppyy = os.environ.get( 'MUPDF_CPPYY')
if mupdf_cppyy is not None:
    # pylint: disable=all
    log( f'{__file__}: $MUPDF_CPPYY={mupdf_cppyy!r} so attempting to import mupdf_cppyy.')
    # Use .get(): indexing os.environ raises KeyError when $PYTHONPATH is
    # unset, which would abort the import for the sake of a diagnostic.
    log( f'{__file__}: $PYTHONPATH={os.environ.get("PYTHONPATH")}')
    if mupdf_cppyy == '':
        import mupdf_cppyy
    else:
        # Import the submodule explicitly rather than relying on it having
        # been loaded already as a side effect of something else.
        import importlib.machinery
        mupdf_cppyy = importlib.machinery.SourceFileLoader(
                'mupdf_cppyy',
                mupdf_cppyy
                ).load_module()
    mupdf = mupdf_cppyy.cppyy.gbl.mupdf
else:
    # Use MuPDF Python SWIG bindings. We allow import from either our own
    # directory for conventional wheel installs, or from separate place in case
    # we are using a separately-installed system installation of mupdf.
    #
    try:
        from . import mupdf
    except Exception:
        import mupdf
    mupdf.reinit_singlethreaded()
def _int_rc(text):
    '''
    Converts a version component to int, ignoring a release-candidate suffix.

    Everything from the first 'rc' onwards is dropped before conversion, so
    for example '4rc1' gives 4.
    '''
    number, _, _ = text.partition('rc')
    return int(number)
# Version information.
#
VersionFitz = mupdf.FZ_VERSION # MuPDF version.
VersionBind = "1.24.4" # PyMuPDF version.
VersionDate = "2024-05-16 00:00:01"
# VersionDate with the '-', ' ' and ':' separators removed.
VersionDate2 = VersionDate.replace('-', '').replace(' ', '').replace(':', '')
version = (VersionBind, VersionFitz, VersionDate2)
# Numeric tuples for version comparisons; a trailing 'rc...' in a component is
# ignored.
pymupdf_version_tuple = tuple( [_int_rc(i) for i in VersionBind.split('.')])
mupdf_version_tuple = (mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH)
# Check that MuPDF's version string agrees with its numeric version values.
assert mupdf_version_tuple == tuple([_int_rc(i) for i in VersionFitz.split('.')]), \
f'Inconsistent MuPDF version numbers: {mupdf_version_tuple=} != {VersionFitz=}'
# String formatting.
def _format_g(value, *, fmt='%g'):
    '''
    Formats a number, or a list/tuple of numbers, as text.

    A single value is formatted with mupdf.fz_format_double() when MuPDF is
    at least version 1.24.2, otherwise with Python's `%` operator.

    For a list or tuple, each item is formatted (recursively) and the
    results are joined with single spaces. No separator is added while the
    text built so far is still empty.
    '''
    if not isinstance(value, (list, tuple)):
        if mupdf_version_tuple >= (1, 24, 2):
            return mupdf.fz_format_double(fmt, value)
        return fmt % value
    joined = ''
    for item in value:
        piece = _format_g(item, fmt=fmt)
        joined = f'{joined} {piece}' if joined else piece
    return joined


# Public alias.
format_g = _format_g
# Names required by class method typing annotations.
#
# Optional variants of common types.
OptBytes = typing.Optional[typing.ByteString]
OptDict = typing.Optional[dict]
OptFloat = typing.Optional[float]
OptInt = typing.Optional[int]
OptSeq = typing.Optional[typing.Sequence]
OptStr = typing.Optional[str]

# String placeholders, so that annotations can name these before a real
# definition exists (presumably rebound later in the file - TODO confirm).
Page = 'Page_forward_decl'
Point = 'Point_forward_decl'
matrix_like = 'matrix_like'
point_like = 'point_like'
quad_like = 'quad_like'
rect_like = 'rect_like'

# Value of $TESSDATA_PREFIX, or None if it is not set.
TESSDATA_PREFIX = os.environ.get("TESSDATA_PREFIX")
def _as_fz_document(document):
    '''
    Returns document as a mupdf.FzDocument, upcasting as required.

    `document` may be a Document, a mupdf.FzDocument or a mupdf.PdfDocument.

    Raises ValueError('document closed') if a Document is closed, and
    AssertionError if `document` is None or of any other type.
    '''
    if isinstance(document, Document):
        if document.is_closed:
            raise ValueError('document closed')
        document = document.this
    if isinstance(document, mupdf.FzDocument):
        return document
    if isinstance(document, mupdf.PdfDocument):
        return document.super()
    # Raised explicitly rather than with `assert 0`: asserts are removed under
    # `python -O`, which would make this function silently return None.
    if document is None:
        raise AssertionError('document is None')
    raise AssertionError(f'Unrecognised {type(document)=}')
def _as_pdf_document(document):
    '''
    Returns document as a mupdf.PdfDocument, downcasting as required. If we
    fail (i.e. document is a mupdf.FzDocument(), <ret>.m_internal will be
    None.

    `document` may be a Document, a mupdf.PdfDocument or a mupdf.FzDocument.

    Raises ValueError('document closed') if a Document is closed, and
    AssertionError if `document` is None or of any other type.
    '''
    if isinstance(document, Document):
        if document.is_closed:
            raise ValueError('document closed')
        document = document.this
    if isinstance(document, mupdf.PdfDocument):
        return document
    if isinstance(document, mupdf.FzDocument):
        return mupdf.PdfDocument(document)
    # Raised explicitly rather than with `assert 0`: asserts are removed under
    # `python -O`, which would make this function silently return None.
    if document is None:
        raise AssertionError('document is None')
    raise AssertionError(f'Unrecognised {type(document)=}')
def _as_fz_page(page):
    '''
    Returns page as a mupdf.FzPage, upcasting as required.

    `page` may be a Page, a mupdf.PdfPage or a mupdf.FzPage.

    Raises AssertionError if `page` is None or of any other type.
    '''
    if isinstance(page, Page):
        page = page.this
    if isinstance(page, mupdf.PdfPage):
        return page.super()
    if isinstance(page, mupdf.FzPage):
        return page
    # Raised explicitly rather than with `assert 0`: asserts are removed under
    # `python -O`, which would make this function silently return None.
    if page is None:
        raise AssertionError('page is None')
    raise AssertionError(f'Unrecognised {type(page)=}')
def _as_pdf_page(page):
    '''
    Returns page as a mupdf.PdfPage, downcasting as required. If we fail (i.e.
    page is a mupdf.FzPage(), <ret>.m_internal will be None.

    `page` may be a Page, a mupdf.PdfPage or a mupdf.FzPage.

    Raises AssertionError if `page` is None or of any other type.
    '''
    if isinstance(page, Page):
        page = page.this
    if isinstance(page, mupdf.PdfPage):
        return page
    if isinstance(page, mupdf.FzPage):
        return mupdf.pdf_page_from_fz_page(page)
    # Raised explicitly rather than with `assert 0`: asserts are removed under
    # `python -O`, which would make this function silently return None.
    if page is None:
        raise AssertionError('page is None')
    raise AssertionError(f'Unrecognised {type(page)=}')
# Fixme: we don't support JM_MEMORY=1, so this switch is fixed at 0.
JM_MEMORY = 0
# Classes
#
class Annot:
def __init__(self, annot):
'''
Wraps `annot`, which must be a mupdf.PdfAnnot, and keeps it as `self.this`.
'''
assert isinstance( annot, mupdf.PdfAnnot)
self.this = annot
def __repr__(self):
    '''
    Returns "'<type name>' annotation on <parent>", where <parent> is the
    text of `self.parent`, or '<>' if no parent has been set.
    '''
    owner = getattr(self, 'parent', '<>')
    type_name = self.type[1]
    return "'%s' annotation on %s" % (type_name, str(owner))
def __str__(self):
'''
Same text as `__repr__()`.
'''
return self.__repr__()
def _erase(self):
    '''
    Resets the `thisown` flag to False if it is currently set and true;
    otherwise does nothing.
    '''
    if not getattr(self, "thisown", False):
        return
    self.thisown = False
def _get_redact_values(self):
    '''
    Collects the properties of a redaction annotation in a dict.

    Returns None if this is not a redaction annotation, or if reading the
    annotation object fails (the failure is logged if
    `g_exceptions_verbose` is true).

    The dict holds the overlay text ('' if there is none), the alignment
    from /Q (0 if absent), the rectangle, the text color, font name and font
    size parsed by TOOLS._parse_da(), and the fill color. A /RO entry is
    reported with a warning and its xref number is recorded.
    '''
    pdf_annot = self.this
    if mupdf.pdf_annot_type(pdf_annot) != mupdf.PDF_ANNOT_REDACT:
        return
    result = {}
    try:
        ro = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(pdf_annot), "RO")
        if ro.m_internal:
            message_warning("Ignoring redaction key '/RO'.")
            result[dictkey_xref] = mupdf.pdf_to_num(ro)
        overlay = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(pdf_annot), "OverlayText")
        if overlay.m_internal:
            result[dictkey_text] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(overlay))
        else:
            result[dictkey_text] = ''
        quadding = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(pdf_annot), PDF_NAME('Q'))
        result[dictkey_align] = mupdf.pdf_to_int(quadding) if quadding.m_internal else 0
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        return
    if not result:
        return result
    result["rect"] = self.rect
    text_color, fontname, fontsize = TOOLS._parse_da(self)
    result["text_color"] = text_color
    result["fontname"] = fontname
    result["fontsize"] = fontsize
    result["fill"] = self.colors["fill"]
    return result
def _getAP(self):
    '''
    Returns the content of the annotation's normal appearance stream
    (/AP/N).

    With `g_use_extra` the work is done by extra.Annot_getAP(), whose
    result must be bytes. Otherwise the stream is loaded here; None is
    returned if /AP/N is not a stream or nothing could be loaded.
    '''
    pdf_annot = self.this
    assert isinstance( pdf_annot, mupdf.PdfAnnot)
    if g_use_extra:
        ap_bytes = extra.Annot_getAP(pdf_annot)
        assert isinstance( ap_bytes, bytes)
        return ap_bytes
    ap = mupdf.pdf_dict_getl(
            mupdf.pdf_annot_obj( pdf_annot),
            PDF_NAME('AP'),
            PDF_NAME('N'),
            )
    if not mupdf.pdf_is_stream( ap):
        return None
    loaded = mupdf.pdf_load_stream( ap)
    if loaded and loaded.m_internal:
        return JM_BinFromBuffer(loaded)
    return None
def _setAP(self, buffer_, rect=0):
    '''
    Replaces the content of the annotation's normal appearance stream
    (/AP/N) with `buffer_`.

    If `rect` is true, the stream's /BBox is also set to the annotation's
    /Rect.

    Nothing is raised to the caller: any exception, including the
    RuntimeError for a missing or non-stream /AP/N and the ValueError for
    an unusable buffer, is caught here and only logged if
    `g_exceptions_verbose` is true.
    '''
    try:
        pdf_annot = self.this
        annot_obj = mupdf.pdf_annot_obj( pdf_annot)
        pdf_page = mupdf.pdf_annot_page( pdf_annot)
        ap = mupdf.pdf_dict_getl( annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
        if not ap.m_internal or not mupdf.pdf_is_stream( ap):
            raise RuntimeError( MSG_BAD_APN)
        content = JM_BufferFromBytes( buffer_)
        if not content.m_internal:
            raise ValueError( MSG_BAD_BUFFER)
        JM_update_stream( pdf_page.doc(), ap, content, 1)
        if rect:
            mupdf.pdf_dict_put_rect(
                    ap,
                    PDF_NAME('BBox'),
                    mupdf.pdf_dict_get_rect( annot_obj, PDF_NAME('Rect')),
                    )
    except Exception:
        if g_exceptions_verbose:
            exception_info()
def _update_appearance(self, opacity=-1, blend_mode=None, fill_color=None, rotate=-1):
'''
Lets MuPDF regenerate the annotation's appearance, then applies the
optional fill color, opacity and blend mode.

opacity:
Used only if 0 <= opacity < 1.
blend_mode:
If true, stored as /BM.
fill_color:
Converted with JM_color_FromSequence().
rotate:
If >= 0 and the annotation type is one of those listed below, stored
as /Rotate before the appearance is regenerated.

Returns True. If a step fails, the problem is reported with message()
and the exception is re-raised.
'''
annot = self.this
assert annot.m_internal
annot_obj = mupdf.pdf_annot_obj( annot)
page = mupdf.pdf_annot_page( annot)
pdf = page.doc()
type_ = mupdf.pdf_annot_type( annot)
# nfcol is the number of color components, fcol holds their values.
nfcol, fcol = JM_color_FromSequence(fill_color)
try:
# remove fill color from unsupported annots
# or if so requested
if nfcol == 0 or type_ not in (
mupdf.PDF_ANNOT_SQUARE,
mupdf.PDF_ANNOT_CIRCLE,
mupdf.PDF_ANNOT_LINE,
mupdf.PDF_ANNOT_POLY_LINE,
mupdf.PDF_ANNOT_POLYGON
):
mupdf.pdf_dict_del( annot_obj, PDF_NAME('IC'))
elif nfcol > 0:
mupdf.pdf_set_annot_interior_color( annot, fcol[:nfcol])
# /Rotate is only written for a non-negative value and for the
# annotation types listed here.
insert_rot = 1 if rotate >= 0 else 0
if type_ not in (
mupdf.PDF_ANNOT_CARET,
mupdf.PDF_ANNOT_CIRCLE,
mupdf.PDF_ANNOT_FREE_TEXT,
mupdf.PDF_ANNOT_FILE_ATTACHMENT,
mupdf.PDF_ANNOT_INK,
mupdf.PDF_ANNOT_LINE,
mupdf.PDF_ANNOT_POLY_LINE,
mupdf.PDF_ANNOT_POLYGON,
mupdf.PDF_ANNOT_SQUARE,
mupdf.PDF_ANNOT_STAMP,
mupdf.PDF_ANNOT_TEXT,
):
insert_rot = 0
if insert_rot:
mupdf.pdf_dict_put_int( annot_obj, PDF_NAME('Rotate'), rotate)
mupdf.pdf_dirty_annot( annot)
mupdf.pdf_update_annot( annot) # let MuPDF update
pdf.resynth_required = 0
# insert fill color
if type_ == mupdf.PDF_ANNOT_FREE_TEXT:
if nfcol > 0:
mupdf.pdf_set_annot_color( annot, fcol[:nfcol])
elif nfcol > 0:
# Written only after the update above: build the /IC array by hand.
col = mupdf.pdf_new_array( page.doc(), nfcol)
for i in range( nfcol):
mupdf.pdf_array_push_real( col, fcol[i])
mupdf.pdf_dict_put( annot_obj, PDF_NAME('IC'), col)
except Exception as e:
if g_exceptions_verbose: exception_info()
message( f'cannot update annot: {e}')
raise
if (opacity < 0 or opacity >= 1) and not blend_mode: # no opacity, no blend_mode
return True
try: # create or update /ExtGState
ap = mupdf.pdf_dict_getl(
mupdf.pdf_annot_obj(annot),
PDF_NAME('AP'),
PDF_NAME('N')
)
if not ap.m_internal: # should never happen
raise RuntimeError( MSG_BAD_APN)
resources = mupdf.pdf_dict_get( ap, PDF_NAME('Resources'))
if not resources.m_internal: # no Resources yet: make one
resources = mupdf.pdf_dict_put_dict( ap, PDF_NAME('Resources'), 2)
# Graphics state dictionary holding the opacity and/or blend mode.
alp0 = mupdf.pdf_new_dict( page.doc(), 3)
if opacity >= 0 and opacity < 1:
mupdf.pdf_dict_put_real( alp0, PDF_NAME('CA'), opacity)
mupdf.pdf_dict_put_real( alp0, PDF_NAME('ca'), opacity)
mupdf.pdf_dict_put_real( annot_obj, PDF_NAME('CA'), opacity)
if blend_mode:
mupdf.pdf_dict_put_name( alp0, PDF_NAME('BM'), blend_mode)
mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('BM'), blend_mode)
extg = mupdf.pdf_dict_get( resources, PDF_NAME('ExtGState'))
if not extg.m_internal: # no ExtGState yet: make one
extg = mupdf.pdf_dict_put_dict( resources, PDF_NAME('ExtGState'), 2)
# The graphics state is stored in /ExtGState under the name /H.
mupdf.pdf_dict_put( extg, PDF_NAME('H'), alp0)
except Exception as e:
if g_exceptions_verbose: exception_info()
message( f'cannot set opacity or blend mode\n: {e}')
raise
return True
@property
def apn_bbox(self):
    """annotation appearance bbox

    The /BBox of the normal appearance stream (/AP/N), or the infinite
    rectangle if there is no such stream, multiplied by the parent page's
    transformation matrix and then by its derotation matrix.
    """
    CheckParent(self)
    ap = mupdf.pdf_dict_getl(
            mupdf.pdf_annot_obj(self.this),
            PDF_NAME('AP'),
            PDF_NAME('N'),
            )
    if ap.m_internal:
        fz_rect = mupdf.pdf_dict_get_rect(ap, PDF_NAME('BBox'))
    else:
        fz_rect = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
    bbox = Rect(JM_py_from_rect(fz_rect)) * self.get_parent().transformation_matrix
    bbox *= self.get_parent().derotation_matrix
    return bbox
@property
def apn_matrix(self):
    """annotation appearance matrix

    The /Matrix of the normal appearance stream (/AP/N) as a Matrix.

    If there is no such stream, the result of JM_py_from_matrix() for a
    default mupdf.FzMatrix is returned as it is, i.e. without being wrapped
    in a Matrix.

    Exceptions are logged if `g_exceptions_verbose` is true, then re-raised.
    """
    try:
        CheckParent(self)
        pdf_annot = self.this
        assert isinstance(pdf_annot, mupdf.PdfAnnot)
        ap = mupdf.pdf_dict_getl(
                mupdf.pdf_annot_obj(pdf_annot),
                mupdf.PDF_ENUM_NAME_AP,
                mupdf.PDF_ENUM_NAME_N
                )
        if ap.m_internal:
            fz_matrix = mupdf.pdf_dict_get_matrix(ap, mupdf.PDF_ENUM_NAME_Matrix)
            return Matrix(JM_py_from_matrix(fz_matrix))
        return JM_py_from_matrix(mupdf.FzMatrix())
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
@property
def blendmode(self):
    """annotation BlendMode

    The annotation's own /BM entry if it has one. Otherwise the first /BM
    entry found in the dictionaries under /AP/N/Resources/ExtGState.
    None if neither exists.
    """
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    own_bm = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BM'))
    if own_bm.m_internal:
        return JM_UnicodeFromStr(mupdf.pdf_to_name(own_bm))
    # loop through the /AP/N/Resources/ExtGState objects
    ext_gstates = mupdf.pdf_dict_getl(
            annot_obj,
            PDF_NAME('AP'),
            PDF_NAME('N'),
            PDF_NAME('Resources'),
            PDF_NAME('ExtGState'),
            )
    if not mupdf.pdf_is_dict(ext_gstates):
        return None
    for i in range(mupdf.pdf_dict_len(ext_gstates)):
        gstate = mupdf.pdf_dict_get_val(ext_gstates, i)
        if not mupdf.pdf_is_dict(gstate):
            continue
        for j in range(mupdf.pdf_dict_len(gstate)):
            key = mupdf.pdf_dict_get_key(gstate, j)
            if mupdf.pdf_objcmp(key, PDF_NAME('BM')) == 0:
                return JM_UnicodeFromStr(mupdf.pdf_to_name(mupdf.pdf_dict_get_val(gstate, j)))
    return None
@property
def border(self):
    """Border information.

    The result of JM_annot_border() for the annotation types listed below;
    an empty dict for every other type.
    """
    CheckParent(self)
    if self.type[0] in (
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FREE_TEXT,
            mupdf.PDF_ANNOT_INK,
            mupdf.PDF_ANNOT_LINE,
            mupdf.PDF_ANNOT_POLY_LINE,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            ):
        return JM_annot_border(mupdf.pdf_annot_obj(self.this))
    return dict()
def clean_contents(self, sanitize=1):
    """Clean appearance contents stream.

    Runs MuPDF's content filter over the annotation with recurse=1,
    instance_forms=0 and ascii=0; `sanitize` is passed on as given.
    """
    CheckParent(self)
    pdf_annot = self.this
    document = mupdf.pdf_get_bound_document(mupdf.pdf_annot_obj(pdf_annot))
    options = _make_PdfFilterOptions(recurse=1, instance_forms=0, ascii=0, sanitize=sanitize)
    mupdf.pdf_filter_annot_contents(document, pdf_annot, options)
@property
def colors(self):
    """Color definitions.

    The result of JM_annot_colors() for the annotation object. Exceptions
    are logged if `g_exceptions_verbose` is true, then re-raised.
    """
    try:
        CheckParent(self)
        pdf_annot = self.this
        assert isinstance(pdf_annot, mupdf.PdfAnnot)
        annot_obj = mupdf.pdf_annot_obj(pdf_annot)
        return JM_annot_colors(annot_obj)
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def delete_responses(self):
"""Delete 'Popup' and responding annotations."""
CheckParent(self)
annot = self.this
annot_obj = mupdf.pdf_annot_obj(annot)
page = mupdf.pdf_annot_page(annot)
# Keep deleting whatever JM_find_annot_irt() returns for this annotation
# until it finds nothing more (presumably the annotations whose /IRT refers
# to this one - verify against JM_find_annot_irt).
while 1:
irt_annot = JM_find_annot_irt(annot)
if not irt_annot.m_internal:
break
mupdf.pdf_delete_annot(page, irt_annot)
# Drop this annotation's own /Popup entry.
mupdf.pdf_dict_del(annot_obj, PDF_NAME('Popup'))
annots = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Annots'))
n = mupdf.pdf_array_len(annots)
found = 0
# Go through the page's /Annots array from the end, so that deleting an
# entry does not move the entries still to be visited.
for i in range(n-1, -1, -1):
o = mupdf.pdf_array_get(annots, i)
p = mupdf.pdf_dict_get(o, PDF_NAME('Parent'))
if not o.m_internal:
continue
# A comparison result of 0 is taken to mean that /Parent is this
# annotation.
if not mupdf.pdf_objcmp(p, annot_obj):
mupdf.pdf_array_delete(annots, i)
found = 1
# Store the array again only if something was removed from it.
if found:
mupdf.pdf_dict_put(page.obj(), PDF_NAME('Annots'), annots)
@property
def file_info(self):
    """Attached file information.

    Returns a dict with these entries:

        dictkey_filename: file name from /FS/UF or, failing that, /FS/F;
            empty if the PDF has neither.
        dictkey_desc: text of /FS/Desc, if present.
        dictkey_length: /Length of the embedded file stream, or -1.
        dictkey_size: /Params/Size of the embedded file stream, or -1.

    Raises TypeError if this is not a file attachment annotation. A
    missing embedded file stream is reported through RAISEPY().
    """
    CheckParent(self)
    res = dict()
    length = -1
    size = -1
    desc = None
    # Must have a value even if the PDF has neither /UF nor /F; without this
    # default the lookup of `filename` below fails with UnboundLocalError.
    filename = ''
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    type_ = mupdf.pdf_annot_type(annot)
    if type_ != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
        raise TypeError( MSG_BAD_ANNOT_TYPE)
    stream = mupdf.pdf_dict_getl(
            annot_obj,
            PDF_NAME('FS'),
            PDF_NAME('EF'),
            PDF_NAME('F'),
            )
    if not stream.m_internal:
        RAISEPY( "bad PDF: file entry not found", JM_Exc_FileDataError)
    fs = mupdf.pdf_dict_get(annot_obj, PDF_NAME('FS'))
    # /UF takes precedence over /F.
    o = mupdf.pdf_dict_get(fs, PDF_NAME('UF'))
    if o.m_internal:
        filename = mupdf.pdf_to_text_string(o)
    else:
        o = mupdf.pdf_dict_get(fs, PDF_NAME('F'))
        if o.m_internal:
            filename = mupdf.pdf_to_text_string(o)
    o = mupdf.pdf_dict_get(fs, PDF_NAME('Desc'))
    if o.m_internal:
        desc = mupdf.pdf_to_text_string(o)
    o = mupdf.pdf_dict_get(stream, PDF_NAME('Length'))
    if o.m_internal:
        length = mupdf.pdf_to_int(o)
    o = mupdf.pdf_dict_getl(stream, PDF_NAME('Params'), PDF_NAME('Size'))
    if o.m_internal:
        size = mupdf.pdf_to_int(o)
    res[ dictkey_filename] = JM_EscapeStrFromStr(filename)
    res[ dictkey_desc] = JM_UnicodeFromStr(desc)
    res[ dictkey_length] = length
    res[ dictkey_size] = size
    return res
@property
def flags(self):
    """Flags field.

    The value reported by mupdf.pdf_annot_flags() for this annotation.
    """
    CheckParent(self)
    return mupdf.pdf_annot_flags(self.this)
def get_file(self):
    """Retrieve attached file content.

    Returns the content of the embedded file stream (/FS/EF/F) as converted
    by JM_BinFromBuffer().

    Raises TypeError if this is not a file attachment annotation. A
    missing stream is reported through RAISEPY().
    """
    CheckParent(self)
    pdf_annot = self.this
    annot_obj = mupdf.pdf_annot_obj(pdf_annot)
    if mupdf.pdf_annot_type(pdf_annot) != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
        raise TypeError( MSG_BAD_ANNOT_TYPE)
    embedded = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('FS'), PDF_NAME('EF'), PDF_NAME('F'))
    if not embedded.m_internal:
        RAISEPY( "bad PDF: file entry not found", JM_Exc_FileDataError)
    return JM_BinFromBuffer(mupdf.pdf_load_stream(embedded))
def get_oc(self):
    """Get annotation optional content reference.

    The object number of the annotation's /OC entry, or 0 if it has none.
    """
    CheckParent(self)
    oc_entry = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), PDF_NAME('OC'))
    return mupdf.pdf_to_num(oc_entry) if oc_entry.m_internal else 0
# PyMuPDF doesn't seem to have this .parent member, but removing it breaks
# 11 tests...?
#@property
def get_parent(self):
    '''
    Returns the page that owns this annotation.

    Uses `self.parent` if it is set. Otherwise builds a Page for the
    annotation's MuPDF page, with a Document for that page's document (or
    None if the MuPDF page is empty), stores it as `self.parent` and
    returns it.
    '''
    try:
        return self.parent
    except AttributeError:
        pass
    pdf_page = mupdf.pdf_annot_page(self.this)
    assert isinstance( pdf_page, mupdf.PdfPage)
    if pdf_page.m_internal:
        owner_doc = Document( pdf_page.doc())
    else:
        owner_doc = None
    owner = Page(pdf_page, owner_doc)
    # Stored as a strong reference, not as a weakref proxy.
    self.parent = owner
    return owner
def get_pixmap(self, matrix=None, dpi=None, colorspace=None, alpha=0):
    """annotation Pixmap

    matrix: transformation to render with; replaced if `dpi` is given.
    dpi: if true, the image is rendered with a scale of dpi / 72 in both
        directions and the pixmap's resolution is set to this value.
    colorspace: a colorspace, or one of the names "gray", "rgb" or "cmyk"
        in any letter case. Anything false, including an unknown name,
        selects MuPDF's device RGB.
    alpha: passed on to MuPDF.
    """
    CheckParent(self)
    by_name = {"gray": csGRAY, "rgb": csRGB, "cmyk": csCMYK}
    if type(colorspace) is str:
        colorspace = by_name.get(colorspace.lower())
    if dpi:
        zoom = dpi / 72
        matrix = Matrix(zoom, zoom)
    ctm = JM_matrix_from_py(matrix)
    target_cs = colorspace or mupdf.fz_device_rgb()
    pixmap = Pixmap(
            mupdf.pdf_new_pixmap_from_annot(self.this, ctm, target_cs, mupdf.FzSeparations(0), alpha)
            )
    if dpi:
        pixmap.set_dpi(dpi, dpi)
    return pixmap
def get_sound(self):
    """Retrieve sound stream.

    Returns a dict with the stream content under 'stream' and, for each of
    them that the sound object defines, 'rate' (/R), 'channels' (/C),
    'bps' (/B), 'encoding' (/E) and 'compression' (/CO).

    Raises TypeError if this is not a sound annotation or it has no /Sound
    entry. A sound object with an /F entry is reported through RAISEPY()
    as unsupported.
    """
    CheckParent(self)
    pdf_annot = self.this
    annot_obj = mupdf.pdf_annot_obj(pdf_annot)
    annot_type = mupdf.pdf_annot_type(pdf_annot)
    sound = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Sound'))
    if annot_type != mupdf.PDF_ANNOT_SOUND or not sound.m_internal:
        raise TypeError( MSG_BAD_ANNOT_TYPE)
    if mupdf.pdf_dict_get(sound, PDF_NAME('F')).m_internal:
        RAISEPY( "unsupported sound stream", JM_Exc_FileDataError)
    # (result key, lookup function, PDF key, conversion) for each optional
    # property.
    lookups = (
            ('rate', mupdf.pdf_dict_get, PDF_NAME('R'), mupdf.pdf_to_real),
            ('channels', mupdf.pdf_dict_get, PDF_NAME('C'), mupdf.pdf_to_int),
            ('bps', mupdf.pdf_dict_get, PDF_NAME('B'), mupdf.pdf_to_int),
            ('encoding', mupdf.pdf_dict_get, PDF_NAME('E'), mupdf.pdf_to_name),
            ('compression', mupdf.pdf_dict_gets, "CO", mupdf.pdf_to_name),
            )
    res = dict()
    for label, lookup, key, convert in lookups:
        entry = lookup(sound, key)
        if entry.m_internal:
            res[label] = convert(entry)
    res['stream'] = JM_BinFromBuffer(mupdf.pdf_load_stream(sound))
    return res
def get_textpage(self, clip=None, flags=0):
    """Make annotation TextPage.

    flags: stored as the `flags` of the text extraction options.

    The returned TextPage gets a weak proxy of the parent page as its
    `parent`.

    NOTE(review): `clip` is accepted but not used by this method.
    """
    CheckParent(self)
    options = mupdf.FzStextOptions()
    options.flags = flags
    textpage = TextPage(mupdf.FzStextPage(self.this, options))
    owner = self.get_parent()
    # Never hold a strong reference: wrap the page unless it already is a
    # proxy.
    textpage.parent = owner if isinstance(owner, weakref.ProxyType) else weakref.proxy(owner)
    return textpage
@property
def has_popup(self):
    """Check if annotation has a Popup.

    True if the annotation object has a /Popup entry, else False.
    """
    CheckParent(self)
    popup = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), PDF_NAME('Popup'))
    return bool(popup.m_internal)
@property
def info(self):
    """Various information details.

    Returns a dict with the annotation's contents, /Name, title (/T, the
    author), /CreationDate, modification date (/M), subject (/Subj) and
    identification (/NM).
    """
    CheckParent(self)
    pdf_annot = self.this

    def entry(key):
        # Looks up a predefined PDF name in the annotation object.
        return mupdf.pdf_dict_get(mupdf.pdf_annot_obj(pdf_annot), PDF_NAME(key))

    def entry_s(key):
        # Looks up a key given as a plain string.
        return mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(pdf_annot), key)

    def text(obj):
        return JM_UnicodeFromStr(mupdf.pdf_to_text_string(obj))

    res = dict()
    res[dictkey_content] = JM_UnicodeFromStr(mupdf.pdf_annot_contents(pdf_annot))
    res[dictkey_name] = JM_UnicodeFromStr(mupdf.pdf_to_name(entry('Name')))
    # Title (= author)
    res[dictkey_title] = text(entry('T'))
    # CreationDate
    res[dictkey_creationDate] = text(entry_s("CreationDate"))
    # ModDate
    res[dictkey_modDate] = text(entry('M'))
    # Subj: stored as returned by pdf_to_text_string(), without going
    # through JM_UnicodeFromStr().
    res[dictkey_subject] = mupdf.pdf_to_text_string(entry_s("Subj"))
    # Identification (PDF key /NM)
    res[dictkey_id] = text(entry_s("NM"))
    return res
@property
def irt_xref(self):
    '''
    annotation IRT xref

    The object number of the annotation's /IRT entry, or 0 if it has none.
    '''
    irt = mupdf.pdf_dict_get( mupdf.pdf_annot_obj( self.this), PDF_NAME('IRT'))
    if irt.m_internal:
        return mupdf.pdf_to_num( irt)
    return 0
@property
def is_open(self):
    """Get 'open' status of annotation or its Popup.

    The value reported by mupdf.pdf_annot_is_open().
    """
    CheckParent(self)
    pdf_annot = self.this
    return mupdf.pdf_annot_is_open(pdf_annot)
@property
def language(self):
    """annotation language

    The language as a string, or None if no language is set.
    """
    code = mupdf.pdf_annot_language(self.this)
    if code == mupdf.FZ_LANG_UNSET:
        return None
    assert hasattr(mupdf, 'fz_string_from_text_language2')
    return mupdf.fz_string_from_text_language2(code)
@property
def line_ends(self):
    """Line end codes.

    A (start, end) pair of line ending styles, or None for annotation
    types that have no line ending styles.
    """
    CheckParent(self)
    pdf_annot = self.this
    # return nothing for invalid annot types
    if not mupdf.pdf_annot_has_line_ending_styles(pdf_annot):
        return None
    return (
            mupdf.pdf_annot_line_start_style(pdf_annot),
            mupdf.pdf_annot_line_end_style(pdf_annot),
            )
@property
def next(self):
    """Next annotation.

    The annotation following this one, or the following widget if this
    annotation is a widget. None if there is no successor.

    The successor is registered in its parent page's `_annot_refs`. If it
    is a widget, a Widget filled by TOOLS._fill_widget() is returned
    instead of the Annot.
    """
    CheckParent(self)
    current = self.this
    assert isinstance(current, mupdf.PdfAnnot)
    assert current.m_internal
    if mupdf.pdf_annot_type(current) == mupdf.PDF_ANNOT_WIDGET:
        successor = mupdf.pdf_next_widget(current)
    else:
        successor = mupdf.pdf_next_annot(current)
    val = Annot(successor) if successor.m_internal else None
    if not val:
        return None
    val.thisown = True
    # The successor must be on the same page as this annotation.
    assert val.get_parent().this.m_internal_value() == self.get_parent().this.m_internal_value()
    val.parent._annot_refs[id(val)] = val
    if val.type[0] != mupdf.PDF_ANNOT_WIDGET:
        return val
    widget = Widget()
    TOOLS._fill_widget(val, widget)
    return widget
@property
def opacity(self):
    """Opacity.

    The annotation's /CA value if that entry is a number, else -1.
    """
    CheckParent(self)
    ca = mupdf.pdf_dict_get( mupdf.pdf_annot_obj(self.this), mupdf.PDF_ENUM_NAME_CA)
    return mupdf.pdf_to_real(ca) if mupdf.pdf_is_number(ca) else -1
@property
def popup_rect(self):
    """annotation 'Popup' rectangle

    The /Rect of the annotation's /Popup, or the infinite rectangle if
    there is no /Popup, multiplied by the parent page's transformation
    matrix and then by its derotation matrix.
    """
    CheckParent(self)
    popup = mupdf.pdf_dict_get( mupdf.pdf_annot_obj( self.this), PDF_NAME('Popup'))
    if popup.m_internal:
        fz_rect = mupdf.pdf_dict_get_rect(popup, PDF_NAME('Rect'))
    else:
        fz_rect = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
    result = Rect(JM_py_from_rect(fz_rect)) * self.get_parent().transformation_matrix
    result *= self.get_parent().derotation_matrix
    return result
@property
def popup_xref(self):
    """annotation 'Popup' xref

    The object number of the annotation's /Popup entry, or 0 if it has
    none.
    """
    CheckParent(self)
    popup = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), PDF_NAME('Popup'))
    return mupdf.pdf_to_num(popup) if popup.m_internal else 0
@property
def rect(self):
    """annotation rectangle

    The annotation's bounds as a Rect, multiplied by the parent page's
    derotation matrix. The bounds come from extra.Annot_rect3() if
    `g_use_extra` is set, otherwise from mupdf.pdf_bound_annot().
    """
    raw = extra.Annot_rect3( self.this) if g_use_extra else mupdf.pdf_bound_annot(self.this)
    bounds = Rect(raw)
    # get_parent() keeps the page on `self.parent`; caching it reduces 1000x
    # from 0.07 to 0.04.
    bounds *= self.get_parent().derotation_matrix
    return bounds
@property
def rect_delta(self):
    '''
    annotation delta values to rectangle

    If the annotation's /RD entry has exactly four items, returns them as a
    tuple of numbers with the third and fourth negated. Otherwise returns
    None.
    '''
    rd = mupdf.pdf_dict_get( mupdf.pdf_annot_obj(self.this), PDF_NAME('RD'))
    if mupdf.pdf_array_len( rd) != 4:
        return None
    first, second, third, fourth = [
            mupdf.pdf_to_real( mupdf.pdf_array_get( rd, i))
            for i in range(4)
            ]
    return (first, second, -third, -fourth)
@property
def rotation(self):
    """annotation rotation

    The integer value of the annotation's /Rotate entry, or -1 if it has
    none.
    """
    CheckParent(self)
    rotate = mupdf.pdf_dict_get( mupdf.pdf_annot_obj(self.this), mupdf.PDF_ENUM_NAME_Rotate)
    if rotate.m_internal:
        return mupdf.pdf_to_int( rotate)
    return -1
def set_apn_bbox(self, bbox):
    """
    Set annotation appearance bbox.

    `bbox` is multiplied by the parent page's rotation matrix and the
    inverse of its transformation matrix, then stored as the /BBox of the
    normal appearance stream (/AP/N).

    Raises RuntimeError if the annotation has no /AP/N entry.
    """
    CheckParent(self)
    owner = self.get_parent()
    rotation = owner.rotation_matrix
    transformation = owner.transformation_matrix
    # Applied before the /AP/N check below, and with `*=` on the argument
    # itself.
    bbox *= rotation * ~transformation
    ap = mupdf.pdf_dict_getl(
            mupdf.pdf_annot_obj(self.this),
            PDF_NAME('AP'),
            PDF_NAME('N'),
            )
    if not ap.m_internal:
        raise RuntimeError( MSG_BAD_APN)
    mupdf.pdf_dict_put_rect(ap, PDF_NAME('BBox'), JM_rect_from_py(bbox))
def set_apn_matrix(self, matrix):
    """Set annotation appearance matrix.

    Stores `matrix` as the /Matrix of the normal appearance stream
    (/AP/N). Raises RuntimeError if the annotation has no /AP/N entry.
    """
    CheckParent(self)
    ap = mupdf.pdf_dict_getl(
            mupdf.pdf_annot_obj(self.this),
            PDF_NAME('AP'),
            PDF_NAME('N'),
            )
    if not ap.m_internal:
        raise RuntimeError( MSG_BAD_APN)
    mupdf.pdf_dict_put_matrix(ap, PDF_NAME('Matrix'), JM_matrix_from_py(matrix))
def set_blendmode(self, blend_mode):
    """Set annotation BlendMode.

    Stores `blend_mode` as a name under /BM in the annotation object.
    """
    CheckParent(self)
    target = mupdf.pdf_annot_obj(self.this)
    mupdf.pdf_dict_put_name(target, PDF_NAME('BM'), blend_mode)
def set_border(self, border=None, width=-1, style=None, dashes=None, clouds=-1):
    """Set border properties.

    Either a dict, or direct arguments width, style, dashes or clouds.

    border: dict with any of the keys "width", "style", "dashes" and
        "clouds". If it is a dict, the direct arguments are not used. The
        dict passed in is left unchanged.
    width, clouds: -1 or None leave the property alone.
    dashes: a sequence of integers; a sequence with any non-integer item
        is treated as None.

    Returns None after a message if borders are not supported for this
    annotation type, otherwise the result of JM_annot_set_border().
    """
    CheckParent(self)
    atype, atname = self.type[:2] # annotation type
    if atype not in (
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FREE_TEXT,
            mupdf.PDF_ANNOT_INK,
            mupdf.PDF_ANNOT_LINE,
            mupdf.PDF_ANNOT_POLY_LINE,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            ):
        message(f"Cannot set border for '{atname}'.")
        return None
    if atype not in (
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FREE_TEXT,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            ):
        if clouds > 0:
            message(f"Cannot set cloudy border for '{atname}'.")
            clouds = -1 # do not set border effect
    if type(border) is dict:
        # Work on a copy: the defaults and normalisations below must not
        # show up in the caller's dict.
        border = dict(border)
    else:
        border = {"width": width, "style": style, "dashes": dashes, "clouds": clouds}
    border.setdefault("width", -1)
    border.setdefault("style", None)
    border.setdefault("dashes", None)
    border.setdefault("clouds", -1)
    if border["width"] is None:
        border["width"] = -1
    if border["clouds"] is None:
        border["clouds"] = -1
    if hasattr(border["dashes"], "__getitem__"): # ensure sequence items are integers
        items = tuple(border["dashes"])
        border["dashes"] = items if all(isinstance(item, int) for item in items) else None
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj( annot)
    pdf = mupdf.pdf_get_bound_document( annot_obj)
    return JM_annot_set_border( border, pdf, annot_obj)
def set_colors(self, colors=None, stroke=None, fill=None):
    """Set 'stroke' and 'fill' colors.

    Use either a dict or the direct arguments.

    For each of the two colors: an empty list or tuple stores an empty
    array, None leaves the entry alone, a single number is treated as a
    one-component color, and anything else must be a sequence of 1, 3 or 4
    components accepted by CheckColor(). The stroke color is stored under
    /C, the fill color under /IC.

    A true fill color is ignored, with a warning message, for annotation
    types that are not in the list below.
    """
    CheckParent(self)
    doc = self.get_parent().parent
    if type(colors) is not dict:
        colors = {"fill": fill, "stroke": stroke}
    fill = colors.get("fill")
    stroke = colors.get("stroke")
    fill_annots = (mupdf.PDF_ANNOT_CIRCLE, mupdf.PDF_ANNOT_SQUARE, mupdf.PDF_ANNOT_LINE, mupdf.PDF_ANNOT_POLY_LINE, mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_REDACT,)

    def store(key, color):
        # Writes one color entry of the annotation's PDF object.
        if color in ([], ()):
            doc.xref_set_key(self.xref, key, "[]")
            return
        if color is None:
            return
        if hasattr(color, "__float__"):
            color = [float(color)]
        CheckColor(color)
        assert len(color) in (1, 3, 4)
        doc.xref_set_key(self.xref, key, f"[{_format_g(color)}]")

    store("C", stroke)
    if fill and self.type[0] not in fill_annots:
        message("Warning: fill color ignored for annot type '%s'." % self.type[1])
        return
    store("IC", fill)
def set_flags(self, flags):
    """Set annotation flags.

    Passes `flags` to mupdf.pdf_set_annot_flags().
    """
    CheckParent(self)
    mupdf.pdf_set_annot_flags(self.this, flags)
def set_info(self, info=None, content=None, title=None, creationDate=None, modDate=None, subject=None):
    """Set various properties.

    Use either the dict `info`, with any of the keys "content", "title",
    "creationDate", "modDate" and "subject", or the direct arguments. If
    `info` is a dict, the direct arguments are not used.

    Only true values are written. Everything except the content is written
    only if mupdf.pdf_annot_has_author() is true for the annotation.
    """
    CheckParent(self)
    if type(info) is dict: # build the args from the dictionary
        content, title, creationDate, modDate, subject = [
                info.get(key)
                for key in ("content", "title", "creationDate", "modDate", "subject")
                ]
    pdf_annot = self.this
    # use this to indicate a 'markup' annot type
    is_markup = mupdf.pdf_annot_has_author(pdf_annot)
    # contents
    if content:
        mupdf.pdf_set_annot_contents(pdf_annot, content)
    if not is_markup:
        return
    # title (= author)
    if title:
        mupdf.pdf_set_annot_author(pdf_annot, title)
    # creation date
    if creationDate:
        mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(pdf_annot), PDF_NAME('CreationDate'), creationDate)
    # mod date
    if modDate:
        mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(pdf_annot), PDF_NAME('M'), modDate)
    # subject
    if subject:
        mupdf.pdf_dict_puts(mupdf.pdf_annot_obj(pdf_annot), "Subj", mupdf.pdf_new_text_string(subject))
def set_irt_xref(self, xref):
    '''
    Set annotation IRT xref

    Stores an indirect reference to object `xref` as the annotation's /IRT
    entry.

    Raises ValueError if `xref` is less than 1 or not below the document's
    xref length, or if the /Subtype of that object is not recognised as an
    annotation type.
    '''
    pdf_annot = self.this
    annot_obj = mupdf.pdf_annot_obj( pdf_annot)
    pdf_page = mupdf.pdf_annot_page( pdf_annot)
    if xref < 1 or xref >= mupdf.pdf_xref_len( pdf_page.doc()):
        raise ValueError( MSG_BAD_XREF)
    target = mupdf.pdf_new_indirect( pdf_page.doc(), xref, 0)
    subtype_name = mupdf.pdf_to_name( mupdf.pdf_dict_get( target, PDF_NAME('Subtype')))
    if mupdf.pdf_annot_type_from_string( subtype_name) < 0:
        raise ValueError( MSG_IS_NO_ANNOT)
    mupdf.pdf_dict_put( annot_obj, PDF_NAME('IRT'), target)
def set_language(self, language=None):
    """Set annotation language.

    A false `language` unsets the language; otherwise the string is
    converted with mupdf.fz_text_language_from_string().
    """
    CheckParent(self)
    pdf_annot = self.this
    if language:
        code = mupdf.fz_text_language_from_string(language)
    else:
        code = mupdf.FZ_LANG_UNSET
    mupdf.pdf_set_annot_language(pdf_annot, code)
def set_line_ends(self, start, end):
    """Set line end codes.

    Sets the start and end line ending styles. For annotation types
    without line ending styles, only a warning is issued.
    """
    CheckParent(self)
    pdf_annot = self.this
    if not mupdf.pdf_annot_has_line_ending_styles(pdf_annot):
        message_warning("bad annot type for line ends")
        return
    mupdf.pdf_set_annot_line_ending_styles(pdf_annot, start, end)
def set_name(self, name):
    """Store ``name`` as the /Name (icon) entry of the annotation."""
    CheckParent(self)
    mupdf.pdf_dict_put_name(
        mupdf.pdf_annot_obj(self.this),
        PDF_NAME('Name'),
        name,
    )
def set_oc(self, oc=0):
    """Set the annotation's /OC entry from xref ``oc``, or remove it.

    A falsy ``oc`` deletes the /OC key from the annotation object.
    """
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    if oc:
        owning_pdf = mupdf.pdf_get_bound_document(annot_obj)
        JM_add_oc_object(owning_pdf, annot_obj, oc)
        return
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('OC'))
def set_opacity(self, opacity):
    """Set the annotation opacity.

    Values rejected by ``_INRANGE(opacity, 0.0, 1.0)`` reset the opacity
    to 1. An accepted value below 1 also sets the ``transparency``
    attribute of the annotation's page to 1.
    """
    CheckParent(self)
    pdf_annot = self.this
    if _INRANGE(opacity, 0.0, 1.0):
        mupdf.pdf_set_annot_opacity(pdf_annot, opacity)
        if opacity < 1.0:
            mupdf.pdf_annot_page(pdf_annot).transparency = 1
    else:
        mupdf.pdf_set_annot_opacity(pdf_annot, 1)
def set_open(self, is_open):
    """Set the 'open' status of the annotation or its Popup."""
    CheckParent(self)
    mupdf.pdf_set_annot_is_open(self.this, is_open)
def set_popup(self, rect):
    '''
    Create the annotation's 'Popup' or update its rectangle.

    The given rectangle is transformed with the page rotation matrix
    before it is handed to MuPDF.
    '''
    CheckParent(self)
    pdf_annot = self.this
    rotation_matrix = JM_rotate_page_matrix(mupdf.pdf_annot_page( pdf_annot))
    popup_rect = mupdf.fz_transform_rect(JM_rect_from_py(rect), rotation_matrix)
    mupdf.pdf_set_annot_popup(pdf_annot, popup_rect)
def set_rect(self, rect):
    """Set the annotation rectangle.

    The rectangle is transformed with the page rotation matrix first.

    Returns:
        False if MuPDF raised while setting the rectangle (a message is
        emitted), otherwise None.

    Raises:
        ValueError: if the transformed rectangle is empty or infinite.
    """
    CheckParent(self)
    pdf_annot = self.this
    page_rotation = JM_rotate_page_matrix(mupdf.pdf_annot_page(pdf_annot))
    target = mupdf.fz_transform_rect(JM_rect_from_py(rect), page_rotation)
    unusable = mupdf.fz_is_empty_rect(target) or mupdf.fz_is_infinite_rect(target)
    if unusable:
        raise ValueError( MSG_BAD_RECT)
    try:
        mupdf.pdf_set_annot_rect(pdf_annot, target)
    except Exception as exc:
        # best effort: report the problem and signal failure to the caller
        message(f'cannot set rect: {exc}')
        return False
def set_rotation(self, rotate=0):
    """Set annotation rotation.

    Args:
        rotate: rotation angle in degrees. It is normalized to the range
            0 <= angle < 360. For 'FreeText' annotations any angle that is
            not a multiple of 90 is replaced by 0.

    Annotation types other than the ones listed below are left untouched.
    """
    CheckParent(self)
    annot = self.this
    annot_type = mupdf.pdf_annot_type(annot)  # do not shadow builtin 'type'
    if annot_type not in (
            mupdf.PDF_ANNOT_CARET,
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FREE_TEXT,
            mupdf.PDF_ANNOT_FILE_ATTACHMENT,
            mupdf.PDF_ANNOT_INK,
            mupdf.PDF_ANNOT_LINE,
            mupdf.PDF_ANNOT_POLY_LINE,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            mupdf.PDF_ANNOT_STAMP,
            mupdf.PDF_ANNOT_TEXT,
            ):
        return
    # Python's modulo yields a value in [0, 360) for negative input as well,
    # in constant time instead of one loop pass per full turn.
    rot = rotate % 360
    if annot_type == mupdf.PDF_ANNOT_FREE_TEXT and rot % 90 != 0:
        rot = 0
    annot_obj = mupdf.pdf_annot_obj(annot)
    mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Rotate'), rot)
@property
def type(self):
    """annotation type

    Returns:
        'null' if the underlying MuPDF annotation is empty. Otherwise a
        tuple (type number, type name), extended by a third item - the
        intent name - if the annotation has an /IT entry that is a name.
    """
    CheckParent(self)
    if not self.this.m_internal:
        return 'null'
    type_ = mupdf.pdf_annot_type(self.this)
    type_name = mupdf.pdf_string_from_annot_type(type_)
    intent = mupdf.pdf_dict_gets( mupdf.pdf_annot_obj(self.this), 'IT')
    # Only a present /IT entry that really is a name carries an intent.
    if not intent.m_internal or not mupdf.pdf_is_name(intent):
        return (type_, type_name)
    return (type_, type_name, mupdf.pdf_to_name(intent))
def update(self,
        blend_mode: OptStr =None,
        opacity: OptFloat =None,
        fontsize: float =0,
        fontname: OptStr =None,
        text_color: OptSeq =None,
        border_color: OptSeq =None,
        fill_color: OptSeq =None,
        cross_out: bool =True,
        rotate: int =-1,
        ):
    """Update annot appearance.

    Notes:
        Depending on the annot type, some parameters make no sense,
        while others are only available in this method to achieve the
        desired result. This is especially true for 'FreeText' annots.
    Args:
        blend_mode: set the blend mode, all annotations.
        opacity: set the opacity, all annotations.
        fontsize: set fontsize, 'FreeText' only.
        fontname: set the font, 'FreeText' only.
        border_color: set border color, 'FreeText' only.
        text_color: set text color, 'FreeText' only.
        fill_color: set fill color, all annotations.
        cross_out: draw diagonal lines, 'Redact' only.
        rotate: set rotation, 'FreeText' and some others.
    Raises:
        RuntimeError: if the MuPDF appearance update reports failure.
    """
    Annot.update_timing_test()
    CheckParent(self)

    def color_string(cs, code):
        """Return valid PDF color operator for a given color sequence.
        """
        cc = ColorCode(cs, code)
        if not cc:
            return b""
        return (cc + "\n").encode()

    annot_type = self.type[0]  # get the annot type

    dt = self.border.get("dashes", None)  # get the dashes spec
    bwidth = self.border.get("width", -1)  # get border line width
    stroke = self.colors["stroke"]  # get the stroke color
    if fill_color is not None:
        fill = fill_color
    else:
        fill = self.colors["fill"]
    rect = None  # self.rect  # prevent MuPDF fiddling with it
    apnmat = self.apn_matrix  # prevent MuPDF fiddling with it
    if rotate != -1:  # sanitize rotation value
        rotate %= 360  # result is in [0, 360), also for negative input
        if annot_type == mupdf.PDF_ANNOT_FREE_TEXT and rotate % 90 != 0:
            rotate = 0

    #------------------------------------------------------------------
    # handle opacity and blend mode
    #------------------------------------------------------------------
    if blend_mode is None:
        blend_mode = self.blendmode
    if not hasattr(opacity, "__float__"):
        opacity = self.opacity

    if 0 <= opacity < 1 or blend_mode is not None:
        opa_code = "/H gs\n"  # then we must reference this 'gs'
    else:
        opa_code = ""

    if annot_type == mupdf.PDF_ANNOT_FREE_TEXT:
        CheckColor(border_color)
        CheckColor(text_color)
        CheckColor(fill_color)
        tcol, fname, fsize = TOOLS._parse_da(self)

        # read and update default appearance as necessary
        update_default_appearance = False
        if fsize <= 0:
            fsize = 12
            update_default_appearance = True
        if text_color is not None:
            tcol = text_color
            update_default_appearance = True
        if fontname is not None:
            fname = fontname
            update_default_appearance = True
        if fontsize > 0:
            fsize = fontsize
            update_default_appearance = True

        if update_default_appearance:
            # NOTE(review): a text color with a component count other than
            # 1, 3 or 4 leaves 'fmt' unbound and fails on the format call.
            if len(tcol) == 3:
                fmt = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf"
            elif len(tcol) == 1:
                fmt = "{:g} g /{f:s} {s:g} Tf"
            elif len(tcol) == 4:
                fmt = "{:g} {:g} {:g} {:g} k /{f:s} {s:g} Tf"
            da_str = fmt.format(*tcol, f=fname, s=fsize)
            TOOLS._update_da(self, da_str)

    #------------------------------------------------------------------
    # now invoke MuPDF to update the annot appearance
    #------------------------------------------------------------------
    val = self._update_appearance(
        opacity=opacity,
        blend_mode=blend_mode,
        fill_color=fill,
        rotate=rotate,
    )
    if val is False:
        raise RuntimeError("Error updating annotation.")

    bfill = color_string(fill, "f")
    bstroke = color_string(stroke, "c")

    p_ctm = self.get_parent().transformation_matrix
    imat = ~p_ctm  # inverse page transf. matrix

    if dt:
        dashes = "[" + " ".join(map(str, dt)) + "] 0 d\n"
        dashes = dashes.encode("utf-8")
    else:
        dashes = None

    if self.line_ends:
        line_end_le, line_end_ri = self.line_ends
    else:
        line_end_le, line_end_ri = 0, 0  # init line end codes

    # read contents as created by MuPDF
    ap = self._getAP()
    ap_tab = ap.splitlines()  # split in single lines
    ap_updated = False  # assume we did nothing

    if annot_type == mupdf.PDF_ANNOT_REDACT:
        if cross_out:  # create crossed-out rect
            ap_updated = True
            ap_tab = ap_tab[:-1]
            _, LL, LR, UR, UL = ap_tab
            ap_tab.append(LR)
            ap_tab.append(LL)
            ap_tab.append(UR)
            ap_tab.append(LL)
            ap_tab.append(UL)
            ap_tab.append(b"S")

        if bwidth > 0 or bstroke != b"":
            ap_updated = True
            ntab = [_format_g(bwidth).encode() + b" w"] if bwidth > 0 else []
            for line in ap_tab:
                if line.endswith(b"w"):
                    continue
                if line.endswith(b"RG") and bstroke != b"":
                    line = bstroke[:-1]
                ntab.append(line)
            ap_tab = ntab

        ap = b"\n".join(ap_tab)

    if annot_type == mupdf.PDF_ANNOT_FREE_TEXT:
        BT = ap.find(b"BT")
        ET = ap.rfind(b"ET") + 2
        ap = ap[BT:ET]
        w, h = self.rect.width, self.rect.height
        if rotate in (90, 270) or not (apnmat.b == apnmat.c == 0):
            w, h = h, w
        # named 'rect_op' so the imported 're' module is not shadowed
        rect_op = b"0 0 " + _format_g((w, h)).encode() + b" re"
        ap = rect_op + b"\nW\nn\n" + ap
        ope = None
        fill_string = color_string(fill, "f")
        if fill_string:
            ope = b"f"
        stroke_string = color_string(border_color, "c")
        if stroke_string and bwidth > 0:
            ope = b"S"
            bwidth = _format_g(bwidth).encode() + b" w\n"
        else:
            bwidth = stroke_string = b""
        if fill_string and stroke_string:
            ope = b"B"
        if ope is not None:
            ap = bwidth + fill_string + stroke_string + rect_op + b"\n" + ope + b"\n" + ap

        if dashes is not None:  # handle dashes
            ap = dashes + b"\n" + ap
            dashes = None

        ap_updated = True

    if annot_type in (mupdf.PDF_ANNOT_POLYGON, mupdf.PDF_ANNOT_POLY_LINE):
        ap = b"\n".join(ap_tab[:-1]) + b"\n"
        ap_updated = True
        if bfill != b"":
            if annot_type == mupdf.PDF_ANNOT_POLYGON:
                ap = ap + bfill + b"b"  # close, fill, and stroke
            elif annot_type == mupdf.PDF_ANNOT_POLY_LINE:
                ap = ap + b"S"  # stroke
        else:
            if annot_type == mupdf.PDF_ANNOT_POLYGON:
                ap = ap + b"s"  # close and stroke
            elif annot_type == mupdf.PDF_ANNOT_POLY_LINE:
                ap = ap + b"S"  # stroke

    if dashes is not None:  # handle dashes
        ap = dashes + ap
        # reset dashing - only applies for LINE annots with line ends given
        ap = ap.replace(b"\nS\n", b"\nS\n[] 0 d\n", 1)
        ap_updated = True

    if opa_code:
        ap = opa_code.encode("utf-8") + ap
        ap_updated = True

    ap = b"q\n" + ap + b"\nQ\n"
    #----------------------------------------------------------------------
    # the following handles line end symbols for 'Polygon' and 'Polyline'
    #----------------------------------------------------------------------
    if line_end_le + line_end_ri > 0 and annot_type in (mupdf.PDF_ANNOT_POLYGON, mupdf.PDF_ANNOT_POLY_LINE):

        le_funcs = (None, TOOLS._le_square, TOOLS._le_circle,
                    TOOLS._le_diamond, TOOLS._le_openarrow,
                    TOOLS._le_closedarrow, TOOLS._le_butt,
                    TOOLS._le_ropenarrow, TOOLS._le_rclosedarrow,
                    TOOLS._le_slash)
        le_funcs_range = range(1, len(le_funcs))
        d = 2 * max(1, self.border["width"])
        rect = self.rect + (-d, -d, d, d)
        ap_updated = True
        points = self.vertices
        if line_end_le in le_funcs_range:
            p1 = Point(points[0]) * imat
            p2 = Point(points[1]) * imat
            left = le_funcs[line_end_le](self, p1, p2, False, fill_color)
            ap += left.encode()
        if line_end_ri in le_funcs_range:
            p1 = Point(points[-2]) * imat
            p2 = Point(points[-1]) * imat
            left = le_funcs[line_end_ri](self, p1, p2, True, fill_color)
            ap += left.encode()

    if ap_updated:
        if rect:  # rect modified here?
            self.set_rect(rect)
            self._setAP(ap, rect=1)
        else:
            self._setAP(ap, rect=0)

    #-------------------------------
    # handle annotation rotations
    #-------------------------------
    if annot_type not in (  # only these types are supported
            mupdf.PDF_ANNOT_CARET,
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FILE_ATTACHMENT,
            mupdf.PDF_ANNOT_INK,
            mupdf.PDF_ANNOT_LINE,
            mupdf.PDF_ANNOT_POLY_LINE,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            mupdf.PDF_ANNOT_STAMP,
            mupdf.PDF_ANNOT_TEXT,
            ):
        return

    rot = self.rotation  # get value from annot object
    if rot == -1:  # nothing to change
        return

    M = (self.rect.tl + self.rect.br) / 2  # center of annot rect

    if rot == 0:  # undo rotations
        if abs(apnmat - Matrix(1, 1)) < 1e-5:
            return  # matrix already is a no-op
        quad = self.rect.morph(M, ~apnmat)  # derotate rect
        # use the same rectangle setter as the other branches of this method
        self.set_rect(quad.rect)
        self.set_apn_matrix(Matrix(1, 1))  # appearance matrix = no-op
        return

    mat = Matrix(rot)
    quad = self.rect.morph(M, mat)
    self.set_rect(quad.rect)
    self.set_apn_matrix(apnmat * mat)
def update_file(self, buffer_=None, filename=None, ufilename=None, desc=None):
    """Update the file attached to a 'FileAttachment' annotation.

    Args:
        buffer_: new file content.
        filename: new file name; written to /F and /UF of both the stream
            and the file specification, and to the annotation /Contents.
        ufilename: new /UF value (applied after ``filename``).
        desc: new /Desc value.

    Raises:
        TypeError: if the annotation is not a file attachment.
        ValueError: if ``buffer_`` is truthy but yields no MuPDF buffer.
    """
    CheckParent(self)
    pdf_annot = self.this
    annot_obj = mupdf.pdf_annot_obj(pdf_annot)
    owning_pdf = mupdf.pdf_get_bound_document(annot_obj)  # the owning PDF
    if mupdf.pdf_annot_type(pdf_annot) != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
        raise TypeError( MSG_BAD_ANNOT_TYPE)
    # the object for file content
    stream = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('FS'), PDF_NAME('EF'), PDF_NAME('F'))
    if not stream.m_internal:
        RAISEPY( "bad PDF: no /EF object", JM_Exc_FileDataError)

    filespec = mupdf.pdf_dict_get(annot_obj, PDF_NAME('FS'))

    # file content given
    new_content = JM_BufferFromBytes(buffer_)
    if buffer_ and not new_content.m_internal:
        raise ValueError( MSG_BAD_BUFFER)
    if new_content:
        JM_update_stream(owning_pdf, stream, new_content, 1)
        # adjust /DL and /Size parameters
        size, _ = mupdf.fz_buffer_storage(new_content)
        size_obj = mupdf.pdf_new_int(size)
        mupdf.pdf_dict_put(stream, PDF_NAME('DL'), size_obj)
        mupdf.pdf_dict_putl(stream, size_obj, PDF_NAME('Params'), PDF_NAME('Size'))

    targets = (stream, filespec)
    if filename:
        for key in ('F', 'UF'):
            for target in targets:
                mupdf.pdf_dict_put_text_string(target, PDF_NAME(key), filename)
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('Contents'), filename)

    if ufilename:
        for target in targets:
            mupdf.pdf_dict_put_text_string(target, PDF_NAME('UF'), ufilename)

    if desc:
        for target in targets:
            mupdf.pdf_dict_put_text_string(target, PDF_NAME('Desc'), desc)
@staticmethod
def update_timing_test():
    """Add up the integers 0 .. 30*1000 - 1 in a plain loop; return the sum.

    NOTE(review): judging by the name, this is presumably a fixed workload
    for timing comparisons - confirm before changing the loop.
    """
    total = 0
    for i in range( 30*1000):
        total += i
    return total
@property
def vertices(self):
    """annotation vertex points

    Returns:
        A list of (x, y) tuples taken from the first entry found among
        /Vertices, /L, /QuadPoints and /CL. If none of these exists but
        /InkList does, a list of such lists (one per ink stroke). None if
        no entry is present. All points are transformed with the page
        transformation matrix concatenated with the derotation matrix.
    """
    CheckParent(self)
    pdf_annot = self.this
    assert isinstance(pdf_annot, mupdf.PdfAnnot)
    annot_obj = mupdf.pdf_annot_obj(pdf_annot)
    page = mupdf.pdf_annot_page(pdf_annot)
    ctm = mupdf.FzMatrix()  # receives the page transformation matrix
    unused_rect = mupdf.FzRect()  # out-param of mupdf.pdf_page_transform()
    mupdf.pdf_page_transform(page, unused_rect, ctm)
    ctm = mupdf.fz_concat(ctm, JM_derotate_page_matrix(page))

    def transformed_points(array):
        """Read a flat PDF array as number pairs and transform each point."""
        result = []
        for k in range(0, mupdf.pdf_array_len(array), 2):
            px = mupdf.pdf_to_real(mupdf.pdf_array_get(array, k))
            py = mupdf.pdf_to_real(mupdf.pdf_array_get(array, k + 1))
            moved = mupdf.fz_transform_point(mupdf.FzPoint(px, py), ctm)
            result.append( (moved.x, moved.y))
        return result

    #----------------------------------------------------------------
    # The following objects occur in different annotation types, so the
    # first one found is the one to use. Each is a flat list (depth 1).
    #----------------------------------------------------------------
    flat = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Vertices'))
    if not flat.m_internal:
        flat = mupdf.pdf_dict_get(annot_obj, PDF_NAME('L'))
    if not flat.m_internal:
        flat = mupdf.pdf_dict_get(annot_obj, PDF_NAME('QuadPoints'))
    if not flat.m_internal:
        flat = mupdf.pdf_dict_gets(annot_obj, 'CL')
    if flat.m_internal:
        return transformed_points(flat)

    # InkList has 2-level lists: one flat list per stroke
    strokes = mupdf.pdf_dict_gets(annot_obj, 'InkList')
    if strokes.m_internal:
        return [
            transformed_points(mupdf.pdf_array_get(strokes, idx))
            for idx in range(mupdf.pdf_array_len(strokes))
        ]
@property
def xref(self):
    """annotation xref number"""
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    return mupdf.pdf_to_num(annot_obj)
class Archive:
    """A MuPDF multi-archive plus a Python-side list of what was mounted.

    ``self.this`` holds the MuPDF multi-archive; ``self._subarchives`` is a
    list of dicts with keys "fmt", "entries" and "path", one per mounted
    sub-archive (consecutive 'tree' items with equal path are merged).
    """

    def __init__( self, *args):
        '''
        Archive(dirname [, path]) - from folder
        Archive(file [, path]) - from file name or object
        Archive(data, name) - from memory item
        Archive() - empty archive
        Archive(archive [, path]) - from archive
        '''
        self._subarchives = list()
        self.this = mupdf.fz_new_multi_archive()
        if args:
            self.add( *args)

    def __repr__( self):
        return f'Archive, sub-archives: {len(self._subarchives)}'

    def _add_arch( self, subarch, path=None):
        """Mount another archive under ``path``."""
        mupdf.fz_mount_multi_archive( self.this, subarch, path)

    def _add_dir( self, folder, path=None):
        """Mount a file system directory under ``path``."""
        sub = mupdf.fz_open_directory( folder)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def _add_treeitem( self, memory, name, path=None):
        """Mount a single in-memory item, retrievable as ``name``."""
        buff = JM_BufferFromBytes( memory)
        sub = mupdf.fz_new_tree_archive( mupdf.FzTree())
        mupdf.fz_tree_archive_add_buffer( sub, name, buff)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def _add_ziptarfile( self, filepath, type_, path=None):
        """Mount a zip (type_ == 1) or tar (otherwise) file given by name."""
        if type_ == 1:
            sub = mupdf.fz_open_zip_archive( filepath)
        else:
            sub = mupdf.fz_open_tar_archive( filepath)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def _add_ziptarmemory( self, memory, type_, path=None):
        """Mount a zip (type_ == 1) or tar (otherwise) archive held in memory."""
        buff = JM_BufferFromBytes( memory)
        stream = mupdf.fz_open_buffer( buff)
        if type_==1:
            sub = mupdf.fz_open_zip_archive_with_stream( stream)
        else:
            sub = mupdf.fz_open_tar_archive_with_stream( stream)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def add( self, content, path=None):
        '''
        Add a sub-archive.

        Args:
            content:
                The content to be added. May be one of:
                    `str` - must be path of directory or file.
                    `bytes`, `bytearray`, `io.BytesIO` - raw data.
                    `zipfile.ZipFile`.
                    `tarfile.TarFile`.
                    `pymupdf.Archive`.
                    A two-item tuple `(data, name)`.
                    List or tuple (but not tuple with length 2) of the above.
            path: (str) a "virtual" path name, under which the elements
                of content can be retrieved. Use it to e.g. cope with
                duplicate element names. For a single file or raw data it
                is the (mandatory) name of the item.

        Raises:
            ValueError: `content` is a string naming neither file nor folder.
            TypeError: `content` has an unsupported type.
            AssertionError: a required item name is missing or not a string.
        '''
        def is_binary_data(x):
            return isinstance(x, (bytes, bytearray, io.BytesIO))

        def make_subarch(entries, mount, fmt):
            """Record a mounted sub-archive in self._subarchives."""
            subarch = dict(fmt=fmt, entries=entries, path=mount)
            if fmt != "tree" or self._subarchives == []:
                self._subarchives.append(subarch)
                return
            ltree = self._subarchives[-1]
            if ltree["fmt"] != "tree" or ltree["path"] != subarch["path"]:
                self._subarchives.append(subarch)
            else:
                # consecutive tree items with equal path share one record
                ltree["entries"].extend(subarch["entries"])

        if isinstance(content, pathlib.Path):
            content = str(content)

        if isinstance(content, str):
            if os.path.isdir(content):
                self._add_dir(content, path)
                return make_subarch(os.listdir(content), path, 'dir')
            elif os.path.isfile(content):
                assert isinstance(path, str) and path != '', \
                        f'Need name for binary content, but {path=}.'
                # Read raw bytes: text mode would decode the file and does
                # not yield the binary data a tree item is made from.
                with open(content, 'rb') as f:
                    ff = f.read()
                self._add_treeitem(ff, path)
                return make_subarch([path], None, 'tree')
            else:
                raise ValueError(f'Not a file or directory: {content!r}')

        elif is_binary_data(content):
            # The comma before the message matters: without it the message
            # is concatenated into the string that `path` is compared with.
            assert isinstance(path, str) and path != '', \
                    f'Need name for binary content, but {path=}.'
            self._add_treeitem(content, path)
            return make_subarch([path], None, 'tree')

        elif isinstance(content, zipfile.ZipFile):
            filename = getattr(content, "filename", None)
            if filename is None:
                fp = content.fp.getvalue()
                self._add_ziptarmemory(fp, 1, path)
            else:
                self._add_ziptarfile(filename, 1, path)
            return make_subarch(content.namelist(), path, 'zip')

        elif isinstance(content, tarfile.TarFile):
            filename = getattr(content.fileobj, "name", None)
            if filename is None:
                fp = content.fileobj
                if not isinstance(fp, io.BytesIO):
                    fp = fp.fileobj
                self._add_ziptarmemory(fp.getvalue(), 0, path)
            else:
                self._add_ziptarfile(filename, 0, path)
            return make_subarch(content.getnames(), path, 'tar')

        elif isinstance(content, Archive):
            self._add_arch(content, path)
            return make_subarch([], path, 'multi')

        if isinstance(content, tuple) and len(content) == 2:
            # covers the tree item plus path
            data, name = content
            assert isinstance(name, str), f'Unexpected {type(name)=}'
            if is_binary_data(data):
                self._add_treeitem(data, name, path=path)
            elif isinstance(data, str):
                if os.path.isfile(data):
                    with open(data, 'rb') as f:
                        ff = f.read()
                    self._add_treeitem(ff, name, path=path)
            else:
                assert 0, f'Unexpected {type(data)=}.'
            return make_subarch([name], path, 'tree')

        elif hasattr(content, '__getitem__'):
            # Deal with sequence of disparate items.
            for item in content:
                self.add(item, path)
            return

        else:
            raise TypeError(f'Unrecognised type {type(content)}.')

    @property
    def entry_list( self):
        '''
        List of sub archives.
        '''
        return self._subarchives

    def has_entry( self, name):
        """Ask MuPDF whether the archive contains an entry called ``name``."""
        return mupdf.fz_has_archive_entry( self.this, name)

    def read_entry( self, name):
        """Return the content of entry ``name`` as converted by JM_BinFromBuffer()."""
        buff = mupdf.fz_read_archive_entry( self.this, name)
        return JM_BinFromBuffer( buff)
class Xml:
def __enter__(self):
return self
def __exit__(self, *args):
pass
def __init__( self, rhs):
if isinstance( rhs, mupdf.FzXml):
self.this = rhs
elif isinstance( str):
buff = mupdf.fz_new_buffer_from_copied_data( rhs)
self.this = mupdf.fz_parse_xml_from_html5( buff)
else:
assert 0, f'Unsupported type for rhs: {type(rhs)}'
def _get_node_tree( self):
def show_node(node, items, shift):
while node is not None:
if node.is_text:
items.append((shift, f'"{node.text}"'))
node = node.next
continue
items.append((shift, f"({node.tagname}"))
for k, v in node.get_attributes().items():
items.append((shift, f"={k} '{v}'"))
child = node.first_child
if child:
items = show_node(child, items, shift + 1)
items.append((shift, f"){node.tagname}"))
node = node.next
return items
shift = 0
items = []
items = show_node(self, items, shift)
return items
def add_bullet_list(self):
"""Add bulleted list ("ul" tag)"""
child = self.create_element("ul")
self.append_child(child)
return child
def add_class(self, text):
"""Set some class via CSS. Replaces complete class spec."""
cls = self.get_attribute_value("class")
if cls is not None and text in cls:
return self
self.remove_attribute("class")
if cls is None:
cls = text
else:
cls += " " + text
self.set_attribute("class", cls)
return self
def add_code(self, text=None):
"""Add a "code" tag"""
child = self.create_element("code")
if type(text) is str:
child.append_child(self.create_text_node(text))
prev = self.span_bottom()
if prev is None:
prev = self
prev.append_child(child)
return self
def add_codeblock(self):
"""Add monospaced lines ("pre" node)"""
child = self.create_element("pre")
self.append_child(child)
return child
def add_description_list(self):
"""Add description list ("dl" tag)"""
child = self.create_element("dl")
self.append_child(child)
return child
def add_division(self):
"""Add "div" tag"""
child = self.create_element("div")
self.append_child(child)
return child
def add_header(self, level=1):
"""Add header tag"""
if level not in range(1, 7):
raise ValueError("Header level must be in [1, 6]")
this_tag = self.tagname
new_tag = f"h{level}"
child = self.create_element(new_tag)
if this_tag not in ("h1", "h2", "h3", "h4", "h5", "h6", "p"):
self.append_child(child)
return child
self.parent.append_child(child)
return child
def add_horizontal_line(self):
"""Add horizontal line ("hr" tag)"""
child = self.create_element("hr")
self.append_child(child)
return child
def add_image(self, name, width=None, height=None, imgfloat=None, align=None):
"""Add image node (tag "img")."""
child = self.create_element("img")
if width is not None:
child.set_attribute("width", f"{width}")
if height is not None:
child.set_attribute("height", f"{height}")
if imgfloat is not None:
child.set_attribute("style", f"float: {imgfloat}")
if align is not None:
child.set_attribute("align", f"{align}")
child.set_attribute("src", f"{name}")
self.append_child(child)
return child
def add_link(self, href, text=None):
"""Add a hyperlink ("a" tag)"""
child = self.create_element("a")
if not isinstance(text, str):
text = href
child.set_attribute("href", href)
child.append_child(self.create_text_node(text))
prev = self.span_bottom()
if prev is None:
prev = self
prev.append_child(child)
return self
def add_list_item(self):
"""Add item ("li" tag) under a (numbered or bulleted) list."""
if self.tagname not in ("ol", "ul"):
raise ValueError("cannot add list item to", self.tagname)
child = self.create_element("li")
self.append_child(child)
return child
def add_number_list(self, start=1, numtype=None):
"""Add numbered list ("ol" tag)"""
child = self.create_element("ol")
if start > 1:
child.set_attribute("start", str(start))
if numtype is not None:
child.set_attribute("type", numtype)
self.append_child(child)
return child
def add_paragraph(self):
"""Add "p" tag"""
child = self.create_element("p")
if self.tagname != "p":
self.append_child(child)
else:
self.parent.append_child(child)
return child
def add_span(self):
child = self.create_element("span")
self.append_child(child)
return child
def add_style(self, text):
"""Set some style via CSS style. Replaces complete style spec."""
style = self.get_attribute_value("style")
if style is not None and text in style:
return self
self.remove_attribute("style")
if style is None:
style = text
else:
style += ";" + text
self.set_attribute("style", style)
return self
def add_subscript(self, text=None):
"""Add a subscript ("sub" tag)"""
child = self.create_element("sub")
if type(text) is str:
child.append_child(self.create_text_node(text))
prev = self.span_bottom()
if prev is None:
prev = self
prev.append_child(child)
return self
def add_superscript(self, text=None):
"""Add a superscript ("sup" tag)"""
child = self.create_element("sup")
if type(text) is str:
child.append_child(self.create_text_node(text))
prev = self.span_bottom()
if prev is None:
prev = self
prev.append_child(child)
return self
def add_text(self, text):
"""Add text. Line breaks are honored."""
lines = text.splitlines()
line_count = len(lines)
prev = self.span_bottom()
if prev is None:
prev = self
for i, line in enumerate(lines):
prev.append_child(self.create_text_node(line))
if i < line_count - 1:
prev.append_child(self.create_element("br"))
return self
def append_child( self, child):
mupdf.fz_dom_append_child( self.this, child.this)
def append_styled_span(self, style):
span = self.create_element("span")
span.add_style(style)
prev = self.span_bottom()
if prev is None:
prev = self
prev.append_child(span)
return prev
def bodytag( self):
return Xml( mupdf.fz_dom_body( self.this))
def clone( self):
ret = mupdf.fz_dom_clone( self.this)
return Xml( ret)
@staticmethod
def color_text(color):
if type(color) is str:
return color
if type(color) is int:
return f"rgb({sRGB_to_rgb(color)})"
if type(color) in (tuple, list):
return f"rgb{tuple(color)}"
return color
def create_element( self, tag):
return Xml( mupdf.fz_dom_create_element( self.this, tag))
def create_text_node( self, text):
return Xml( mupdf.fz_dom_create_text_node( self.this, text))
def debug(self):
"""Print a list of the node tree below self."""
items = self._get_node_tree()
for item in items:
message(" " * item[0] + item[1].replace("\n", "\\n"))
def find( self, tag, att, match):
ret = mupdf.fz_dom_find( self.this, tag, att, match)
if ret.m_internal:
return Xml( ret)
def find_next( self, tag, att, match):
ret = mupdf.fz_dom_find_next( self.this, tag, att, match)
if ret.m_internal:
return Xml( ret)
@property
def first_child( self):
if mupdf.fz_xml_text( self.this):
# text node, has no child.
return
ret = mupdf.fz_dom_first_child( self)
if ret.m_internal:
return Xml( ret)
def get_attribute_value( self, key):
assert key
return mupdf.fz_dom_attribute( self.this, key)
def get_attributes( self):
if mupdf.fz_xml_text( self.this):
# text node, has no attributes.
return
result = dict()
i = 0
while 1:
val, key = mupdf.fz_dom_get_attribute( self.this, i)
if not val or not key:
break
result[ key] = val
i += 1
return result
def insert_after( self, node):
mupdf.fz_dom_insert_after( self.this, node.this)
def insert_before( self, node):
mupdf.fz_dom_insert_before( self.this, node.this)
def insert_text(self, text):
lines = text.splitlines()
line_count = len(lines)
for i, line in enumerate(lines):
self.append_child(self.create_text_node(line))
if i < line_count - 1:
self.append_child(self.create_element("br"))
return self
@property
def is_text(self):
"""Check if this is a text node."""
return self.text is not None
@property
def last_child(self):
"""Return last child node."""
child = self.first_child
if child is None:
return None
while True:
next = child.next
if not next:
return child
child = next
@property
def next( self):
ret = mupdf.fz_dom_next( self.this)
if ret.m_internal:
return Xml( ret)
@property
def parent( self):
ret = mupdf.fz_dom_parent( self.this)
if ret.m_internal:
return Xml( ret)
@property
def previous( self):
ret = mupdf.fz_dom_previous( self.this)
if ret.m_internal:
return Xml( ret)
def remove( self):
mupdf.fz_dom_remove( self.this)
def remove_attribute( self, key):
assert key
mupdf.fz_dom_remove_attribute( self.this, key)
@property
def root( self):
return Xml( mupdf.fz_xml_root( self.this))
def set_align(self, align):
"""Set text alignment via CSS style"""
text = "text-align: %s"
if isinstance( align, str):
t = align
elif align == TEXT_ALIGN_LEFT:
t = "left"
elif align == TEXT_ALIGN_CENTER:
t = "center"
elif align == TEXT_ALIGN_RIGHT:
t = "right"
elif align == TEXT_ALIGN_JUSTIFY:
t = "justify"
else:
raise ValueError(f"Unrecognised {align=}")
text = text % t
self.add_style(text)
return self
def set_attribute( self, key, value):
assert key
mupdf.fz_dom_add_attribute( self.this, key, value)
def set_bgcolor(self, color):
"""Set background color via CSS style"""
text = f"background-color: %s" % self.color_text(color)
self.add_style(text) # does not work on span level
return self
def set_bold(self, val=True):
"""Set bold on / off via CSS style"""
if val:
val="bold"
else:
val="normal"
text = "font-weight: %s" % val
self.append_styled_span(text)
return self
def set_color(self, color):
"""Set text color via CSS style"""
text = f"color: %s" % self.color_text(color)
self.append_styled_span(text)
return self
def set_columns(self, cols):
"""Set number of text columns via CSS style"""
text = f"columns: {cols}"
self.append_styled_span(text)
return self
def set_font(self, font):
"""Set font-family name via CSS style"""
text = "font-family: %s" % font
self.append_styled_span(text)
return self
def set_fontsize(self, fontsize):
"""Set font size name via CSS style"""
if type(fontsize) is str:
px=""
else:
px="px"
text = f"font-size: {fontsize}{px}"
self.append_styled_span(text)
return self
def set_id(self, unique):
"""Set a unique id."""
# check uniqueness
root = self.root
if root.find(None, "id", unique):
raise ValueError(f"id '{unique}' already exists")
self.set_attribute("id", unique)
return self
def set_italic(self, val=True):
"""Set italic on / off via CSS style"""
if val:
val="italic"
else:
val="normal"
text = "font-style: %s" % val
self.append_styled_span(text)
return self
def set_leading(self, leading):
"""Set inter-line spacing value via CSS style - block-level only."""
text = f"-mupdf-leading: {leading}"
self.add_style(text)
return self
def set_letter_spacing(self, spacing):
"""Set inter-letter spacing value via CSS style"""
text = f"letter-spacing: {spacing}"
self.append_styled_span(text)
return self
def set_lineheight(self, lineheight):
"""Set line height name via CSS style - block-level only."""
text = f"line-height: {lineheight}"
self.add_style(text)
return self
def set_margins(self, val):
"""Set margin values via CSS style"""
text = "margins: %s" % val
self.append_styled_span(text)
return self
def set_opacity(self, opacity):
"""Set opacity via CSS style"""
text = f"opacity: {opacity}"
self.append_styled_span(text)
return self
def set_pagebreak_after(self):
"""Insert a page break after this node."""
text = "page-break-after: always"
self.add_style(text)
return self
def set_pagebreak_before(self):
"""Insert a page break before this node."""
text = "page-break-before: always"
self.add_style(text)
return self
def set_properties(
self,
align=None,
bgcolor=None,
bold=None,
color=None,
columns=None,
font=None,
fontsize=None,
indent=None,
italic=None,
leading=None,
letter_spacing=None,
lineheight=None,
margins=None,
pagebreak_after=None,
pagebreak_before=None,
word_spacing=None,
unqid=None,
cls=None,
):
"""Set any or all properties of a node.
To be used for existing nodes preferrably.
"""
root = self.root
temp = root.add_division()
if align is not None:
temp.set_align(align)
if bgcolor is not None:
temp.set_bgcolor(bgcolor)
if bold is not None:
temp.set_bold(bold)
if color is not None:
temp.set_color(color)
if columns is not None:
temp.set_columns(columns)
if font is not None:
temp.set_font(font)
if fontsize is not None:
temp.set_fontsize(fontsize)
if indent is not None:
temp.set_text_indent(indent)
if italic is not None:
temp.set_italic(italic)
if leading is not None:
temp.set_leading(leading)
if letter_spacing is not None:
temp.set_letter_spacing(letter_spacing)
if lineheight is not None:
temp.set_lineheight(lineheight)
if margins is not None:
temp.set_margins(margins)
if pagebreak_after is not None:
temp.set_pagebreak_after()
if pagebreak_before is not None:
temp.set_pagebreak_before()
if word_spacing is not None:
temp.set_word_spacing(word_spacing)
if unqid is not None:
self.set_id(unqid)
if cls is not None:
self.add_class(cls)
styles = []
top_style = temp.get_attribute_value("style")
if top_style is not None:
styles.append(top_style)
child = temp.first_child
while child:
styles.append(child.get_attribute_value("style"))
child = child.first_child
self.set_attribute("style", ";".join(styles))
temp.remove()
return self
def set_text_indent(self, indent):
"""Set text indentation name via CSS style - block-level only."""
text = f"text-indent: {indent}"
self.add_style(text)
return self
def set_underline(self, val="underline"):
    """Apply a CSS 'text-decoration' value through a styled span."""
    self.append_styled_span("text-decoration: %s" % val)
    return self
def set_word_spacing(self, spacing):
    """Set the inter-word spacing value via CSS style."""
    self.append_styled_span(f"word-spacing: {spacing}")
    return self
def span_bottom(self):
    """Find deepest level in stacked spans.

    Returns None unless the last non-text child of this node is a span.
    Otherwise descends through first children that are spans, stepping
    over text nodes and "a", "sub", "sup" and "body" elements, and returns
    the last span reached.
    """
    node = self.last_child
    # Step backwards over trailing text nodes.
    while node is not None and node.is_text:
        node = node.previous
    if node is None or node.tagname != "span":
        return None

    deepest = self
    skipped_tags = ("a", "sub", "sup", "body")
    while node is not None:
        if node.tagname in skipped_tags or node.is_text:
            node = node.next
        elif node.tagname == "span":
            deepest = node
            node = node.first_child
        else:
            break
    return deepest
@property
def tagname(self):
    """Tag of this node as reported by mupdf.fz_xml_tag()."""
    node = self.this
    return mupdf.fz_xml_tag(node)
@property
def text(self):
    """Text of this node as reported by mupdf.fz_xml_text()."""
    node = self.this
    return mupdf.fz_xml_text(node)
# Aliases: these three names are bound to the same function as add_code.
add_var = add_code
add_samp = add_code
add_kbd = add_code
class Colorspace:
    """Thin wrapper around a mupdf.FzColorspace stored in ``self.this``."""

    def __init__(self, type_):
        """Supported are GRAY, RGB and CMYK.

        Args:
            type_: either an existing mupdf.FzColorspace, which is wrapped
                as is, or one of CS_GRAY, CS_CMYK, CS_RGB. Any other value
                falls back to RGB.
        """
        if isinstance(type_, mupdf.FzColorspace):
            self.this = type_
        elif type_ == CS_GRAY:
            self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_GRAY)
        elif type_ == CS_CMYK:
            self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_CMYK)
        else:
            # CS_RGB and every unrecognised value both map to RGB.
            self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)

    def __repr__(self):
        # A wrapped FzColorspace may have a component count other than
        # 1, 3 or 4; use an empty label instead of raising IndexError.
        label = {1: "GRAY", 3: "RGB", 4: "CMYK"}.get(self.n, "")
        return "Colorspace(CS_%s) - %s" % (label, self.name)

    def _name(self):
        """Return the name reported by mupdf.fz_colorspace_name()."""
        return mupdf.fz_colorspace_name(self.this)

    @property
    def n(self):
        """Size of one pixel."""
        return mupdf.fz_colorspace_n(self.this)

    @property
    def name(self):
        """Name of the Colorspace."""
        return self._name()
class DeviceWrapper:
    """Holds a MuPDF device in ``self.this``.

    Accepted argument lists:
        (mupdf.FzDevice,)           - wrap an existing device
        (Pixmap, clip)              - draw device, optionally with a bbox
        (mupdf.FzDisplayList,)      - list device
        (mupdf.FzStextPage, flags)  - structured-text device
    """

    def __init__(self, *args):
        if args_match(args, mupdf.FzDevice):
            (existing,) = args
            self.this = existing
        elif args_match(args, Pixmap, None):
            pixmap, clip = args
            irect = JM_irect_from_py(clip)
            # An infinite clip means "no bounding box".
            if mupdf.fz_is_infinite_irect(irect):
                self.this = mupdf.fz_new_draw_device(mupdf.FzMatrix(), pixmap)
            else:
                self.this = mupdf.fz_new_draw_device_with_bbox(mupdf.FzMatrix(), pixmap, irect)
        elif args_match(args, mupdf.FzDisplayList):
            (display_list,) = args
            self.this = mupdf.fz_new_list_device(display_list)
        elif args_match(args, mupdf.FzStextPage, None):
            stext_page, flags = args
            options = mupdf.FzStextOptions(flags)
            self.this = mupdf.fz_new_stext_device(stext_page, options)
        else:
            raise Exception(f'Unrecognised args for DeviceWrapper: {args!r}')
class DisplayList:
    """Wrapper around a mupdf.FzDisplayList stored in ``self.this``."""

    def __del__(self):
        if type(self) is not DisplayList:
            return
        self.thisown = False

    def __init__(self, *args):
        """Create from a mupdf.FzRect or wrap an existing mupdf.FzDisplayList.

        Raises:
            AssertionError: for any other argument list.
        """
        if len(args) == 1 and isinstance(args[0], mupdf.FzRect):
            self.this = mupdf.FzDisplayList(args[0])
        elif len(args) == 1 and isinstance(args[0], mupdf.FzDisplayList):
            self.this = args[0]
        else:
            # Raised explicitly so the check survives `python -O`, which
            # would otherwise leave self.this unset.
            raise AssertionError(f'Unrecognised {args=}')

    def get_pixmap(self, matrix=None, colorspace=None, alpha=0, clip=None):
        """Render the display list via JM_pixmap_from_display_list().

        A ``colorspace`` that is not a Colorspace instance is replaced by
        RGB.
        """
        if isinstance(colorspace, Colorspace):
            colorspace = colorspace.this
        else:
            colorspace = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)
        val = JM_pixmap_from_display_list(self.this, matrix, colorspace, alpha, clip, None)
        val.thisown = True
        return val

    def get_textpage(self, flags=3):
        """Make a TextPage from a DisplayList."""
        stext_options = mupdf.FzStextOptions()
        stext_options.flags = flags
        val = mupdf.FzStextPage(self.this, stext_options)
        val.thisown = True
        return val

    @property
    def rect(self):
        """Bounds of the display list as a Rect."""
        val = JM_py_from_rect(mupdf.fz_bound_display_list(self.this))
        val = Rect(val)
        return val

    def run(self, dw, m, area):
        """Run the display list through the device held by ``dw``.

        Args:
            dw: device wrapper; its device is taken from ``dw.device`` if
                that attribute exists, otherwise from ``dw.this`` (which
                is where DeviceWrapper stores it).
            m: matrix, converted with JM_matrix_from_py().
            area: rectangle, converted with JM_rect_from_py().
        """
        device = dw.device if hasattr(dw, "device") else dw.this
        mupdf.fz_run_display_list(
            self.this,
            device,
            JM_matrix_from_py(m),
            JM_rect_from_py(area),
            mupdf.FzCookie(),
        )
# When the optional `extra` module is in use, bind its FzDocument_insert_pdf
# to a module-level name.
if g_use_extra:
extra_FzDocument_insert_pdf = extra.FzDocument_insert_pdf
class Document:
def __contains__(self, loc) -> bool:
if type(loc) is int:
if loc < self.page_count:
return True
return False
if type(loc) not in (tuple, list) or len(loc) != 2:
return False
chapter, pno = loc
if (0
or not isinstance(chapter, int)
or chapter < 0
or chapter >= self.chapter_count
):
return False
if (0
or not isinstance(pno, int)
or pno < 0
or pno >= self.chapter_page_count(chapter)
):
return False
return True
def __delitem__(self, i)->None:
if not self.is_pdf:
raise ValueError("is no PDF")
if type(i) is int:
return self.delete_page(i)
if type(i) in (list, tuple, range):
return self.delete_pages(i)
if type(i) is not slice:
raise ValueError("bad argument type")
pc = self.page_count
start = i.start if i.start else 0
stop = i.stop if i.stop else pc
step = i.step if i.step else 1
while start < 0:
start += pc
if start >= pc:
raise ValueError("bad page number(s)")
while stop < 0:
stop += pc
if stop > pc:
raise ValueError("bad page number(s)")
return self.delete_pages(range(start, stop, step))
def __enter__(self):
"""Context manager entry: return the document itself."""
return self
def __exit__(self, *args):
self.close()
def __getitem__(self, i: int =0):
if isinstance(i, slice):
return [self[j] for j in range(*i.indices(len(self)))]
assert isinstance(i, int) or (isinstance(i, tuple) and len(i) == 2 and all(isinstance(x, int) for x in i)), \
f'Invalid item number: {i=}.'
if i not in self:
raise IndexError(f"page {i} not in document")
return self.load_page(i)
def __init__(self, filename=None, stream=None, filetype=None, rect=None, width=0, height=0, fontsize=11):
"""Creates a document. Use 'open' as a synonym.
Notes:
Basic usages:
open() - new PDF document
open(filename) - string or pathlib.Path, must have supported
file extension.
open(type, buffer) - type: valid extension, buffer: bytes object.
open(stream=buffer, filetype=type) - keyword version of previous.
open(filename, filetype=type) - filename with unrecognized extension.
rect, width, height, fontsize: layout reflowable document
on open (e.g. EPUB). Ignored if n/a.
Raises:
TypeError: filename or stream has an unsupported type.
FileNotFoundError: filename does not exist.
FileDataError: filename is not a regular file, or the file cannot be
opened / is a broken SVG.
EmptyFileError: the file or the stream is empty.
ValueError: filetype is not recognised by MuPDF.
"""
# We temporarily set JM_mupdf_show_errors=0 while we are constructing,
# then restore its original value in a `finally:` block.
#
global JM_mupdf_show_errors
JM_mupdf_show_errors_old = JM_mupdf_show_errors
JM_mupdf_show_errors = 0
try:
# Default state of a fresh document object.
self.is_closed = False
self.is_encrypted = False
# NOTE(review): exact duplicate of the previous assignment.
self.is_encrypted = False
self.metadata = None
self.FontInfos = []
self.Graftmaps = {}
self.ShownPages = {}
self.InsertedImages = {}
self._page_refs = weakref.WeakValueDictionary()
# Wrap an already existing mupdf.PdfDocument as is.
# NOTE(review): this early return skips everything below, so e.g.
# self.stream and self._graft_id (both read by __repr__) are not set
# on this path - confirm this is intended.
if isinstance(filename, mupdf.PdfDocument):
pdf_document = filename
self.this = pdf_document
self.this_is_pdf = True
return
# Classic implementation temporarily sets JM_mupdf_show_errors=0 then
# restores the previous value in `fz_always() {...}` before returning.
#
# Normalise filename to a str: objects with .absolute (e.g.
# pathlib.Path) are converted with str(), objects with .name use that
# attribute.
if not filename or type(filename) is str:
pass
elif hasattr(filename, "absolute"):
filename = str(filename)
elif hasattr(filename, "name"):
filename = filename.name
else:
raise TypeError(f"bad filename: {type(filename)=} {filename=}.")
# Normalise stream to bytes and keep it in self.stream.
if stream is not None:
if type(stream) is bytes:
self.stream = stream
elif type(stream) is bytearray:
self.stream = bytes(stream)
elif type(stream) is io.BytesIO:
self.stream = stream.getvalue()
else:
raise TypeError(f"bad stream: {type(stream)=}.")
stream = self.stream
# A stream without any type hint is treated as PDF.
if not (filename or filetype):
filename = 'pdf'
else:
self.stream = None
# The document comes from a file only if a filename is given and no
# stream was supplied.
if filename and self.stream is None:
from_file = True
self._name = filename
else:
from_file = False
self._name = ""
if from_file:
if not os.path.exists(filename):
msg = f"no such file: '{filename}'"
raise FileNotFoundError(msg)
elif not os.path.isfile(filename):
msg = f"'{filename}' is no file"
raise FileDataError(msg)
if from_file and os.path.getsize(filename) == 0:
raise EmptyFileError(f'Cannot open empty file: {filename=}.')
if type(self.stream) is bytes and len(self.stream) == 0:
raise EmptyFileError(f'Cannot open empty stream.')
# Layout size: a non-infinite rect overrides width / height.
w = width
h = height
r = JM_rect_from_py(rect)
if not mupdf.fz_is_infinite_rect(r):
w = r.x1 - r.x0
h = r.y1 - r.y0
if stream: # stream given, **MUST** be bytes!
assert isinstance(stream, bytes)
c = stream
#len = (size_t) PyBytes_Size(stream);
if mupdf_cppyy:
buffer_ = mupdf.fz_new_buffer_from_copied_data( c)
data = mupdf.fz_open_buffer( buffer_)
else:
# Pass raw bytes data to mupdf.fz_open_memory(). This assumes
# that the bytes string will not be modified; i think the
# original PyMuPDF code makes the same assumption. Presumably
# setting self.stream above ensures that the bytes will not be
# garbage collected?
data = mupdf.fz_open_memory(mupdf.python_buffer_data(c), len(c))
# The filename, if any, takes precedence over filetype as the format
# hint passed to MuPDF.
magic = filename
if not magic:
magic = filetype
# fixme: pymupdf does:
# handler = fz_recognize_document(gctx, filetype);
# if (!handler) raise ValueError( MSG_BAD_FILETYPE)
# but prefer to leave fz_open_document_with_stream() to raise.
doc = mupdf.fz_open_document_with_stream(magic, data)
else:
if filename:
if not filetype:
# Let MuPDF determine the handler from the file itself.
try:
doc = mupdf.fz_open_document(filename)
except Exception as e:
if g_exceptions_verbose > 1: exception_info()
raise FileDataError(f'Failed to open file {filename!r}.') from e
else:
# An explicit filetype selects the document handler.
handler = mupdf.ll_fz_recognize_document(filetype)
if handler:
if handler.open:
#log( f'{handler.open=}')
#log( f'{dir(handler.open)=}')
try:
if mupdf_version_tuple >= (1, 24):
stream = mupdf.FzStream(filename)
accel = mupdf.FzStream()
archive = mupdf.FzArchive(None)
doc = mupdf.ll_fz_document_open_fn_call(
handler.open,
stream.m_internal,
accel.m_internal,
archive.m_internal,
)
else:
doc = mupdf.ll_fz_document_open_fn_call( handler.open, filename)
except Exception as e:
if g_exceptions_verbose > 1: exception_info()
raise FileDataError(f'Failed to open file {filename!r} as type {filetype!r}.') from e
doc = mupdf.FzDocument( doc)
else:
# NOTE(review): for MuPDF >= 1.24 this branch assigns no
# `doc`, so the code below would fail with a NameError -
# confirm whether handlers without `open` can occur there.
if mupdf_version_tuple < (1, 24):
if handler.open_with_stream:
data = mupdf.fz_open_file( filename)
doc = mupdf.fz_document_open_with_stream_fn_call( handler.open_with_stream, data)
else:
assert 0
else:
raise ValueError( MSG_BAD_FILETYPE)
else:
# Neither stream nor filename: create a new, empty PDF.
pdf = mupdf.PdfDocument()
doc = mupdf.FzDocument(pdf)
# Lay out the document with the requested size, or with 400 x 600 at
# font size 11 if MuPDF reports it as reflowable.
if w > 0 and h > 0:
mupdf.fz_layout_document(doc, w, h, fontsize)
elif mupdf.fz_is_document_reflowable(doc):
mupdf.fz_layout_document(doc, 400, 600, 11)
this = doc
self.this = this
# fixme: not sure where self.thisown gets initialised in PyMuPDF.
#
self.thisown = True
if self.thisown:
self._graft_id = TOOLS.gen_id()
if self.needs_pass:
self.is_encrypted = True
else: # we won't init until doc is decrypted
self.init_doc()
# the following hack detects invalid/empty SVG files, which else may lead
# to interpreter crashes
if filename and filename.lower().endswith("svg") or filetype and "svg" in filetype.lower():
try:
_ = self.convert_to_pdf() # this seems to always work
except Exception as e:
if g_exceptions_verbose > 1: exception_info()
raise FileDataError("cannot open broken document") from e
# With the `extra` module in use, pick the matching page counter once.
if g_use_extra:
self.this_is_pdf = isinstance( self.this, mupdf.PdfDocument)
if self.this_is_pdf:
self.page_count2 = extra.page_count_pdf
else:
self.page_count2 = extra.page_count_fz
finally:
JM_mupdf_show_errors = JM_mupdf_show_errors_old
def __len__(self) -> int:
"""Return ``self.page_count``."""
return self.page_count
def __repr__(self) -> str:
m = "closed " if self.is_closed else ""
if self.stream is None:
if self.name == "":
return m + "Document(<new PDF, doc# %i>)" % self._graft_id
return m + "Document('%s')" % (self.name,)
return m + "Document('%s', <memory, doc# %i>)" % (self.name, self._graft_id)
def _addFormFont(self, name, font):
    """Add new form font.

    Stores ``font`` (converted with JM_pdf_obj_from_str) under the key
    ``name`` in the /Root/AcroForm/DR/Font dictionary. Does nothing for
    non-PDF documents.

    Raises:
        ValueError: document closed or encrypted.
        RuntimeError: the PDF has no form font dictionary.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    if not pdf:
        return
    trailer = mupdf.pdf_trailer(pdf)
    font_dict = mupdf.pdf_dict_getl(
        trailer,
        PDF_NAME('Root'),
        PDF_NAME('AcroForm'),
        PDF_NAME('DR'),
        PDF_NAME('Font'),
    )
    if not (font_dict.m_internal and mupdf.pdf_is_dict(font_dict)):
        raise RuntimeError("PDF has no form fonts yet")
    key = mupdf.pdf_new_name(name)
    value = JM_pdf_obj_from_str(pdf, font)
    mupdf.pdf_dict_put(font_dict, key, value)
def _delToC(self):
    """Delete the TOC.

    Returns:
        The list of xrefs of the deleted outline items, followed by the
        xref of the outline root. Empty if the document is no PDF or has
        no outlines.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    if not pdf:
        return []  # not a pdf
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    outlines = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
    if not outlines.m_internal:
        return []  # no outlines or some problem
    first_item = mupdf.pdf_dict_get(outlines, PDF_NAME('First'))
    removed = JM_outline_xrefs(first_item, [])
    item_count = len(removed)
    # Drop the outline root object and its entry in the catalog first ...
    outlines_xref = mupdf.pdf_to_num(outlines)
    mupdf.pdf_delete_object(pdf, outlines_xref)
    mupdf.pdf_dict_del(catalog, PDF_NAME('Outlines'))
    # ... then every single outline item.
    for idx in range(item_count):
        _, item_xref = JM_INT_ITEM(removed, idx)
        mupdf.pdf_delete_object(pdf, item_xref)
    removed.append(outlines_xref)
    self.init_doc()
    return removed
def _delete_page(self, pno):
    """Delete page ``pno`` and drop MuPDF's page tree if a reverse page map exists."""
    pdf = _as_pdf_document(self)
    mupdf.pdf_delete_page(pdf, pno)
    internal = pdf.m_internal
    if internal.rev_page_map:
        mupdf.ll_pdf_drop_page_tree(internal)
def _deleteObject(self, xref):
    """Delete object.

    Raises:
        ValueError: if ``xref`` is outside 1 .. xref length - 1.
    """
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    highest = mupdf.pdf_xref_len(pdf) - 1
    if not _INRANGE(xref, 1, highest):
        raise ValueError(MSG_BAD_XREF)
    mupdf.pdf_delete_object(pdf, xref)
def _embeddedFileGet(self, idx):
    """Return the content of embedded file number ``idx``.

    The EmbeddedFiles /Names array is read in (name, filespec) pairs, so
    the file specification of entry ``idx`` sits at position 2*idx + 1.
    """
    pdf = _as_pdf_document(self)
    trailer = mupdf.pdf_trailer(pdf)
    names = mupdf.pdf_dict_getl(
        trailer,
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    entry = mupdf.pdf_array_get(names, 2 * idx + 1)
    stream_obj = mupdf.pdf_dict_getl(entry, PDF_NAME('EF'), PDF_NAME('F'))
    return JM_BinFromBuffer(mupdf.pdf_load_stream(stream_obj))
def _embeddedFileIndex(self, item: typing.Union[int, str]) -> int:
filenames = self.embfile_names()
msg = "'%s' not in EmbeddedFiles array." % str(item)
if item in filenames:
idx = filenames.index(item)
elif item in range(len(filenames)):
idx = item
else:
raise ValueError(msg)
return idx
def _embfile_add(self, name, buffer_, filename=None, ufilename=None, desc=None):
    """Embed ``buffer_`` as a new file named ``name``.

    Creates the /Root/Names/EmbeddedFiles/Names array if it does not
    exist yet, then appends the name and the new file specification.

    Returns:
        The xref of the /EF/F object of the new file specification.

    Raises:
        TypeError: if ``buffer_`` cannot be converted to a MuPDF buffer.
    """
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    content = JM_BufferFromBytes(buffer_)
    if not content.m_internal:
        raise TypeError(MSG_BAD_BUFFER)

    name_path = (
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    names = mupdf.pdf_dict_getl(mupdf.pdf_trailer(pdf), *name_path)
    if not mupdf.pdf_is_array(names):
        catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
        names = mupdf.pdf_new_array(pdf, 6)  # an even number!
        mupdf.pdf_dict_putl(
            catalog,
            names,
            PDF_NAME('Names'),
            PDF_NAME('EmbeddedFiles'),
            PDF_NAME('Names'),
        )

    filespec = JM_embed_file(pdf, content, filename, ufilename, desc, 1)
    stream_obj = mupdf.pdf_dict_getl(filespec, PDF_NAME('EF'), PDF_NAME('F'))
    xref = mupdf.pdf_to_num(stream_obj)
    mupdf.pdf_array_push(names, mupdf.pdf_new_text_string(name))
    mupdf.pdf_array_push(names, filespec)
    return xref
def _embfile_del(self, idx):
    """Delete the two EmbeddedFiles /Names array items at ``idx`` and ``idx + 1``."""
    pdf = _as_pdf_document(self)
    trailer = mupdf.pdf_trailer(pdf)
    names = mupdf.pdf_dict_getl(
        trailer,
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    # Remove the higher position first so the lower one is not shifted.
    for position in (idx + 1, idx):
        mupdf.pdf_array_delete(names, position)
def _embfile_info(self, idx, infodict):
    """Fill ``infodict`` with information on embedded file number ``idx``.

    Stores the collection item xref ("collection", 0 if absent), the
    file name, unicode file name and description, plus size and length
    (-1 where the PDF gives no value).

    Returns:
        The xref of the file's /EF/F object.
    """
    pdf = _as_pdf_document(self)
    names = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    filespec = mupdf.pdf_array_get(names, 2 * idx + 1)

    collection_item = mupdf.pdf_dict_get(filespec, PDF_NAME('CI'))
    infodict["collection"] = (
        mupdf.pdf_to_num(collection_item) if collection_item.m_internal else 0
    )

    text = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(filespec, PDF_NAME('F')))
    infodict[dictkey_filename] = JM_EscapeStrFromStr(text)
    text = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(filespec, PDF_NAME('UF')))
    infodict[dictkey_ufilename] = JM_EscapeStrFromStr(text)
    text = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(filespec, PDF_NAME('Desc')))
    infodict[dictkey_desc] = JM_UnicodeFromStr(text)

    stream_obj = mupdf.pdf_dict_getl(filespec, PDF_NAME('EF'), PDF_NAME('F'))
    xref = mupdf.pdf_to_num(stream_obj)

    stored_length = -1
    length_obj = mupdf.pdf_dict_get(stream_obj, PDF_NAME('Length'))
    if length_obj.m_internal:
        stored_length = mupdf.pdf_to_int(length_obj)

    # Size: /DL if present, else /Params/Size, else -1.
    size = -1
    size_obj = mupdf.pdf_dict_get(stream_obj, PDF_NAME('DL'))
    if not size_obj.m_internal:
        size_obj = mupdf.pdf_dict_getl(stream_obj, PDF_NAME('Params'), PDF_NAME('Size'))
    if size_obj.m_internal:
        size = mupdf.pdf_to_int(size_obj)

    infodict[dictkey_size] = size
    infodict[dictkey_length] = stored_length
    return xref
def _embfile_names(self, namelist):
    """Get list of embedded file names.

    Appends every even-positioned item of the EmbeddedFiles /Names array
    to ``namelist``; nothing is appended if there is no such array.
    """
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    trailer = mupdf.pdf_trailer(pdf)
    names = mupdf.pdf_dict_getl(
        trailer,
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    if not mupdf.pdf_is_array(names):
        return
    for position in range(0, mupdf.pdf_array_len(names), 2):
        raw_name = mupdf.pdf_to_text_string(mupdf.pdf_array_get(names, position))
        namelist.append(JM_EscapeStrFromStr(raw_name))
def _embfile_upd(self, idx, buffer_=None, filename=None, ufilename=None, desc=None):
    """Update embedded file number ``idx``.

    Args:
        idx: index of the embedded file.
        buffer_: new file content; left unchanged when falsy.
        filename, ufilename, desc: new /F, /UF and /Desc values; each is
            left unchanged when falsy.

    Returns:
        The xref of the file's /EF/F object.

    Raises:
        TypeError: if ``buffer_`` is given but cannot be converted to a
            MuPDF buffer.
    """
    pdf = _as_pdf_document(self)
    names = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    entry = mupdf.pdf_array_get(names, 2*idx+1)
    filespec = mupdf.pdf_dict_getl(entry, PDF_NAME('EF'), PDF_NAME('F'))
    if not filespec.m_internal:
        RAISEPY( "bad PDF: no /EF object", JM_Exc_FileDataError)
    res = JM_BufferFromBytes(buffer_)
    # `buffer_` is the caller's Python object and has no `m_internal`;
    # only the converted MuPDF buffer `res` can be checked that way.
    if buffer_ and not res.m_internal:
        raise TypeError( MSG_BAD_BUFFER)
    if buffer_ and res.m_internal:
        JM_update_stream(pdf, filespec, res, 1)
        # adjust /DL and /Size parameters
        size, _ = mupdf.fz_buffer_storage(res)
        size_obj = mupdf.pdf_new_int(size)
        mupdf.pdf_dict_put(filespec, PDF_NAME('DL'), size_obj)
        mupdf.pdf_dict_putl(filespec, size_obj, PDF_NAME('Params'), PDF_NAME('Size'))
    xref = mupdf.pdf_to_num(filespec)
    if filename:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('F'), filename)
    if ufilename:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('UF'), ufilename)
    if desc:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('Desc'), desc)
    return xref
def _extend_toc_items(self, items):
    """Add color info to all items of an extended TOC list.

    For every item, the dictionary at position 3 receives the outline
    xref and, where present in the PDF, italic / bold flags, the
    collapse state, the color and the zoom value. ``items`` is modified
    in place.

    Raises:
        ValueError: document closed, or an item has no dictionary.
        IndexError: number of outline xrefs differs from len(items).
    """
    if self.is_closed:
        raise ValueError("document closed")
    if g_use_extra:
        return extra.Document_extend_toc_items(self.this, items)
    pdf = _as_pdf_document(self)
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    if not catalog.m_internal:
        return
    outlines = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
    if not outlines.m_internal:
        return
    first_item = mupdf.pdf_dict_get(outlines, PDF_NAME('First'))
    if not first_item.m_internal:
        return
    xrefs = JM_outline_xrefs(first_item, [])
    total = len(xrefs)
    if total == 0:
        return
    if total != len(items):
        raise IndexError("internal error finding outline xrefs")

    # update all TOC item dictionaries
    for idx in range(total):
        xref = int(xrefs[idx])
        entry = items[idx]
        info = entry[3]
        if not isinstance(info, dict):
            raise ValueError("need non-simple TOC format")
        info[dictkey_xref] = xrefs[idx]
        bookmark = mupdf.pdf_load_object(pdf, xref)

        # /F: 1 = italic, 2 = bold, 3 = both
        style_flags = mupdf.pdf_to_int(mupdf.pdf_dict_get(bookmark, PDF_NAME('F')))
        if style_flags in (1, 3):
            info["italic"] = True
        if style_flags in (2, 3):
            info["bold"] = True

        # /Count: negative means collapsed, positive means expanded
        kid_count = mupdf.pdf_to_int(mupdf.pdf_dict_get(bookmark, PDF_NAME('Count')))
        if kid_count != 0:
            info["collapse"] = kid_count < 0

        rgb = mupdf.pdf_dict_get(bookmark, PDF_NAME('C'))
        if mupdf.pdf_is_array(rgb) and mupdf.pdf_array_len(rgb) == 3:
            info[dictkey_color] = tuple(
                mupdf.pdf_to_real(mupdf.pdf_array_get(rgb, k)) for k in range(3)
            )

        # Zoom is the fifth item of the destination array, if there is one.
        zoom_value = 0
        dest = mupdf.pdf_dict_get(bookmark, PDF_NAME('Dest'))
        if not dest.m_internal or not mupdf.pdf_is_array(dest):
            dest = mupdf.pdf_dict_getl(bookmark, PDF_NAME('A'), PDF_NAME('D'))
        if mupdf.pdf_is_array(dest) and mupdf.pdf_array_len(dest) == 5:
            zoom_value = mupdf.pdf_to_real(mupdf.pdf_array_get(dest, 4))
        info["zoom"] = float(zoom_value)

        entry[3] = info
        items[idx] = entry
def _forget_page(self, page: Page):
    """Remove a page from document page dict."""
    # Pages are keyed by id(); a missing key is silently ignored.
    self._page_refs.pop(id(page), None)
def _get_char_widths(self, xref: int, bfname: str, ext: str, ordering: int, limit: int, idx: int = 0):
    """Return a list of (glyph, advance) tuples for a font.

    The font is taken from MuPDF's CJK fonts if ``ordering`` >= 0, else
    from the Base-14 fonts by ``bfname``, else from the font buffer of
    the PDF object ``xref``.

    Args:
        xref: xref of the font, used when it is neither CJK nor Base-14.
        bfname: base font name for the Base-14 lookup.
        ext: not used by this method.
        ordering: CJK ordering, or a negative value for non-CJK fonts.
        limit: number of character codes to process; at least 256 are
            always processed.
        idx: font index passed to fz_new_font_from_buffer().

    Returns:
        One tuple per character code. For CJK fonts the glyph is the
        character code itself. The advance is 0.0 where the glyph is not
        greater than 0.

    Raises:
        Exception: if no font buffer can be obtained for ``xref``.
    """
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    code_count = max(limit, 256)
    if ordering >= 0:
        data, size, index = mupdf.fz_lookup_cjk_font(ordering)
        font = mupdf.fz_new_font_from_memory(None, data, size, index, 0)
    else:
        data, size = mupdf.fz_lookup_base14_font(bfname)
        if data:
            font = mupdf.fz_new_font_from_memory(bfname, data, size, 0, 0)
        else:
            buf = JM_get_fontbuffer(pdf, xref)
            if not buf.m_internal:
                raise Exception("font at xref %d is not supported" % xref)
            font = mupdf.fz_new_font_from_buffer(None, buf, idx, 0)
    wlist = []
    for code in range(code_count):
        glyph = mupdf.fz_encode_character(font, code)
        adv = mupdf.fz_advance_glyph(font, glyph, 0)
        if ordering >= 0:
            glyph = code
        wlist.append((glyph, adv if glyph > 0 else 0.0))
    return wlist
def _get_page_labels(self):
    """Return the page label definitions of the PDF.

    Looks for /Root/PageLabels and collects its entries with
    JM_get_page_labels() from, in this order: a direct /Nums array, a
    /Kids/Nums array, or the /Nums array of every item of a /Kids array.

    Returns:
        The list filled by JM_get_page_labels(); empty if the PDF has no
        page labels.
    """
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    rc = []
    pagelabels = mupdf.pdf_new_name("PageLabels")
    obj = mupdf.pdf_dict_getl( mupdf.pdf_trailer(pdf), PDF_NAME('Root'), pagelabels)
    if not obj.m_internal:
        return rc
    # simple case: direct /Nums object
    nums = mupdf.pdf_resolve_indirect( mupdf.pdf_dict_get( obj, PDF_NAME('Nums')))
    if nums.m_internal:
        JM_get_page_labels(rc, nums)
        return rc
    # case: /Kids/Nums
    nums = mupdf.pdf_resolve_indirect( mupdf.pdf_dict_getl(obj, PDF_NAME('Kids'), PDF_NAME('Nums')))
    if nums.m_internal:
        JM_get_page_labels(rc, nums)
        return rc
    # case: /Kids is an array of multiple /Nums
    kids = mupdf.pdf_resolve_indirect( mupdf.pdf_dict_get( obj, PDF_NAME('Kids')))
    if not kids.m_internal or not mupdf.pdf_is_array(kids):
        return rc
    n = mupdf.pdf_array_len(kids)
    for i in range(n):
        kid = mupdf.pdf_array_get(kids, i)
        # The /Nums key belongs to the dictionary lookup, not to
        # pdf_resolve_indirect().
        nums = mupdf.pdf_resolve_indirect(
            mupdf.pdf_dict_get(kid, PDF_NAME('Nums'))
        )
        JM_get_page_labels(rc, nums)
    return rc
def _getMetadata(self, key):
    """Get metadata.

    Returns the value for ``key``, or an empty string if the lookup
    raises.
    """
    try:
        value = mupdf.fz_lookup_metadata2(self.this, key)
    except Exception:
        if g_exceptions_verbose > 2:
            exception_info()
        return ''
    else:
        return value
def _getOLRootNumber(self):
    """Get xref of Outline Root, create it if missing."""
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    outlines = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
    if outlines.m_internal:
        return mupdf.pdf_to_num(outlines)
    # No outline root yet: add an /Outlines dictionary to the PDF and
    # reference it from the catalog.
    new_dict = mupdf.pdf_new_dict(pdf, 4)
    mupdf.pdf_dict_put(new_dict, PDF_NAME('Type'), PDF_NAME('Outlines'))
    reference = mupdf.pdf_add_object(pdf, new_dict)
    mupdf.pdf_dict_put(catalog, PDF_NAME('Outlines'), reference)
    outlines = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
    return mupdf.pdf_to_num(outlines)
def _getPDFfileid(self):
    """Get PDF file id.

    Returns:
        A list with the hexlified items of the trailer's /ID array
        (empty if there is no /ID), or None if the document is no PDF.
    """
    pdf = _as_pdf_document(self)
    if not pdf:
        return
    idlist = []
    identity = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('ID'))
    if identity.m_internal:
        n = mupdf.pdf_array_len(identity)
        for i in range(n):
            o = mupdf.pdf_array_get(identity, i)
            text = mupdf.pdf_to_text_string(o)
            # binascii.hexlify() needs a bytes-like object and raises
            # TypeError for str.
            # NOTE(review): assumes the binding hands back the string as
            # UTF-8 decoded text - confirm the intended byte encoding.
            if isinstance(text, str):
                text = text.encode("utf-8", errors="surrogateescape")
            hex_ = binascii.hexlify(text)
            idlist.append(hex_)
    return idlist
def _getPageInfo(self, pno, what):
    """List fonts, images, XObjects used on a page.

    Args:
        pno: page number; negative values count from the end.
        what: selector passed through to JM_scan_resources().

    Raises:
        ValueError: document closed or encrypted, or ``pno`` too large.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    doc = self.this
    pdf = _as_pdf_document(self)
    if isinstance(doc, mupdf.PdfDocument):
        page_total = mupdf.pdf_count_pages(doc)
    else:
        page_total = mupdf.fz_count_pages(doc)
    page_no = pno  # pno < 0 is allowed
    while page_no < 0:
        page_no += page_total  # make it non-negative
    if page_no >= page_total:
        raise ValueError(MSG_BAD_PAGENO)
    page_obj = mupdf.pdf_lookup_page_obj(pdf, page_no)
    resources = mupdf.pdf_dict_get_inheritable(page_obj, mupdf.PDF_ENUM_NAME_Resources)
    found = []
    if resources.m_internal:
        JM_scan_resources(pdf, resources, found, what, 0, [])
    return found
def _insert_font(self, fontfile=None, fontbuffer=None):
    '''
    Utility: insert font from file or binary.

    Raises ValueError if neither fontfile nor fontbuffer is given.
    '''
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    if not (fontfile or fontbuffer):
        raise ValueError(MSG_FILE_OR_BUFFER)
    return JM_insert_font(pdf, None, fontfile, fontbuffer, 0, 0, 0, 0, 0, -1)
def _loadOutline(self):
    """Load first outline.

    Returns an Outline, or None if loading the outline raises.
    """
    doc = self.this
    assert isinstance(doc, mupdf.FzDocument)
    try:
        outline = mupdf.fz_load_outline(doc)
    except Exception:
        if g_exceptions_verbose > 1:
            exception_info()
        return None
    else:
        return Outline(outline)
def _make_page_map(self):
"""Make an array page number -> page object."""
if self.is_closed:
raise ValueError("document closed")
assert 0, f'_make_page_map() is no-op'
def _move_copy_page(self, pno, nb, before, copy):
    """Move or copy a PDF page reference.

    Args:
        pno: number of the source page.
        nb: number of the target page.
        before: insert in front of the target page if true, else behind.
        copy: copy the page reference if true, else move it.
    """
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)

    def adjust_counts(node, delta):
        # Add delta to /Count of node and of every /Parent above it.
        while node.m_internal:
            current = mupdf.pdf_dict_get_int(node, PDF_NAME('Count'))
            mupdf.pdf_dict_put_int(node, PDF_NAME('Count'), current + delta)
            node = mupdf.pdf_dict_get(node, PDF_NAME('Parent'))

    # locate both pages, their parents and their index in the /Kids arrays
    src_page, src_parent, src_idx = pdf_lookup_page_loc(pdf, pno)
    src_kids = mupdf.pdf_dict_get(src_parent, PDF_NAME('Kids'))
    _, dst_parent, dst_idx = pdf_lookup_page_loc(pdf, nb)
    dst_kids = mupdf.pdf_dict_get(dst_parent, PDF_NAME('Kids'))

    # index of the source page in the target /Kids array
    insert_at = dst_idx if before else dst_idx + 1
    different_kids = mupdf.pdf_objcmp(src_kids, dst_kids) != 0

    if different_kids and not copy:
        # a moved page gets its new parent
        mupdf.pdf_dict_put(src_page, PDF_NAME('Parent'), dst_parent)
    mupdf.pdf_array_insert(dst_kids, src_page, insert_at)

    if different_kids:
        adjust_counts(dst_parent, 1)
        if not copy:
            # delete the original item
            mupdf.pdf_array_delete(src_kids, src_idx)
            adjust_counts(src_parent, -1)
    elif copy:
        adjust_counts(dst_parent, 1)
    elif src_idx < insert_at:
        mupdf.pdf_array_delete(src_kids, src_idx)
    else:
        # the insertion shifted the original item by one position
        mupdf.pdf_array_delete(src_kids, src_idx + 1)

    if pdf.m_internal.rev_page_map:  # page map no longer valid: drop it
        mupdf.ll_pdf_drop_page_tree(pdf.m_internal)
    self._reset_page_refs()
def _newPage(self, pno=-1, width=595, height=842):
    """Make a new PDF page.

    Inserts an empty page of the given size at position ``pno`` and
    returns ``self[pno]``.

    Raises:
        ValueError: document closed or encrypted; without the `extra`
            module also if ``pno`` is below -1.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if g_use_extra:
        extra._newPage(self.this, pno, width, height)
    else:
        pdf = _as_pdf_document(self)
        assert isinstance(pdf, mupdf.PdfDocument)
        if pno < -1:
            raise ValueError(MSG_BAD_PAGENO)
        page_box = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
        page_box.x1 = width
        page_box.y1 = height
        empty_contents = mupdf.FzBuffer()
        # create /Resources and /Contents objects
        resources = mupdf.pdf_add_new_dict(pdf, 1)
        new_page = mupdf.pdf_add_page(pdf, page_box, 0, resources, empty_contents)
        mupdf.pdf_insert_page(pdf, pno, new_page)
    # fixme: pdf->dirty = 1;
    self._reset_page_refs()
    return self[pno]
def _remove_links_to(self, numbers):
    """Pass the PDF document and ``numbers`` to _remove_dest_range()."""
    _remove_dest_range(_as_pdf_document(self), numbers)
def _remove_toc_item(self, xref):
    """"Remove" a bookmark by letting it point to nowhere.

    Deletes the /Dest and /A entries of the outline item ``xref`` and
    sets its /C color to (0.8, 0.8, 0.8).
    """
    pdf = _as_pdf_document(self)
    item = mupdf.pdf_new_indirect(pdf, xref, 0)
    for key in ('Dest', 'A'):
        mupdf.pdf_dict_del(item, PDF_NAME(key))
    gray = mupdf.pdf_new_array(pdf, 3)
    for _ in range(3):
        mupdf.pdf_array_push_real(gray, 0.8)
    mupdf.pdf_dict_put(item, PDF_NAME('C'), gray)
def _reset_page_refs(self):
"""Invalidate all pages in document dictionary."""
if getattr(self, "is_closed", True):
return
pages = [p for p in self._page_refs.values()]
for page in pages:
if page:
page._erase()
page = None
self._page_refs.clear()
def _set_page_labels(self, labels):
    """Replace the PDF's page labels by ``labels``.

    An empty /PageLabels/Nums array is written into the catalog first;
    the text "/Nums[]" in the catalog's object source is then replaced
    by "/Nums[<labels>]" and the object is updated.
    """
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    key = mupdf.pdf_new_name("PageLabels")
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    mupdf.pdf_dict_del(catalog, key)
    empty_nums = mupdf.pdf_new_array(pdf, 0)
    mupdf.pdf_dict_putl(catalog, empty_nums, key, PDF_NAME('Nums'))

    catalog_xref = self.pdf_catalog()
    source = self.xref_object(catalog_xref, compressed=True)
    self.update_object(catalog_xref, source.replace("/Nums[]", "/Nums[%s]" % labels))
def _update_toc_item(self, xref, action=None, title=None, flags=0, collapse=None, color=None):
    '''
    Update the outline item (bookmark) with the given xref.

    title: new /Title, if truthy.
    action: new /A object (given as PDF source string); /Dest is removed.
    flags: always written to /F.
    color: a truthy value sets /C from its first three items, a falsy
        value other than None removes /C.
    collapse: if not None and the item has /Count, the sign of /Count is
        flipped where it does not match the requested state.
    '''
    pdf = _as_pdf_document(self)
    item = mupdf.pdf_new_indirect(pdf, xref, 0)
    if title:
        mupdf.pdf_dict_put_text_string(item, PDF_NAME('Title'), title)
    if action:
        mupdf.pdf_dict_del(item, PDF_NAME('Dest'))
        action_obj = JM_pdf_obj_from_str(pdf, action)
        mupdf.pdf_dict_put(item, PDF_NAME('A'), action_obj)
    mupdf.pdf_dict_put_int(item, PDF_NAME('F'), flags)

    if color:
        rgb = mupdf.pdf_new_array(pdf, 3)
        for position in (0, 1, 2):
            mupdf.pdf_array_push_real(rgb, color[position])
        mupdf.pdf_dict_put(item, PDF_NAME('C'), rgb)
    elif color is not None:
        mupdf.pdf_dict_del(item, PDF_NAME('C'))

    if collapse is None:
        return
    if not mupdf.pdf_dict_get(item, PDF_NAME('Count')).m_internal:
        return
    count = mupdf.pdf_dict_get_int(item, PDF_NAME('Count'))
    must_expand = count < 0 and collapse is False
    must_collapse = count > 0 and collapse is True
    if must_expand or must_collapse:
        mupdf.pdf_dict_put_int(item, PDF_NAME('Count'), -count)
@property
def FormFonts(self):
    """Get list of field font resource names.

    Returns None if the document is no PDF, else the key names of the
    /Root/AcroForm/DR/Font dictionary (empty list if there is none).
    """
    pdf = _as_pdf_document(self)
    if not pdf:
        return
    trailer = mupdf.pdf_trailer(pdf)
    font_dict = mupdf.pdf_dict_getl(
        trailer,
        PDF_NAME('Root'),
        PDF_NAME('AcroForm'),
        PDF_NAME('DR'),
        PDF_NAME('Font'),
    )
    if not (font_dict.m_internal and mupdf.pdf_is_dict(font_dict)):
        return []
    return [
        JM_UnicodeFromStr(mupdf.pdf_to_name(mupdf.pdf_dict_get_key(font_dict, pos)))
        for pos in range(mupdf.pdf_dict_len(font_dict))
    ]
def add_layer(self, name, creator=None, on=None):
    """Add a new OC layer."""
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    JM_add_layer_config(pdf, name, creator, on)
    # have MuPDF read the optional content data again
    internal = pdf.m_internal
    mupdf.ll_pdf_read_ocg(internal)
def add_ocg(self, name, config=-1, on=1, intent=None, usage=None):
    """Add new optional content group.

    Args:
        name: value of the OCG's /Name entry.
        config: index into /OCProperties/Configs of the configuration to
            add the OCG to; values below 0 select the default (/D).
        on: add the OCG to the config's /ON array if truthy, else to its
            /OFF array.
        intent: a name or a sequence of names for the /Intent array;
            "View" is used if falsy. Non-string or empty sequence items
            are skipped.
        usage: /Subtype of the creator info; "Artwork" if falsy.

    Returns:
        The xref of the new OCG.

    Raises:
        ValueError: ``config`` does not select an existing configuration.
    """
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    # make the OCG
    ocg = mupdf.pdf_add_new_dict(pdf, 3)
    mupdf.pdf_dict_put(ocg, PDF_NAME('Type'), PDF_NAME('OCG'))
    mupdf.pdf_dict_put_text_string(ocg, PDF_NAME('Name'), name)
    intents = mupdf.pdf_dict_put_array(ocg, PDF_NAME('Intent'), 2)
    if not intent:
        mupdf.pdf_array_push(intents, PDF_NAME('View'))
    elif isinstance(intent, str):
        mupdf.pdf_array_push(intents, mupdf.pdf_new_name(intent))
    else:
        # A sequence of intent names: push every usable item.
        for item in intent:
            if isinstance(item, str) and item:
                mupdf.pdf_array_push(intents, mupdf.pdf_new_name(item))
    use_for = mupdf.pdf_dict_put_dict(ocg, PDF_NAME('Usage'), 3)
    ci_name = mupdf.pdf_new_name("CreatorInfo")
    cre_info = mupdf.pdf_dict_put_dict(use_for, ci_name, 2)
    mupdf.pdf_dict_put_text_string(cre_info, PDF_NAME('Creator'), "PyMuPDF")
    mupdf.pdf_dict_put_name(cre_info, PDF_NAME('Subtype'), usage if usage else "Artwork")
    indocg = mupdf.pdf_add_object(pdf, ocg)
    # Insert OCG in the right config
    ocp = JM_ensure_ocproperties(pdf)
    obj = mupdf.pdf_dict_get(ocp, PDF_NAME('OCGs'))
    mupdf.pdf_array_push(obj, indocg)
    if config > -1:
        obj = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
        if not mupdf.pdf_is_array(obj):
            raise ValueError( MSG_BAD_OC_CONFIG)
        cfg = mupdf.pdf_array_get(obj, config)
        if not cfg.m_internal:
            raise ValueError( MSG_BAD_OC_CONFIG)
    else:
        cfg = mupdf.pdf_dict_get(ocp, PDF_NAME('D'))
    obj = mupdf.pdf_dict_get(cfg, PDF_NAME('Order'))
    if not obj.m_internal:
        obj = mupdf.pdf_dict_put_array(cfg, PDF_NAME('Order'), 1)
    mupdf.pdf_array_push(obj, indocg)
    # Record the initial visibility in the config's /ON or /OFF array.
    state_key = PDF_NAME('ON') if on else PDF_NAME('OFF')
    obj = mupdf.pdf_dict_get(cfg, state_key)
    if not obj.m_internal:
        obj = mupdf.pdf_dict_put_array(cfg, state_key, 1)
    mupdf.pdf_array_push(obj, indocg)
    # let MuPDF take note: re-read OCProperties
    mupdf.ll_pdf_read_ocg(pdf.m_internal)
    return mupdf.pdf_to_num(indocg)
def authenticate(self, password):
    """Decrypt the document with a password.

    Args:
        password: the password to try.
    Returns:
        The result of mupdf.fz_authenticate_password(); a truthy value
        means the password was accepted.
    Raises:
        ValueError: if the document is closed.
    """
    if self.is_closed:
        raise ValueError("document closed")
    val = mupdf.fz_authenticate_password(self.this, password)
    if val:
        # Decryption succeeded: outline and metadata can now be initialized.
        self.is_encrypted = False
        self.init_doc()
        self.thisown = True
    return val
def can_save_incrementally(self):
    """Tell whether this document may be saved incrementally.

    Returns False when no PDF document object is available, otherwise the
    answer of mupdf.pdf_can_be_saved_incrementally().
    """
    pdf = _as_pdf_document(self)
    if pdf:
        return mupdf.pdf_can_be_saved_incrementally(pdf)
    return False
def bake(self, *, annots: bool = True, widgets: bool = True) -> None:
    """Turn annotations and / or form fields into permanent page content.

    Notes:
        Selected annotations or widgets become regular page content (text,
        vector graphics, as appropriate). Pages keep their appearance, but
        no longer contain annotations, respectively fields.
        If widgets are selected, the PDF will no longer be a Form PDF.
    Args:
        annots: convert annotations
        widgets: convert form fields
    Raises:
        ValueError: if the document is not a PDF.
    """
    pdf = _as_pdf_document(self)
    if pdf:
        # MuPDF expects integer flags here.
        mupdf.pdf_bake_document(pdf, int(annots), int(widgets))
        return
    raise ValueError("not a PDF")
@property
def chapter_count(self):
    """Number of chapters in the document.

    Raises:
        ValueError: if the document is closed.
    """
    if not self.is_closed:
        return mupdf.fz_count_chapters(self.this)
    raise ValueError("document closed")
def chapter_page_count(self, chapter):
    """Number of pages in the given chapter.

    Args:
        chapter: 0-based chapter number.
    Raises:
        ValueError: if the document is closed or the chapter number is
            outside the range of existing chapters.
    """
    if self.is_closed:
        raise ValueError("document closed")
    fz_doc = self.this
    total_chapters = mupdf.fz_count_chapters(fz_doc)
    if chapter < 0 or chapter >= total_chapters:
        raise ValueError("bad chapter number")
    return mupdf.fz_count_chapter_pages(self.this, chapter)
def close(self):
    """Close the document and release references held by this object.

    Raises:
        ValueError: if the document is already closed (or never got an
            'is_closed' attribute).
    """
    already_closed = getattr(self, "is_closed", True)
    if already_closed:
        raise ValueError("document closed")
    if getattr(self, "_outline", None):
        self._outline = None
    self._reset_page_refs()
    self.is_closed = True
    # Fixes test_3140().
    self.Graftmaps = {}
    self.this = None
def convert_to_pdf(self, from_page=0, to_page=-1, rotate=0):
    """Convert a page range of the document to PDF.

    Args:
        from_page: first page; values outside the document are clamped.
        to_page: last page; negative means the last page, larger values
            are clamped to the last page.
        rotate: rotation, passed on to the converter.
    Returns:
        The result of JM_convert_to_pdf() (documented as a bytes object).
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    fz_doc = self.this
    last_index = mupdf.fz_count_pages(fz_doc) - 1
    first = min(max(from_page, 0), last_index)
    last = last_index if to_page < 0 else min(to_page, last_index)
    # Remember how many MuPDF warnings existed, so only new ones are shown.
    warnings_before = len(JM_mupdf_warnings_store)
    result = JM_convert_to_pdf(fz_doc, first, last, rotate)
    warnings_after = len(JM_mupdf_warnings_store)
    for text in JM_mupdf_warnings_store[warnings_before:warnings_after]:
        message(f'{text}')
    return result
def copy_page(self, pno: int, to: int =-1):
    """Copy a page within a PDF document.

    Only another reference to the same page object is created.

    Args:
        pno: source page number
        to: put before this page, '-1' means after last page.
    Raises:
        ValueError: if the document is closed or a page number is invalid.
    """
    if self.is_closed:
        raise ValueError("document closed")
    n_pages = len(self)
    if not (pno in range(n_pages) and to in range(-1, n_pages)):
        raise ValueError("bad page number(s)")
    if to == -1:
        # append: insert *after* the current last page
        return self._move_copy_page(pno, n_pages - 1, 0, 1)
    return self._move_copy_page(pno, to, 1, 1)
def del_xml_metadata(self):
    """Remove the 'Metadata' entry from the PDF catalog, if there is one.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    trailer = mupdf.pdf_trailer(pdf)
    catalog = mupdf.pdf_dict_get(trailer, PDF_NAME('Root'))
    if not catalog.m_internal:
        return
    mupdf.pdf_dict_del(catalog, PDF_NAME('Metadata'))
def delete_page(self, pno: int =-1):
    """Delete one page from a PDF.

    Bookmarks and links pointing to the page are removed as well.

    Args:
        pno: page number; negative values count from the end, so the
            default -1 is the last page.
    Raises:
        ValueError: if the document is closed, is not a PDF, has no
            pages, or 'pno' is beyond the last page.
    """
    # Check 'closed' first: 'is_pdf' accesses the underlying document
    # object, which is set to None by close().
    if self.is_closed:
        raise ValueError("document closed")
    if not self.is_pdf:
        raise ValueError("is no PDF")

    page_count = self.page_count
    if page_count <= 0:
        # Without this guard the loop below never ends for negative 'pno'.
        raise ValueError("bad page number(s)")
    while pno < 0:
        pno += page_count
    if pno >= page_count:
        raise ValueError("bad page number(s)")

    # remove TOC bookmarks pointing to deleted page
    toc = self.get_toc()
    ol_xrefs = self.get_outline_xrefs()
    for i, item in enumerate(toc):
        if item[2] == pno + 1:  # TOC page numbers are 1-based
            self._remove_toc_item(ol_xrefs[i])

    self._remove_links_to(frozenset((pno,)))
    self._delete_page(pno)
    self._reset_page_refs()
def delete_pages(self, *args, **kw):
    """Delete pages from a PDF.

    Args:
        Either keywords 'from_page'/'to_page', or two integers to
        specify the first/last page to delete.
        Or a single int, or a list/tuple/range object, which can contain
        arbitrary page numbers.
    Raises:
        ValueError: if the document is closed or not a PDF, if the
            arguments are malformed, or if a page number is out of range.
    """
    # Check 'closed' first: 'is_pdf' accesses the underlying document
    # object, which is set to None by close().
    if self.is_closed:
        raise ValueError("document closed")
    if not self.is_pdf:
        raise ValueError("is no PDF")

    page_count = self.page_count  # page count of document
    if kw:  # keywords were used
        if args:  # then no positional args are allowed
            raise ValueError("cannot mix keyword and positional argument")
        f = kw.get("from_page", -1)  # first page to delete
        t = kw.get("to_page", -1)  # last page to delete
        if page_count <= 0:
            # Without this guard the loops below never end.
            raise ValueError("bad page number(s)")
        while f < 0:
            f += page_count
        while t < 0:
            t += page_count
        if not f <= t < page_count:
            raise ValueError("bad page number(s)")
        numbers = tuple(range(f, t + 1))
    elif len(args) == 2:
        f, t = args
        if not (type(f) is int and type(t) is int):
            raise ValueError("both arguments must be int")
        if f > t:
            f, t = t, f
        if not f <= t < page_count:
            raise ValueError("bad page number(s)")
        numbers = tuple(range(f, t + 1))
    elif len(args) == 1:
        r = args[0]
        if type(r) is int:
            numbers = (r,)  # a single page number
        elif type(r) in (range, list, tuple):
            numbers = tuple(r)
        else:
            raise ValueError("need int or sequence if one argument")
    else:
        # 'args' is a tuple, so test its length (it never equals []).
        raise ValueError("need 1 or 2 positional arguments")

    # Convert before de-duplicating, so the result holds unique integers.
    numbers = sorted(set(map(int, numbers)))
    if not numbers:
        message("nothing to delete")
        return
    if numbers[0] < 0 or numbers[-1] >= page_count:
        raise ValueError("bad page number(s)")

    frozen_numbers = frozenset(numbers)
    toc = self.get_toc()
    for i, xref in enumerate(self.get_outline_xrefs()):
        if toc[i][2] - 1 in frozen_numbers:  # TOC page numbers are 1-based
            self._remove_toc_item(xref)  # remove target in PDF object

    self._remove_links_to(frozen_numbers)

    for i in reversed(numbers):  # delete pages, last to first
        self._delete_page(i)

    self._reset_page_refs()
def embfile_add(self,
        name: str,
        buffer_: typing.ByteString,
        filename: OptStr =None,
        ufilename: OptStr =None,
        desc: OptStr =None,
        ) -> int:
    """Add an item to the EmbeddedFiles array.

    Args:
        name: name of the new item, must not already exist.
        buffer_: (binary data) the file content.
        filename: (str) the file name, default: the name
        ufilename: (unicode) the file name, default: filename
        desc: (str) the description, default: the name
    Returns:
        The xref delivered by the internal '_embfile_add()' call, i.e.
        the object which also receives the creation / modification dates.
    Raises:
        ValueError: if an item called 'name' already exists.
    """
    if name in self.embfile_names():
        raise ValueError("Name '%s' already exists." % str(name))

    # Fill in defaults for everything that was omitted.
    if filename is None:
        filename = name
    if ufilename is None:
        ufilename = filename
    if desc is None:
        desc = name

    xref = self._embfile_add(
            name,
            buffer_=buffer_,
            filename=filename,
            ufilename=ufilename,
            desc=desc,
            )
    # Creation and modification date are both "now" for a new item.
    date = get_pdf_now()
    self.xref_set_key(xref, "Type", "/EmbeddedFile")
    self.xref_set_key(xref, "Params/CreationDate", get_pdf_str(date))
    self.xref_set_key(xref, "Params/ModDate", get_pdf_str(date))
    return xref
def embfile_count(self) -> int:
    """Return how many EmbeddedFiles entries exist (length of the name list)."""
    names = self.embfile_names()
    return len(names)
def embfile_del(self, item: typing.Union[int, str]):
    """Delete an entry from EmbeddedFiles.

    Notes:
        'item' must be the name or the index of an EmbeddedFiles entry.
        The data is physically removed only when saving to a new file
        with an appropriate garbage option.
    Args:
        item: name or number of item.
    Returns:
        Whatever the internal '_embfile_del()' delivers (documented as
        None).
    """
    index = self._embeddedFileIndex(item)
    result = self._embfile_del(index)
    return result
def embfile_get(self, item: typing.Union[int, str]) -> bytes:
    """Return the content of an EmbeddedFiles entry.

    Args:
        item: number or name of item.
    Returns:
        (bytes) The file content.
    """
    index = self._embeddedFileIndex(item)
    content = self._embeddedFileGet(index)
    return content
def embfile_info(self, item: typing.Union[int, str]) -> dict:
    """Return information about an EmbeddedFiles entry.

    Args:
        item: number or name of item.
    Returns:
        Information dictionary. It starts with the item's name, is
        filled further by the internal '_embfile_info()' and receives
        'creationDate', 'modDate' and 'checksum' when the respective
        keys are present in the PDF object.
    """
    index = self._embeddedFileIndex(item)
    infodict = {"name": self.embfile_names()[index]}
    xref = self._embfile_info(index, infodict)

    date_keys = (
        ("Params/CreationDate", "creationDate"),
        ("Params/ModDate", "modDate"),
    )
    for pdf_key, info_key in date_keys:
        kind, value = self.xref_get_key(xref, pdf_key)
        if kind != "null":
            infodict[info_key] = value

    kind, checksum = self.xref_get_key(xref, "Params/CheckSum")
    if kind != "null":
        # store the checksum as hexadecimal text
        infodict["checksum"] = binascii.hexlify(checksum.encode()).decode()
    return infodict
def embfile_names(self) -> list:
    """Return the names of all EmbeddedFiles entries as a new list."""
    # The internal helper fills the list which is handed to it.
    names = []
    self._embfile_names(names)
    return names
def embfile_upd(self,
        item: typing.Union[int, str],
        buffer_: OptBytes =None,
        filename: OptStr =None,
        ufilename: OptStr =None,
        desc: OptStr =None,
        ) -> int:
    """Change an item of the EmbeddedFiles array.

    Notes:
        Only provided parameters are passed on as changes. The
        modification date of the item is set to "now" in any case.
    Args:
        item: number or name of item.
        buffer_: (binary data) the new file content.
        filename: (str) the new file name.
        ufilename: (unicode) the new file name.
        desc: (str) the new description.
    Returns:
        The xref delivered by the internal '_embfile_upd()' call.
    """
    idx = self._embeddedFileIndex(item)
    xref = self._embfile_upd(
            idx,
            buffer_=buffer_,
            filename=filename,
            ufilename=ufilename,
            desc=desc,
            )
    date = get_pdf_now()
    self.xref_set_key(xref, "Params/ModDate", get_pdf_str(date))
    return xref
def extract_font(self, xref=0, info_only=0, named=None):
    '''
    Get a font by xref.

    Returns a 4-tuple (name, extension, subtype, content), or - if 'named'
    is true - a dictionary with the same four values. All values are empty
    if the object is no font or is one of the 'CIDFontType' subtypes. The
    content is empty if 'info_only' is true or the extension is 'n/a'.
    '''
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    obj = mupdf.pdf_load_object(pdf, xref)
    type_ = mupdf.pdf_dict_get(obj, PDF_NAME('Type'))
    subtype = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
    is_font = (
        mupdf.pdf_name_eq(type_, PDF_NAME('Font'))
        and not mupdf.pdf_to_name(subtype).startswith('CIDFontType')
    )
    if is_font:
        basefont = mupdf.pdf_dict_get(obj, PDF_NAME('BaseFont'))
        if basefont.m_internal and not mupdf.pdf_is_null(basefont):
            bname = basefont
        else:
            # no usable /BaseFont: fall back to /Name
            bname = mupdf.pdf_dict_get(obj, PDF_NAME('Name'))
        ext = JM_get_fontextension(pdf, xref)
        if ext == 'n/a' or info_only:
            bytes_ = b''
        else:
            bytes_ = JM_BinFromBuffer(JM_get_fontbuffer(pdf, xref))
        values = (
            JM_EscapeStrFromStr(mupdf.pdf_to_name(bname)),
            JM_UnicodeFromStr(ext),
            JM_UnicodeFromStr(mupdf.pdf_to_name(subtype)),
            bytes_,
        )
    else:
        values = ('', '', '', b'')

    if not named:
        return values
    keys = (dictkey_name, dictkey_ext, dictkey_type, dictkey_content)
    return dict(zip(keys, values))
def extract_image(self, xref):
"""Get image by xref. Returns a dictionary.

Args:
xref: cross reference number of the image object.
Returns:
A dictionary with the keys dictkey_ext, dictkey_smask, dictkey_width,
dictkey_height, dictkey_colorspace, dictkey_bpc, dictkey_xres,
dictkey_yres, dictkey_cs_name and dictkey_image.
Raises:
ValueError: if the document is closed or encrypted, if xref is outside
1 .. xref length - 1, or if the object's Subtype is not 'Image'.
"""
if self.is_closed or self.is_encrypted:
raise ValueError("document closed or encrypted")
pdf = _as_pdf_document(self)
# initial values: no image type assigned yet, no mask xref
img_type = 0
smask = 0
ASSERT_PDF(pdf)
if not _INRANGE(xref, 1, mupdf.pdf_xref_len(pdf)-1):
raise ValueError( MSG_BAD_XREF)
obj = mupdf.pdf_new_indirect(pdf, xref, 0)
subtype = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
if not mupdf.pdf_name_eq(subtype, PDF_NAME('Image')):
raise ValueError( "not an image")
# look up the SMask / Mask entry and report its object number
o = mupdf.pdf_dict_geta(obj, PDF_NAME('SMask'), PDF_NAME('Mask'))
if o.m_internal:
smask = mupdf.pdf_to_num(o)
if mupdf.pdf_is_jpx_image(obj):
img_type = mupdf.FZ_IMAGE_JPX
res = mupdf.pdf_load_stream(obj)
ext = "jpx"
if JM_is_jbig2_image(obj):
img_type = mupdf.FZ_IMAGE_JBIG2
res = mupdf.pdf_load_stream(obj)
ext = "jb2"
# NOTE(review): this replaces a 'res' assigned just above by the raw stream - confirm this is intended
res = mupdf.pdf_load_raw_stream(obj)
if img_type == mupdf.FZ_IMAGE_UNKNOWN:
# let MuPDF recognize the format from the raw stream data
res = mupdf.pdf_load_raw_stream(obj)
_, c = mupdf.fz_buffer_storage(res)
#log( '{=_ c}')
img_type = mupdf.fz_recognize_image_format(c)
ext = JM_image_extension(img_type)
if img_type == mupdf.FZ_IMAGE_UNKNOWN:
# format still unknown: load the object as a PDF image
res = None
img = mupdf.pdf_load_image(pdf, obj)
ll_cbuf = mupdf.ll_fz_compressed_image_buffer(img.m_internal)
# use the compressed buffer as is, unless it has one of the types listed here
if (ll_cbuf
and ll_cbuf.params.type not in (
mupdf.FZ_IMAGE_RAW,
mupdf.FZ_IMAGE_FAX,
mupdf.FZ_IMAGE_FLATE,
mupdf.FZ_IMAGE_LZW,
mupdf.FZ_IMAGE_RLD,
)
):
img_type = ll_cbuf.params.type
ext = JM_image_extension(img_type)
res = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(ll_cbuf.buffer))
else:
# otherwise deliver the image converted to PNG
res = mupdf.fz_new_buffer_from_image_as_png(
img,
mupdf.FzColorParams(mupdf.fz_default_color_params),
)
ext = "png"
else:
img = mupdf.fz_new_image_from_buffer(res)
# image properties for the result dictionary
xres, yres = mupdf.fz_image_resolution(img)
width = img.w()
height = img.h()
colorspace = img.n()
bpc = img.bpc()
cs_name = mupdf.fz_colorspace_name(img.colorspace())
rc = dict()
rc[ dictkey_ext] = ext
rc[ dictkey_smask] = smask
rc[ dictkey_width] = width
rc[ dictkey_height] = height
rc[ dictkey_colorspace] = colorspace
rc[ dictkey_bpc] = bpc
rc[ dictkey_xres] = xres
rc[ dictkey_yres] = yres
rc[ dictkey_cs_name] = cs_name
rc[ dictkey_image] = JM_BinFromBuffer(res)
return rc
def ez_save(
        self,
        filename,
        garbage=3,
        clean=False,
        deflate=True,
        deflate_images=True,
        deflate_fonts=True,
        incremental=False,
        ascii=False,
        expand=False,
        linear=False,
        pretty=False,
        encryption=1,
        permissions=4095,
        owner_pw=None,
        user_pw=None,
        no_new_id=True,
        preserve_metadata=1,
        use_objstms=1,
        compression_effort=0,
        ):
    '''
    Save the PDF via save(), but with a different set of default values.

    All arguments are passed through to save() unchanged; see there for
    their meaning.
    '''
    options = dict(
        garbage=garbage,
        clean=clean,
        deflate=deflate,
        deflate_images=deflate_images,
        deflate_fonts=deflate_fonts,
        incremental=incremental,
        ascii=ascii,
        expand=expand,
        linear=linear,
        pretty=pretty,
        encryption=encryption,
        permissions=permissions,
        owner_pw=owner_pw,
        user_pw=user_pw,
        no_new_id=no_new_id,
        preserve_metadata=preserve_metadata,
        use_objstms=use_objstms,
        compression_effort=compression_effort,
    )
    return self.save(filename, **options)
def find_bookmark(self, bm):
    """Find the new location of a bookmark after layouting a document.

    Args:
        bm: the bookmark to look up.
    Returns:
        Tuple (chapter, page) of the location found.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    found = mupdf.fz_lookup_bookmark2(self.this, bm)
    return (found.chapter, found.page)
def fullcopy_page(self, pno, to=-1):
"""Make a full page duplicate.

The page object is deep-copied, its annotations (except Popup types and
those with an 'IRT' key) are copied to new objects and a new contents
stream is created, before the copy is inserted in the page tree.

Args:
pno: number of the page to copy, from 0 to page count - 1.
to: target position passed to pdf_insert_page(), from -1 to page count - 1.
Raises:
ValueError: if a page number is outside its valid range.
"""
pdf = _as_pdf_document(self)
page_count = mupdf.pdf_count_pages( pdf)
try:
ASSERT_PDF(pdf)
if (not _INRANGE(pno, 0, page_count - 1)
or not _INRANGE(to, -1, page_count - 1)
):
raise ValueError( MSG_BAD_PAGENO)
# page1 is the source page object, page2 its deep copy
page1 = mupdf.pdf_resolve_indirect( mupdf.pdf_lookup_page_obj( pdf, pno))
page2 = mupdf.pdf_deep_copy_obj( page1)
old_annots = mupdf.pdf_dict_get( page2, PDF_NAME('Annots'))
# copy annotations, but remove Popup and IRT types
if old_annots.m_internal:
n = mupdf.pdf_array_len( old_annots)
new_annots = mupdf.pdf_new_array( pdf, n)
for i in range(n):
o = mupdf.pdf_array_get( old_annots, i)
subtype = mupdf.pdf_dict_get( o, PDF_NAME('Subtype'))
if mupdf.pdf_name_eq( subtype, PDF_NAME('Popup')):
continue
if mupdf.pdf_dict_gets( o, "IRT").m_internal:
continue
# store the copied annotation under a new xref and continue with an indirect reference to it
copy_o = mupdf.pdf_deep_copy_obj( mupdf.pdf_resolve_indirect( o))
xref = mupdf.pdf_create_object( pdf)
mupdf.pdf_update_object( pdf, xref, copy_o)
copy_o = mupdf.pdf_new_indirect( pdf, xref, 0)
# the copy must not keep the 'Popup' and 'P' entries of the original
mupdf.pdf_dict_del( copy_o, PDF_NAME('Popup'))
mupdf.pdf_dict_del( copy_o, PDF_NAME('P'))
mupdf.pdf_array_push( new_annots, copy_o)
mupdf.pdf_dict_put( page2, PDF_NAME('Annots'), new_annots)
# copy the old contents stream(s)
res = JM_read_contents( page1)
# create new /Contents object for page2
if res.m_internal:
#contents = mupdf.pdf_add_stream( pdf, mupdf.fz_new_buffer_from_copied_data( b" ", 1), NULL, 0)
contents = mupdf.pdf_add_stream( pdf, mupdf.fz_new_buffer_from_copied_data( b" "), mupdf.PdfObj(), 0)
JM_update_stream( pdf, contents, res, 1)
mupdf.pdf_dict_put( page2, PDF_NAME('Contents'), contents)
# now insert target page, making sure it is an indirect object
xref = mupdf.pdf_create_object( pdf) # get new xref
mupdf.pdf_update_object( pdf, xref, page2) # store new page
page2 = mupdf.pdf_new_indirect( pdf, xref, 0) # reread object
mupdf.pdf_insert_page( pdf, to, page2) # and store the page
finally:
# drop MuPDF's page tree, whether or not the copy succeeded
mupdf.ll_pdf_drop_page_tree( pdf.m_internal)
self._reset_page_refs()
def get_layer(self, config=-1):
    """Content of ON, OFF, RBGroups of an OC layer.

    Args:
        config: index into the 'Configs' array; -1 selects the default
            configuration 'D'.
    Returns:
        None if the PDF has no 'OCProperties', else the result of
        JM_get_ocg_arrays() for the selected configuration.
    Raises:
        ValueError: if the selected configuration does not exist.
    """
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    ocp = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('OCProperties'),
            )
    if not ocp.m_internal:
        return None
    if config == -1:
        layer = mupdf.pdf_dict_get(ocp, PDF_NAME('D'))
    else:
        configs = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
        layer = mupdf.pdf_array_get(configs, config)
    if not layer.m_internal:
        raise ValueError(MSG_BAD_OC_CONFIG)
    return JM_get_ocg_arrays(layer)
def get_layers(self):
    """Show optional OC layers.

    Returns:
        A list of dictionaries with the keys 'number', 'name' and
        'creator', one per layer configuration.
    """
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    count = mupdf.pdf_count_layer_configs(pdf)
    if count == 1:
        # A count of one is only trusted if a 'Configs' array exists.
        configs = mupdf.pdf_dict_getl(
                mupdf.pdf_trailer(pdf),
                PDF_NAME('Root'),
                PDF_NAME('OCProperties'),
                PDF_NAME('Configs'),
                )
        if not mupdf.pdf_is_array(configs):
            count = 0
    layers = []
    info = mupdf.PdfLayerConfig()
    for number in range(count):
        # 'info' is refilled by MuPDF for each configuration
        mupdf.pdf_layer_config_info(pdf, number, info)
        layers.append({
            "number": number,
            "name": info.name,
            "creator": info.creator,
        })
    return layers
def get_new_xref(self):
    """Create a new PDF object and return its xref.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    ENSURE_OPERATION(pdf)
    return mupdf.pdf_create_object(pdf)
def get_ocgs(self):
    """Show existing optional content groups.

    Returns:
        A dictionary keyed by the xref of each OCG. Every value is a
        dictionary with the keys 'name', 'intent' (list of names), 'on'
        and 'usage' (None if not present). Empty if there is no 'OCGs'
        array.
    """
    creator_info = mupdf.pdf_new_name("CreatorInfo")
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    ocgs = mupdf.pdf_dict_getl(
            mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root')),
            PDF_NAME('OCProperties'),
            PDF_NAME('OCGs'),
            )
    result = dict()
    if not mupdf.pdf_is_array(ocgs):
        return result
    for index in range(mupdf.pdf_array_len(ocgs)):
        ocg = mupdf.pdf_array_get(ocgs, index)
        xref = mupdf.pdf_to_num(ocg)
        name = mupdf.pdf_to_text_string(mupdf.pdf_dict_get(ocg, PDF_NAME('Name')))
        subtype = mupdf.pdf_dict_getl(ocg, PDF_NAME('Usage'), creator_info, PDF_NAME('Subtype'))
        usage = mupdf.pdf_to_name(subtype) if subtype.m_internal else None

        # 'Intent' may be a single name or an array of names
        intents = list()
        intent = mupdf.pdf_dict_get(ocg, PDF_NAME('Intent'))
        if intent.m_internal:
            if mupdf.pdf_is_name(intent):
                intents.append(mupdf.pdf_to_name(intent))
            elif mupdf.pdf_is_array(intent):
                for pos in range(mupdf.pdf_array_len(intent)):
                    entry = mupdf.pdf_array_get(intent, pos)
                    if mupdf.pdf_is_name(entry):
                        intents.append(mupdf.pdf_to_name(entry))

        hidden = mupdf.pdf_is_ocg_hidden(pdf, mupdf.PdfObj(), usage, ocg)
        result[xref] = {
            "name": name,
            "intent": intents,
            "on": not hidden,
            "usage": usage,
        }
    return result
def get_outline_xrefs(self):
    """Get list of outline xref numbers.

    Returns an empty list if no PDF document is available or if the
    catalog, its 'Outlines' entry or the first outline item is missing.
    """
    pdf = _as_pdf_document(self)
    if not pdf:
        return []
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    if not catalog.m_internal:
        return []
    outlines = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
    if not outlines.m_internal:
        return []
    first_item = mupdf.pdf_dict_get(outlines, PDF_NAME('First'))
    if not first_item.m_internal:
        return []
    return JM_outline_xrefs(first_item, [])
def get_page_fonts(self, pno: int, full: bool =False) -> list:
    """Retrieve a list of fonts used on a page.

    Args:
        pno: page number, or an object with a 'number' attribute.
        full: unless this is exactly False, entries are returned
            complete, otherwise without their last item.
    Returns:
        The font entries of the page; an empty tuple for non-PDFs.
    Raises:
        ValueError: if the document is closed or encrypted, or if 'pno'
            is neither an int nor has a 'number' attribute.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not self.is_pdf:
        return ()
    if type(pno) is not int:
        try:
            pno = pno.number
        except Exception:
            exception_info()
            raise ValueError("need a Page or page number")
    fonts = self._getPageInfo(pno, 1)
    if full is not False:
        return fonts
    return [entry[:-1] for entry in fonts]
def get_page_images(self, pno: int, full: bool =False) -> list:
    """Retrieve a list of images used on a page.

    Args:
        pno: page number.
        full: unless this is exactly False, entries are returned
            complete, otherwise without their last item.
    Returns:
        The image entries of the page; an empty tuple for non-PDFs.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not self.is_pdf:
        return ()
    images = self._getPageInfo(pno, 2)
    if full is not False:
        return images
    return [entry[:-1] for entry in images]
def get_page_xobjects(self, pno: int) -> list:
    """Retrieve a list of XObjects used on a page.

    Returns an empty tuple for non-PDF documents.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if self.is_pdf:
        return self._getPageInfo(pno, 3)
    return ()
def get_sigflags(self):
    """Get the /SigFlags value.

    Returns:
        -1 if no PDF document is available or the AcroForm has no
        'SigFlags' entry, else the integer value of that entry.
    """
    pdf = _as_pdf_document(self)
    if not pdf:
        return -1  # not a PDF
    sigflags = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('AcroForm'),
            PDF_NAME('SigFlags'),
            )
    if not sigflags.m_internal:
        return -1
    return mupdf.pdf_to_int(sigflags)
def get_xml_metadata(self):
    """Get document XML metadata.

    Returns:
        The content of the catalog's 'Metadata' stream as a string, or
        an empty string if it is not available.
    """
    pdf = _as_pdf_document(self)
    if not pdf.m_internal:
        return ''
    xml = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('Metadata'),
            )
    if not (xml and xml.m_internal):
        return ''
    return JM_UnicodeFromBuffer(mupdf.pdf_load_stream(xml))
def init_doc(self):
    """Load the outline and the metadata dictionary of the document.

    Sets 'self._outline' and 'self.metadata'. The metadata value for
    'encryption' becomes None if the document reports the string 'None'.

    Raises:
        ValueError: if the document is still encrypted.
    """
    if self.is_encrypted:
        raise ValueError("cannot initialize - document still encrypted")
    self._outline = self._loadOutline()
    # metadata key -> key understood by _getMetadata()
    lookup_keys = {
        'format': 'format',
        'title': 'info:Title',
        'author': 'info:Author',
        'subject': 'info:Subject',
        'keywords': 'info:Keywords',
        'creator': 'info:Creator',
        'producer': 'info:Producer',
        'creationDate': 'info:CreationDate',
        'modDate': 'info:ModDate',
        'trapped': 'info:Trapped',
    }
    self.metadata = {k: self._getMetadata(v) for k, v in lookup_keys.items()}
    # Look the encryption method up once only.
    encryption = self._getMetadata('encryption')
    self.metadata['encryption'] = None if encryption == 'None' else encryption
def insert_file(self,
        infile,
        from_page=-1,
        to_page=-1,
        start_at=-1,
        rotate=-1,
        links=True,
        annots=True,
        show_progress=0,
        final=1,
        ):
    '''
    Insert an arbitrary supported document into an existing PDF.

    The infile may be given as a filename, a Document or a Pixmap.
    A Pixmap is inserted via its PNG image (converted to RGB first if its
    colorspace has more than 3 components); non-PDF documents are
    converted to PDF first.
    Other parameters - where applicable - equal those of insert_pdf().
    '''
    if isinstance(infile, Pixmap):
        pixmap = infile
        if pixmap.colorspace.n > 3:
            pixmap = Pixmap(csRGB, pixmap)
        src = Document("png", pixmap.tobytes())
    elif isinstance(infile, Document):
        src = infile
    else:
        src = Document(infile)
    if not src:
        raise ValueError("bad infile parameter")
    if not src.is_pdf:
        src = Document("pdf", src.convert_to_pdf())
    return self.insert_pdf(
            src,
            from_page=from_page,
            to_page=to_page,
            start_at=start_at,
            rotate=rotate,
            links=links,
            annots=annots,
            show_progress=show_progress,
            final=final,
            )
def insert_pdf(
self,
docsrc,
from_page=-1,
to_page=-1,
start_at=-1,
rotate=-1,
links=1,
annots=1,
show_progress=0,
final=1,
_gmap=None,
):
"""Insert a page range from another PDF.
Args:
docsrc: PDF to copy from. Must be different object, but may be same file.
from_page: (int) first source page to copy, 0-based, default 0.
to_page: (int) last source page to copy, 0-based, default last page.
start_at: (int) from_page will become this page number in target.
rotate: (int) rotate copied pages, default -1 is no change.
links: (int/bool) whether to also copy links.
annots: (int/bool) whether to also copy annotations.
show_progress: (int) progress message interval, 0 is no messages.
final: (bool) indicates last insertion from this source PDF.
_gmap: internal use only
Raises:
ValueError: if this document is closed or encrypted, or if source and
target have the same '_graft_id'.
TypeError: in the code path without 'extra', if source or target is not a PDF.
Copy sequence reversed if from_page > to_page."""
# Insert pages from a source PDF into this PDF.
# For reconstructing the links (_do_links method), we must save the
# insertion point (start_at) if it was specified as -1.
#log( 'insert_pdf(): start')
if self.is_closed or self.is_encrypted:
raise ValueError("document closed or encrypted")
if self._graft_id == docsrc._graft_id:
raise ValueError("source and target cannot be same object")
sa = start_at
if sa < 0:
sa = self.page_count
# announce the insertion only if the source has more pages than the progress interval
if len(docsrc) > show_progress > 0:
inname = os.path.basename(docsrc.name)
if not inname:
inname = "memory PDF"
outname = os.path.basename(self.name)
if not outname:
outname = "memory PDF"
message("Inserting '%s' at '%s'" % (inname, outname))
# retrieve / make a Graftmap to avoid duplicate objects
# (note that any value passed in as '_gmap' is replaced here)
#log( 'insert_pdf(): Graftmaps')
isrt = docsrc._graft_id
_gmap = self.Graftmaps.get(isrt, None)
if _gmap is None:
#log( 'insert_pdf(): Graftmaps2')
_gmap = Graftmap(self)
self.Graftmaps[isrt] = _gmap
if g_use_extra:
#log( 'insert_pdf(): calling extra_FzDocument_insert_pdf()')
extra_FzDocument_insert_pdf(
self.this,
docsrc.this,
from_page,
to_page,
start_at,
rotate,
links,
annots,
show_progress,
final,
_gmap,
)
#log( 'insert_pdf(): extra_FzDocument_insert_pdf() returned.')
else:
pdfout = _as_pdf_document(self)
pdfsrc = _as_pdf_document(docsrc)
outCount = mupdf.fz_count_pages(self)
srcCount = mupdf.fz_count_pages(docsrc.this)
# local copies of page numbers
fp = from_page
tp = to_page
sa = start_at
# normalize page numbers
fp = max(fp, 0) # -1 = first page
fp = min(fp, srcCount - 1) # but do not exceed last page
if tp < 0:
tp = srcCount - 1 # -1 = last page
tp = min(tp, srcCount - 1) # but do not exceed last page
if sa < 0:
sa = outCount # -1 = behind last page
sa = min(sa, outCount) # but that is also the limit
if not pdfout.m_internal or not pdfsrc.m_internal:
raise TypeError( "source or target not a PDF")
ENSURE_OPERATION(pdfout)
JM_merge_range(pdfout, pdfsrc, fp, tp, sa, rotate, links, annots, show_progress, _gmap)
#log( 'insert_pdf(): calling self._reset_page_refs()')
self._reset_page_refs()
if links:
#log( 'insert_pdf(): calling self._do_links()')
# 'sa' is the insertion point saved above
self._do_links(docsrc, from_page = from_page, to_page = to_page, start_at = sa)
if final == 1:
# last insertion from this source: the Graftmap is no longer kept
self.Graftmaps[isrt] = None
#log( 'insert_pdf(): returning')
@property
def is_dirty(self):
    """True if the PDF has unsaved changes, False otherwise or for non-PDFs."""
    pdf = _as_pdf_document(self)
    if pdf.m_internal:
        return bool(mupdf.pdf_has_unsaved_changes(pdf))
    return False
@property
def is_fast_webaccess(self):
    '''
    Check whether we have a linearized PDF.

    Non-PDF documents are handled gracefully by returning False.
    '''
    pdf = _as_pdf_document(self)
    if not pdf.m_internal:
        return False
    return mupdf.pdf_doc_was_linearized(pdf)
@property
def is_form_pdf(self):
    """Either False or PDF field count.

    The count is the length of the AcroForm 'Fields' array. False is
    returned for non-PDFs, if that array does not exist, or if looking
    it up raises an exception.
    """
    pdf = _as_pdf_document(self)
    if not pdf.m_internal:
        return False
    try:
        fields = mupdf.pdf_dict_getl(
                mupdf.pdf_trailer(pdf),
                mupdf.PDF_ENUM_NAME_Root,
                mupdf.PDF_ENUM_NAME_AcroForm,
                mupdf.PDF_ENUM_NAME_Fields,
                )
        count = mupdf.pdf_array_len(fields) if mupdf.pdf_is_array(fields) else -1
    except Exception:
        if g_exceptions_verbose:    exception_info()
        return False
    return count if count >= 0 else False
@property
def is_pdf(self):
    """Check for PDF."""
    fz_doc = self.this
    if isinstance(fz_doc, mupdf.PdfDocument):
        return True
    # Avoid calling self.this.specifics() because it will end up creating
    # a new PdfDocument which will call pdf_create_document(), which is ok
    # but a little unnecessary.
    #
    return bool(mupdf.ll_pdf_specifics(self.this.m_internal))
@property
def is_reflowable(self):
    """Check if document is layoutable.

    Raises:
        ValueError: if the document is closed.
    """
    if not self.is_closed:
        return bool(mupdf.fz_is_document_reflowable(self))
    raise ValueError("document closed")
@property
def is_repaired(self):
    """Check whether PDF was repaired (always False for non-PDFs)."""
    pdf = _as_pdf_document(self)
    if pdf.m_internal:
        return bool(mupdf.pdf_was_repaired(pdf))
    return False
def journal_can_do(self):
    """Show if undo and / or redo are possible.

    Returns:
        Dictionary with the boolean items 'undo' and 'redo'.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    can_undo = mupdf.pdf_can_undo(pdf)
    can_redo = mupdf.pdf_can_redo(pdf)
    return {'undo': bool(can_undo), 'redo': bool(can_redo)}
def journal_enable(self):
    """Activate document journalling.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    unusable = self.is_closed or self.is_encrypted
    if unusable:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    mupdf.pdf_enable_journal(pdf)
def journal_is_enabled(self):
    """Check if journalling is enabled.

    Returns:
        A truthy value if a PDF document with a journal is present.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    return pdf.m_internal and pdf.m_internal.journal
def journal_load(self, filename):
    """Load a journal from a file.

    Args:
        filename: a file name (str); anything else is converted to a
            buffer via JM_BufferFromBytes() and deserialised from there.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    if not isinstance(filename, str):
        stream = mupdf.fz_open_buffer(JM_BufferFromBytes(filename))
        mupdf.pdf_deserialise_journal(pdf, stream)
    else:
        mupdf.pdf_load_journal(pdf, filename)
    # No journal after loading: it does not belong to this document.
    if not pdf.m_internal.journal:
        RAISEPY( "Journal and document do not match", JM_Exc_FileDataError)
def journal_op_name(self, step):
    """Show operation name for given step.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    return mupdf.pdf_undoredo_step(pdf, step)
def journal_position(self):
    """Show journalling state.

    Returns:
        The two values delivered by mupdf.pdf_undoredo_state().
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    position, step_count = mupdf.pdf_undoredo_state(pdf)
    return position, step_count
def journal_redo(self):
    """Move forward in the journal.

    Returns:
        True
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    unusable = self.is_closed or self.is_encrypted
    if unusable:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    mupdf.pdf_redo(pdf)
    return True
def journal_save(self, filename):
    """Save journal to a file.

    Args:
        filename: a file name (str); anything else is wrapped by
            JM_new_output_fileptr() and written to.
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    if not isinstance(filename, str):
        output = JM_new_output_fileptr(filename)
        mupdf.pdf_write_journal(pdf, output)
        output.fz_close_output()
    else:
        mupdf.pdf_save_journal(pdf, filename)
def journal_start_op(self, name=None):
    """Begin a journalling operation.

    Args:
        name: name of the operation; if empty or None, an implicit
            operation is started.
    Raises:
        ValueError: if the document is closed or encrypted.
        RuntimeError: if journalling is not enabled.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    if not pdf.m_internal.journal:
        raise RuntimeError( "Journalling not enabled")
    if not name:
        mupdf.pdf_begin_implicit_operation(pdf)
    else:
        mupdf.pdf_begin_operation(pdf, name)
def journal_stop_op(self):
    """End a journalling operation.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    unusable = self.is_closed or self.is_encrypted
    if unusable:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    mupdf.pdf_end_operation(pdf)
def journal_undo(self):
    """Move backwards in the journal.

    Returns:
        True
    Raises:
        ValueError: if the document is closed or encrypted.
    """
    unusable = self.is_closed or self.is_encrypted
    if unusable:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    mupdf.pdf_undo(pdf)
    return True
@property
def language(self):
    """Document language.

    Returns:
        None if no PDF document is available or no language is set,
        else the language converted to a string by MuPDF.
    """
    pdf = _as_pdf_document(self)
    if not pdf:
        return None
    lang = mupdf.pdf_document_language(pdf)
    if lang == mupdf.FZ_LANG_UNSET:
        return None
    if mupdf_version_tuple < (1, 23, 7):
        assert 0, 'not implemented yet'
    return mupdf.fz_string_from_text_language2(lang)
@property
def last_location(self):
    """Id (chapter, page) of last page.

    Raises:
        ValueError: if the document is closed.
    """
    if self.is_closed:
        raise ValueError("document closed")
    location = mupdf.fz_last_page(self.this)
    return (location.chapter, location.page)
def layer_ui_configs(self):
    """Show OC visibility status modifiable by user.

    Returns:
        A list of dictionaries with the keys 'number', 'text', 'depth',
        'type' ("checkbox", "radiobox" or "label"), 'on' and 'locked'.
    """
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    info = mupdf.PdfLayerConfigUi()
    count = mupdf.pdf_count_layer_config_ui(pdf)
    # numeric UI type -> name; every other value is reported as a label
    type_names = {1: "checkbox", 2: "radiobox"}
    entries = []
    for number in range(count):
        # 'info' is refilled by MuPDF for each entry
        mupdf.pdf_layer_config_ui_info(pdf, number, info)
        type_ = type_names.get(info.type, "label")
        entries.append({
            "number": number,
            "text": info.text,
            "depth": info.depth,
            "type": type_,
            "on": info.selected,
            "locked": info.locked,
        })
    return entries
def layout(self, rect=None, width=0, height=0, fontsize=11):
    """Re-layout a reflowable document.

    Nothing happens if the document is not reflowable.

    Args:
        rect: page rectangle; unless it converts to the infinite
            rectangle, its width and height are used.
        width: page width, used if no usable 'rect' is given.
        height: page height, used if no usable 'rect' is given.
        fontsize: font size passed to MuPDF.
    Raises:
        ValueError: if the document is closed or encrypted, or if the
            resulting width or height is not positive.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    fz_doc = self.this
    if not mupdf.fz_is_document_reflowable(fz_doc):
        return
    page_width, page_height = width, height
    fz_rect = JM_rect_from_py(rect)
    if not mupdf.fz_is_infinite_rect(fz_rect):
        page_width = fz_rect.x1 - fz_rect.x0
        page_height = fz_rect.y1 - fz_rect.y0
    if page_width <= 0.0 or page_height <= 0.0:
        raise ValueError( "bad page size")
    mupdf.fz_layout_document(fz_doc, page_width, page_height, fontsize)

    # Pages and metadata loaded before are no longer valid.
    self._reset_page_refs()
    self.init_doc()
def load_page(self, page_id):
    """Load a page.

    'page_id' is either a 0-based page number or a tuple (chapter, pno),
    with chapter number and page number within that chapter. None means
    page 0; negative page numbers count from the end.

    Raises:
        ValueError: if the document is closed or encrypted, or if the
            page is not in the document.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if page_id is None:
        page_id = 0
    if page_id not in self:
        raise ValueError("page not in document")
    if type(page_id) is int and page_id < 0:
        total = self.page_count
        while page_id < 0:
            page_id += total

    if not isinstance(page_id, int):
        chapter, pagenum = page_id
        fz_page = mupdf.fz_load_chapter_page(self.this, chapter, pagenum)
    else:
        fz_page = mupdf.fz_load_page(self.this, page_id)

    page = Page(fz_page, self)
    page.thisown = True
    page.parent = self
    # register the page with its owning document
    self._page_refs[id(page)] = page
    page._annot_refs = weakref.WeakValueDictionary()
    page.number = page_id
    return page
def location_from_page_number(self, pno):
    """Convert pno to (chapter, page).

    Args:
        pno: 0-based page number; negative values count from the end.
    Returns:
        Tuple (chapter, page).
    Raises:
        ValueError: document closed, or `pno` outside the document.
    """
    if self.is_closed:
        raise ValueError("document closed")
    this_doc = self.this
    page_count = mupdf.fz_count_pages(this_doc)
    # Wrap negative numbers; with zero pages there is nothing to wrap to
    # (adding a zero page count in a loop would never terminate).
    if pno < 0 and page_count > 0:
        pno %= page_count
    if not 0 <= pno < page_count:
        raise ValueError( MSG_BAD_PAGENO)
    loc = mupdf.fz_location_from_page_number(this_doc, pno)
    return loc.chapter, loc.page
def make_bookmark(self, loc):
    """Create a bookmark for location `loc` (chapter, page).

    Intended to be called before re-layouting the document.

    Raises:
        ValueError: document closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    location = mupdf.FzLocation(*loc)
    return mupdf.ll_fz_make_bookmark2(self.this.m_internal, location.internal())
@property
def markinfo(self) -> dict:
    """Return the PDF MarkInfo value.

    Returns:
        None if there is no PDF catalog. An empty dict if the catalog has
        no usable /MarkInfo dictionary. Otherwise a dict starting out as
        {"Marked": False, "UserProperties": False, "Suspects": False},
        where every key found with the value "true" is set to True.
    """
    catalog_xref = self.pdf_catalog()
    if catalog_xref == 0:
        return None
    entry = self.xref_get_key(catalog_xref, "MarkInfo")
    entry_type = entry[0]
    if entry_type == "null":
        return {}
    source = None
    if entry_type == "xref":
        # Indirect reference "<num> 0 R": read the referenced object.
        target_xref = int(entry[1].split()[0])
        source = self.xref_object(target_xref, compressed=True)
    elif entry_type == "dict":
        source = entry[1]
    # Only a "<<...>>" dictionary source can be evaluated.
    if source is None or source[:2] != "<<" or source[-2:] != ">>":
        return {}
    result = {"Marked": False, "UserProperties": False, "Suspects": False}
    # Text before the first "/" is skipped; each later piece should be
    # "<key> <value>".
    for piece in source[2:-2].split("/")[1:]:
        try:
            key, value = piece.split()
        except Exception:
            if g_exceptions_verbose > 1: exception_info()
            return result
        if value == "true":
            result[key] = True
    return result
def move_page(self, pno: int, to: int =-1):
    """Move a page within a PDF document.

    Args:
        pno: source page number.
        to: put before this page, '-1' means after last page.
    Raises:
        ValueError: document closed, or a page number is out of range.
    """
    if self.is_closed:
        raise ValueError("document closed")
    total = len(self)
    if not (pno in range(total) and to in range(-1, total)):
        raise ValueError("bad page number(s)")
    if to == -1:
        # Append: place the page after the current last page.
        return self._move_copy_page(pno, total - 1, 0, 0)
    # Insert in front of page `to`; the trailing 0 means "move, not copy".
    return self._move_copy_page(pno, to, 1, 0)
@property
def name(self):
    """Return the document name stored in `self._name`."""
    return self._name
def need_appearances(self, value=None):
    """Get/set the NeedAppearances value.

    Args:
        value: None to inquire the current value. Any other value is
            written to the AcroForm dictionary as a PDF boolean according
            to its truthiness.
    Returns:
        None if this is not a Form PDF. For inquiries: True / False as
        stored in the PDF, or None if the key is not a boolean. When
        setting: `value` as passed in.
    """
    if not self.is_form_pdf:
        return None
    pdf = _as_pdf_document(self)
    appkey = "NeedAppearances"
    form = mupdf.pdf_dict_getp(
        mupdf.pdf_trailer(pdf),
        "Root/AcroForm",
    )
    if value is None:
        # Pure inquiry: must not modify the document.
        app = mupdf.pdf_dict_gets(form, appkey)
        if mupdf.pdf_is_bool(app):
            return bool(mupdf.pdf_to_bool(app))
        return None
    new_flag = mupdf.PDF_TRUE if value else mupdf.PDF_FALSE
    mupdf.pdf_dict_puts(form, appkey, new_flag)
    return value
@property
def needs_pass(self):
    """Indicate password required.

    Raises:
        ValueError: document closed.
    """
    if self.is_closed:
        raise ValueError("document closed")
    # fz_needs_password() is called with an FzDocument; anything else is
    # converted via its super() first.
    if isinstance(self.this, mupdf.FzDocument):
        fz_doc = self.this
    else:
        fz_doc = self.this.super()
    return mupdf.fz_needs_password(fz_doc)
def next_location(self, page_id):
    """Get (chapter, page) of next page.

    Args:
        page_id: page number (taken as a page of chapter 0) or a
            (chapter, page) sequence.
    Returns:
        Tuple (chapter, page) of the following page, or the empty tuple
        if `page_id` is the last location of the document.
    Raises:
        ValueError: document closed or encrypted, or page id not in
            document.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if type(page_id) is int:
        page_id = (0, page_id)
    if page_id not in self:
        raise ValueError("page id not in document")
    if tuple(page_id) == self.last_location:
        # Nothing follows the last page.
        return ()
    fz_doc = _as_fz_document(self)
    chapter = page_id[ 0]
    if not isinstance(chapter, int):
        RAISEPY(MSG_BAD_PAGEID, PyExc_ValueError)
    pno = page_id[ 1]
    following = mupdf.fz_next_page(fz_doc, mupdf.fz_make_location(chapter, pno))
    return following.chapter, following.page
def page_annot_xrefs(self, n):
    """Get the annotation xref list of page number `n`.

    Args:
        n: 0-based page number; negative values count from the end.
    Returns:
        The result of JM_get_annot_xref_list() for the page object (or of
        extra.page_annot_xrefs() when `g_use_extra` is set).
    Raises:
        ValueError: page number outside the document.
    """
    if g_use_extra:
        return extra.page_annot_xrefs( self.this, n)
    if isinstance(self.this, mupdf.PdfDocument):
        page_count = mupdf.pdf_count_pages(self.this)
        pdf_document = self.this
    else:
        page_count = mupdf.fz_count_pages(self.this)
        pdf_document = _as_pdf_document(self)
    # Wrap negative numbers; with zero pages there is nothing to wrap to
    # (adding a zero page count in a loop would never terminate).
    if n < 0 and page_count > 0:
        n %= page_count
    # Valid page numbers are 0 .. page_count-1, so `page_count` itself
    # must be rejected as well.
    if not 0 <= n < page_count:
        raise ValueError( MSG_BAD_PAGENO)
    page_obj = mupdf.pdf_lookup_page_obj(pdf_document, n)
    return JM_get_annot_xref_list(page_obj)
@property
def page_count(self):
    """Number of pages.

    Raises:
        ValueError: document closed.
    """
    if self.is_closed:
        raise ValueError('document closed')
    if g_use_extra:
        return self.page_count2(self)
    # Choose the counting function matching the wrapped document type.
    if isinstance( self.this, mupdf.FzDocument):
        counter = mupdf.fz_count_pages
    else:
        counter = mupdf.pdf_count_pages
    return counter( self.this)
def page_cropbox(self, pno):
    """Get CropBox of page number (without loading page).

    Args:
        pno: 0-based page number; negative values count from the end.
    Returns:
        The CropBox as a Rect.
    Raises:
        ValueError: document closed, or page number outside the document.
    """
    if self.is_closed:
        raise ValueError("document closed")
    page_count = mupdf.fz_count_pages( self.this)
    n = pno
    # Wrap negative numbers; with zero pages there is nothing to wrap to
    # (adding a zero page count in a loop would never terminate).
    if n < 0 and page_count > 0:
        n %= page_count
    pdf = _as_pdf_document(self)
    if not 0 <= n < page_count:
        raise ValueError( MSG_BAD_PAGENO)
    ASSERT_PDF(pdf)
    pageref = mupdf.pdf_lookup_page_obj( pdf, n)
    cropbox = JM_cropbox(pageref)
    return Rect(JM_py_from_rect(cropbox))
def page_number_from_location(self, page_id):
    """Convert (chapter, pno) to page number.

    Args:
        page_id: a (chapter, pno) sequence, or a page number which is
            taken as a page of chapter 0; negative page numbers count
            from the end.
    Returns:
        The page number as computed by MuPDF.
    Raises:
        ValueError: page id not in document.
    """
    if type(page_id) is int:
        np = self.page_count
        if page_id < 0:
            # Without pages a negative number cannot be wrapped; adding a
            # zero page count in a loop would never terminate.
            if np <= 0:
                raise ValueError("page id not in document")
            page_id %= np
        page_id = (0, page_id)
    if page_id not in self:
        raise ValueError("page id not in document")
    chapter, pno = page_id
    loc = mupdf.fz_make_location( chapter, pno)
    return mupdf.fz_page_number_from_location( self.this, loc)
def page_xref(self, pno):
    """Get xref of page number.

    Args:
        pno: 0-based page number; negative values count from the end.
    Returns:
        The xref number of the page object.
    Raises:
        ValueError: document closed, or page number outside the document.
    """
    if g_use_extra:
        return extra.page_xref( self.this, pno)
    if self.is_closed:
        raise ValueError("document closed")
    page_count = mupdf.fz_count_pages(self.this)
    n = pno
    # Wrap negative numbers; with zero pages there is nothing to wrap to
    # (adding a zero page count in a loop would never terminate).
    if n < 0 and page_count > 0:
        n %= page_count
    pdf = _as_pdf_document(self)
    if not 0 <= n < page_count:
        raise ValueError( MSG_BAD_PAGENO)
    ASSERT_PDF(pdf)
    return mupdf.pdf_to_num(mupdf.pdf_lookup_page_obj(pdf, n))
@property
def pagelayout(self) -> str:
    """Return the PDF PageLayout value.

    Returns:
        None if there is no PDF catalog. Otherwise the catalog's
        /PageLayout name without the leading slash, or "SinglePage" if
        the key is missing or not a PDF name.
    """
    catalog_xref = self.pdf_catalog()
    if catalog_xref == 0:
        return None
    entry = self.xref_get_key(catalog_xref, "PageLayout")
    if entry[0] == "name":
        # Strip the leading "/" of the PDF name.
        return entry[1][1:]
    return "SinglePage"
@property
def pagemode(self) -> str:
    """Return the PDF PageMode value.

    Returns:
        None if there is no PDF catalog. Otherwise the catalog's
        /PageMode name without the leading slash, or "UseNone" if the
        key is missing or not a PDF name.
    """
    catalog_xref = self.pdf_catalog()
    if catalog_xref == 0:
        return None
    entry = self.xref_get_key(catalog_xref, "PageMode")
    if entry[0] == "name":
        # Strip the leading "/" of the PDF name.
        return entry[1][1:]
    return "UseNone"
# Return annotation for pages(). Before Python 3.9, appending `[Page]`
# causes `TypeError: 'ABCMeta' object is not subscriptable`, so the plain
# Iterable is used there.
_pages_ret = (
    collections.abc.Iterable
    if sys.implementation.version < (3, 9)
    else collections.abc.Iterable[Page]
)
def pages(self, start: OptInt =None, stop: OptInt =None, step: OptInt =None) -> _pages_ret:
    """Return a generator iterator over a page range.

    Arguments have the same meaning as for the range() built-in.

    Args:
        start: first page number (default 0); negative values count from
            the end.
        stop: end of the range (exclusive); None or values beyond the
            page count are replaced by the page count.
        step: increment; if None, -1 is used when start > stop, else 1.
    Raises:
        ValueError: bad start page number, or step is zero. As this is a
            generator, errors surface on first iteration.
    """
    page_count = self.page_count
    # set the start value
    start = start or 0
    # Wrap negative numbers; with zero pages there is nothing to wrap to
    # (adding a zero page count in a loop would never terminate).
    if start < 0 and page_count > 0:
        start %= page_count
    if start not in range(page_count):
        raise ValueError("bad start page number")
    # set the stop value
    if stop is None or stop > page_count:
        stop = page_count
    # set the step value
    if step == 0:
        raise ValueError("arg 3 must not be zero")
    if step is None:
        step = -1 if start > stop else 1
    for pno in range(start, stop, step):
        yield self.load_page(pno)
def pdf_catalog(self):
    """Get xref of PDF catalog.

    Returns:
        The xref number of the trailer's /Root entry, or 0 if no PDF
        document is available.
    """
    pdf = _as_pdf_document(self)
    if not pdf:
        return 0
    root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    return mupdf.pdf_to_num(root)
def pdf_trailer(self, compressed=0, ascii=0):
    """Get PDF trailer as a string.

    Delegates to xref_object() with the pseudo xref -1; `compressed` and
    `ascii` are passed through unchanged.
    """
    trailer_xref = -1
    return self.xref_object(trailer_xref, compressed=compressed, ascii=ascii)
@property
def permissions(self):
    """Document permissions.

    Returns:
        0 while the document is encrypted. For PDFs the value of
        pdf_document_permissions(). For other documents a simulated PDF
        permission value: all bits of 0xFFFFFFFC, minus the print /
        modify / copy / annotate bits MuPDF reports as not granted.
    """
    if self.is_encrypted:
        return 0
    doc = self.this
    pdf = mupdf.pdf_document_from_fz_document(doc)
    # for PDF return result of standard function
    if pdf.m_internal:
        return mupdf.pdf_document_permissions(pdf)
    # otherwise simulate the PDF return value, starting with everything
    # granted and switching off where needed
    perm = 0xFFFFFFFC
    flag_pairs = (
        (mupdf.FZ_PERMISSION_PRINT, mupdf.PDF_PERM_PRINT),
        (mupdf.FZ_PERMISSION_EDIT, mupdf.PDF_PERM_MODIFY),
        (mupdf.FZ_PERMISSION_COPY, mupdf.PDF_PERM_COPY),
        (mupdf.FZ_PERMISSION_ANNOTATE, mupdf.PDF_PERM_ANNOTATE),
    )
    for fz_flag, pdf_bit in flag_pairs:
        if not mupdf.fz_has_permission(doc, fz_flag):
            perm ^= pdf_bit
    return perm
def prev_location(self, page_id):
    """Get (chapter, page) of previous page.

    Args:
        page_id: page number (taken as a page of chapter 0) or a
            (chapter, page) sequence.
    Returns:
        Tuple (chapter, page) of the preceding page, or the empty tuple
        if `page_id` is the first location (0, 0).
    Raises:
        ValueError: document closed or encrypted, or page id not in
            document.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if type(page_id) is int:
        page_id = (0, page_id)
    if page_id not in self:
        raise ValueError("page id not in document")
    # Normalize to a tuple (as next_location() does), so that a list
    # [0, 0] is recognized as the first location, too.
    if tuple(page_id) == (0, 0):
        return ()
    chapter, pno = page_id
    loc = mupdf.fz_make_location(chapter, pno)
    prev_loc = mupdf.fz_previous_page(self.this, loc)
    return prev_loc.chapter, prev_loc.page
def reload_page(self, page: Page) -> Page:
    """Make a fresh copy of a page.

    The passed-in page is invalidated (its `this` is cleared and
    `_erase()` is called), then the same page number is loaded again via
    load_page(). The annotation references of the old page are copied to
    the new page object.

    Args:
        page: the Page to reload.
    Returns:
        The newly loaded Page.
    """
    old_annots = {}  # copy annot references to here
    pno = page.number  # save the page number
    for k, v in page._annot_refs.items():  # save the annot dictionary
        old_annots[k] = v

    # When we call `self.load_page()` below, it will end up in
    # fz_load_chapter_page(), which will return any matching page in the
    # document's list of non-ref-counted loaded pages, instead of actually
    # reloading the page.
    #
    # We want to assert that we have actually reloaded the fz_page, and not
    # simply returned the same `fz_page*` pointer from the document's list
    # of non-ref-counted loaded pages.
    #
    # So we first remove our reference to the `fz_page*`. This will
    # decrement .refs, and if .refs was 1, this is guaranteed to free the
    # `fz_page*` and remove it from the document's list if it was there. So
    # we are guaranteed that our returned `fz_page*` is from a genuine
    # reload, even if it happens to reuse the original block of memory.
    #
    # However if the original .refs is greater than one, there must be
    # other references to the `fz_page` somewhere, and we require that
    # these other references are not keeping the page in the document's
    # list. We check that we are returning a newly loaded page by
    # asserting that our returned `fz_page*` is different from the original
    # `fz_page*` - the original was not freed, so a new `fz_page` cannot
    # reuse the same block of memory.
    #
    # Both values must be captured before the reference is dropped below.
    refs_old = page.this.m_internal.refs
    m_internal_old = page.this.m_internal_value()

    page.this = None
    page._erase()  # remove the page
    page = None
    TOOLS.store_shrink(100)
    page = self.load_page(pno)  # reload the page

    # copy annot refs over to the new dictionary
    #page_proxy = weakref.proxy(page)
    for k, v in old_annots.items():
        annot = old_annots[k]
        #annot.parent = page_proxy  # refresh parent to new page
        page._annot_refs[k] = annot
    if refs_old == 1:
        # We know that `page.this = None` will have decremented the ref
        # count to zero so we are guaranteed that the new `fz_page` is a
        # new page even if it happens to have reused the same block of
        # memory.
        pass
    else:
        # Check that the new `fz_page*` is different from the original.
        m_internal_new = page.this.m_internal_value()
        assert m_internal_new != m_internal_old, \
                f'{refs_old=} {m_internal_old=:#x} {m_internal_new=:#x}'
    return page
def resolve_link(self, uri=None, chapters=0):
    """Calculate internal link destination.

    Args:
        uri: (str) some Link.uri
        chapters: (bool) whether to use (chapter, page) format
    Returns:
        (page_id, x, y) where x, y are point coordinates on the page.
        page_id is either page number (if chapters=0), or (chapter, pno).
        If `uri` is empty or cannot be resolved, page_id is -1 or
        (-1, -1) respectively, and x, y are 0.
    """
    # Result for "no destination", in the requested page id format.
    unresolved = ((-1, -1), 0, 0) if chapters else (-1, 0, 0)
    if not uri:
        return unresolved
    try:
        loc, xp, yp = mupdf.fz_resolve_link(self.this, uri)
    except Exception:
        if g_exceptions_verbose: exception_info()
        return unresolved
    if chapters:
        page_id = (loc.chapter, loc.page)
    else:
        page_id = mupdf.fz_page_number_from_location(self.this, loc)
    return page_id, xp, yp
def resolve_names(self):
    """Convert the PDF's destination names into a Python dict.

    All names found in the catalog under keys "/Dests" and "/Names/Dests" are
    being included. The result is cached in `self._resolved_names`, so the
    PDF is only evaluated on the first call.

    Returns:
        A dictionary with the following layout:
        - key: (str) the name
        - value: (dict) with the following layout:
            * "page":  target page number (0-based). If no page number found -1.
            * "to": (x, y) target point on page - currently in PDF coordinates,
                    i.e. point (0,0) is the bottom-left of the page.
            * "zoom": (float) the zoom factor
            * "dest": (str) only occurs if the target location on the page has
                    not been provided as "/XYZ" or if no page number was found.
    Examples:
        {'__bookmark_1': {'page': 0, 'to': (0.0, 541.0), 'zoom': 0.0},
         '__bookmark_2': {'page': 0, 'to': (0.0, 481.45), 'zoom': 0.0}}

        or

        '21154a7c20684ceb91f9c9adc3b677c40': {'page': -1, 'dest': '/XYZ 15.75 1486 0'}, ...
    """
    if hasattr(self, "_resolved_names"):  # do not execute multiple times!
        return self._resolved_names
    # this is a backward listing of page xref to page number
    page_xrefs = {self.page_xref(i): i for i in range(self.page_count)}

    def obj_string(obj):
        """Return string version of a PDF object definition."""
        buffer = mupdf.fz_new_buffer(512)
        output = mupdf.FzOutput(buffer)
        mupdf.pdf_print_obj(output, obj, 1, 0)
        output.fz_close_output()
        return JM_UnicodeFromBuffer(buffer)

    def get_array(val):
        """Generate value of one item of the names dictionary.

        `val` is the PDF object stored for a name: an array, or a
        dictionary whose "D" entry is evaluated instead.
        """
        templ_dict = {"page": -1, "dest": ""}  # value template
        if val.pdf_is_indirect():
            val = mupdf.pdf_resolve_indirect(val)
        if val.pdf_is_array():
            array = obj_string(val)
        elif val.pdf_is_dict():
            array = obj_string(mupdf.pdf_dict_gets(val, "D"))
        else:  # if all fails return the empty template
            return templ_dict

        # replace PDF "null" by zero, omit the square brackets
        array = array.replace("null", "0")[1:-1]

        # find stuff before first "/"
        idx = array.find("/")
        if idx < 1:  # this has no target page spec
            templ_dict["dest"] = array  # return the orig. string
            return templ_dict

        subval = array[:idx]  # stuff before "/"
        array = array[idx:]  # stuff from "/" onwards
        templ_dict["dest"] = array
        # if we start with /XYZ: extract x, y, zoom
        if array.startswith("/XYZ"):
            del templ_dict["dest"]  # don't return orig string in this case
            arr_t = array.split()[1:]
            # NOTE(review): unpacking expects exactly three numbers after
            # "/XYZ"; anything else raises ValueError here.
            x, y, z = tuple(map(float, arr_t))
            templ_dict["to"] = (x, y)
            templ_dict["zoom"] = z

        # extract page number
        if "0 R" in subval:  # page xref given?
            templ_dict["page"] = page_xrefs.get(int(subval.split()[0]),-1)
        else:  # naked page number given
            templ_dict["page"] = int(subval)
        return templ_dict

    def fill_dict(dest_dict, pdf_dict):
        """Generate name resolution items for pdf_dict.

        This may be either "/Names/Dests" or just "/Dests"
        """
        # length of the PDF dictionary
        name_count = mupdf.pdf_dict_len(pdf_dict)

        # extract key-val of each dict item
        for i in range(name_count):
            key = mupdf.pdf_dict_get_key(pdf_dict, i)
            val = mupdf.pdf_dict_get_val(pdf_dict, i)
            if key.pdf_is_name():  # this should always be true!
                dict_key = key.pdf_to_name()
            else:
                message(f"key {i} is no /Name")
                dict_key = None

            if dict_key:
                dest_dict[dict_key] = get_array(val)  # store key/value in dict

    # access underlying PDF document of fz Document
    pdf = mupdf.pdf_document_from_fz_document(self)

    # access PDF catalog
    catalog = mupdf.pdf_dict_gets(mupdf.pdf_trailer(pdf), "Root")

    dest_dict = {}

    # make PDF_NAME(Dests)
    dests = mupdf.pdf_new_name("Dests")

    # extract destinations old style (PDF 1.1)
    old_dests = mupdf.pdf_dict_get(catalog, dests)
    if old_dests.pdf_is_dict():
        fill_dict(dest_dict, old_dests)

    # extract destinations new style (PDF 1.2+); entries with the same
    # name overwrite the old style ones stored above
    tree = mupdf.pdf_load_name_tree(pdf, dests)
    if tree.pdf_is_dict():
        fill_dict(dest_dict, tree)

    self._resolved_names = dest_dict  # store result or reuse
    return dest_dict
def save(
        self,
        filename,
        garbage=0,
        clean=0,
        deflate=0,
        deflate_images=0,
        deflate_fonts=0,
        incremental=0,
        ascii=0,
        expand=0,
        linear=0,
        no_new_id=0,
        appearance=0,
        pretty=0,
        encryption=1,
        permissions=4095,
        owner_pw=None,
        user_pw=None,
        preserve_metadata=1,
        use_objstms=0,
        compression_effort=0,
        ):
    # From %pythonprepend save
    #
    """Save PDF to file, pathlib.Path or file pointer.

    Args:
        filename: a str, an object with an `open` attribute (taken as a
            pathlib.Path), an object with a `name` attribute (taken as a
            file object, saved via its name), or an object with a `seek`
            attribute (written to directly).
        owner_pw, user_pw: passwords, each at most 40 characters long.
            If only `user_pw` is given, it is used as owner password too.
        Other arguments are copied into the MuPDF write options.
    Raises:
        ValueError: document closed or encrypted, unsupported `filename`,
            non-incremental save to the original file, zero pages,
            incremental save to another file or for a stream-based
            document, or a password longer than 40 characters.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if type(filename) is str:
        pass
    elif hasattr(filename, "open"):  # assume: pathlib.Path
        filename = str(filename)
    elif hasattr(filename, "name"):  # assume: file object
        filename = filename.name
    elif not hasattr(filename, "seek"):  # assume file object
        raise ValueError("filename must be str, Path or file object")
    if filename == self.name and not incremental:
        raise ValueError("save to original must be incremental")
    if self.page_count < 1:
        raise ValueError("cannot save with zero pages")
    if incremental:
        if self.name != filename or self.stream:
            raise ValueError("incremental needs original file")
    if user_pw and len(user_pw) > 40 or owner_pw and len(owner_pw) > 40:
        raise ValueError("password length must not exceed 40")

    pdf = _as_pdf_document(self)
    # Transfer the keyword arguments to the MuPDF write options.
    opts = mupdf.PdfWriteOptions()
    opts.do_incremental     = incremental
    opts.do_ascii           = ascii
    opts.do_compress        = deflate
    opts.do_compress_images = deflate_images
    opts.do_compress_fonts  = deflate_fonts
    opts.do_decompress      = expand
    opts.do_garbage         = garbage
    opts.do_pretty          = pretty
    opts.do_linear          = linear
    # `clean` controls both the clean and the sanitize option.
    opts.do_clean           = clean
    opts.do_sanitize        = clean
    opts.dont_regenerate_id = no_new_id
    opts.do_appearance      = appearance
    opts.do_encrypt         = encryption
    opts.permissions        = permissions
    # The owner password falls back to the user password.
    if owner_pw is not None:
        opts.opwd_utf8_set_value(owner_pw)
    elif user_pw is not None:
        opts.opwd_utf8_set_value(user_pw)
    if user_pw is not None:
        opts.upwd_utf8_set_value(user_pw)
    opts.do_preserve_metadata = preserve_metadata
    opts.do_use_objstms     = use_objstms
    opts.compression_effort = compression_effort

    out = None
    ASSERT_PDF(pdf)
    pdf.m_internal.resynth_required = 0
    JM_embedded_clean(pdf)
    if no_new_id == 0:
        JM_ensure_identity(pdf)
    if isinstance(filename, str):
        #log( 'calling mupdf.pdf_save_document()')
        mupdf.pdf_save_document(pdf, filename, opts)
    else:
        # Not a path: write through an output wrapping the file object.
        out = JM_new_output_fileptr(filename)
        #log( f'{type(out)=} {type(out.this)=}')
        mupdf.pdf_write_document(pdf, out, opts)
        out.fz_close_output()
def save_snapshot(self, filename):
    """Save a file snapshot suitable for journalling.

    Args:
        filename: a str, an object with an `open` attribute (taken as a
            pathlib.Path) or an object with a `name` attribute (taken as
            a file object, saved via its name).
    Raises:
        ValueError: document closed, unsupported `filename`, or the
            target equals the document's own name.
    """
    if self.is_closed:
        raise ValueError("doc is closed")
    if type(filename) is not str:
        if hasattr(filename, "open"):  # assume: pathlib.Path
            filename = str(filename)
        elif hasattr(filename, "name"):  # assume: file object
            filename = filename.name
        else:
            raise ValueError("filename must be str, Path or file object")
    if filename == self.name:
        raise ValueError("cannot snapshot to original")
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    mupdf.pdf_save_snapshot(pdf, filename)
def saveIncr(self):
    """Save PDF incrementally.

    Calls save() on the document's own name with `incremental=True` and
    `mupdf.PDF_ENCRYPT_KEEP` as encryption method.
    """
    target = self.name
    return self.save(target, incremental=True, encryption=mupdf.PDF_ENCRYPT_KEEP)
def select(self, pyliste):
    """Build sub-pdf with page numbers in the list.

    Args:
        pyliste: non-empty sequence of page numbers, each within
            0 .. len(self)-1.
    Raises:
        ValueError: document closed or encrypted, not a PDF, `pyliste`
            is no sequence, or it is empty / has bad page numbers.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not self.is_pdf:
        raise ValueError("is no PDF")
    if not hasattr(pyliste, "__getitem__"):
        raise ValueError("sequence required")
    page_numbers = range(len(self))
    # Checking smallest and largest entry covers all entries.
    selection_ok = (
        len(pyliste) != 0
        and min(pyliste) in page_numbers
        and max(pyliste) in page_numbers
    )
    if not selection_ok:
        raise ValueError("bad page number(s)")
    # get underlying pdf document,
    pdf = _as_pdf_document(self)
    # create page sub-pdf via pdf_rearrange_pages2().
    #
    if mupdf_version_tuple >= (1, 24):
        mupdf.pdf_rearrange_pages2(pdf, pyliste)
    else:
        # mupdf.pdf_rearrange_pages2() not available.
        extra.rearrange_pages2(pdf, tuple(pyliste))
    # remove any existing pages with their kids
    self._reset_page_refs()
def set_language(self, language=None):
    """Set the PDF document language.

    Args:
        language: language string; if empty or None, the language is set
            to `mupdf.FZ_LANG_UNSET`.
    Returns:
        True.
    """
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    if language:
        lang = mupdf.fz_text_language_from_string(language)
    else:
        lang = mupdf.FZ_LANG_UNSET
    mupdf.pdf_set_document_language(pdf, lang)
    return True
def set_layer(self, config, basestate=None, on=None, off=None, rbgroups=None, locked=None):
    """Set the PDF keys /ON, /OFF, /RBGroups of an OC layer.

    Args:
        config: index into the /Configs array, or -1 for the default
            configuration /D.
        basestate: "ON", "OFF" or "Unchanged" (compared case-insensitively).
        on, off, locked: lists / tuples of OCG xrefs.
        rbgroups: list / tuple of lists / tuples of OCG xrefs.
    Raises:
        ValueError: document closed, no optional content, wrong argument
            type, unknown OCG xrefs, bad `basestate` or bad `config`.
    """
    if self.is_closed:
        raise ValueError("document closed")
    ocgs = set(self.get_ocgs().keys())
    if not ocgs:
        raise ValueError("document has no optional content")

    # 'on', 'off' and 'locked' are validated the same way.
    for label, xrefs in (("on", on), ("off", off), ("locked", locked)):
        if not xrefs:
            continue
        if type(xrefs) not in (list, tuple):
            raise ValueError(f"bad type: '{label}'")
        unknown = set(xrefs).difference(ocgs)
        if unknown:
            raise ValueError(f"bad OCGs in '{label}': {unknown}")

    if rbgroups:
        if type(rbgroups) not in (list, tuple):
            raise ValueError("bad type: 'rbgroups'")
        for group in rbgroups:
            if type(group) not in (list, tuple):
                raise ValueError("bad RBGroup '%s'" % group)
            unknown = set(group).difference(ocgs)
            if unknown:
                raise ValueError("bad OCGs in RBGroup: %s" % unknown)

    if basestate:
        # Normalize the spelling to one of "ON", "OFF", "Unchanged".
        basestate = str(basestate).upper()
        if basestate == "UNCHANGED":
            basestate = "Unchanged"
        if basestate not in ("ON", "OFF", "Unchanged"):
            raise ValueError("bad 'basestate'")

    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    ocp = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer( pdf),
            PDF_NAME('Root'),
            PDF_NAME('OCProperties'),
            )
    if not ocp.m_internal:
        return
    if config == -1:
        target = mupdf.pdf_dict_get( ocp, PDF_NAME('D'))
    else:
        configs = mupdf.pdf_dict_get( ocp, PDF_NAME('Configs'))
        target = mupdf.pdf_array_get( configs, config)
    if not target.m_internal:
        raise ValueError( MSG_BAD_OC_CONFIG)
    JM_set_ocg_arrays( target, basestate, on, off, rbgroups, locked)
    mupdf.ll_pdf_read_ocg( pdf.m_internal)
def set_layer_ui_config(self, number, action=0):
    """Set / unset OC intent configuration.

    Args:
        number: sequence number of the UI entry, or its text as listed by
            layer_ui_configs().
        action: 1 toggles the entry, 2 deselects it, any other value
            selects it.
    Raises:
        ValueError: `number` is a string not matching any entry text.
    """
    # The user might have given the name instead of sequence number,
    # so select by that name and continue with corresp. number
    if isinstance(number, str):
        select = [ui["number"] for ui in self.layer_ui_configs() if ui["text"] == number]
        if not select:
            raise ValueError(f"bad OCG '{number}'.")
        number = select[0]  # this is the number for the name
    pdf = _as_pdf_document(self)
    # Same PDF check as the other layer methods; a bare `assert` would be
    # dropped when running with -O.
    ASSERT_PDF(pdf)
    if action == 1:
        mupdf.pdf_toggle_layer_config_ui(pdf, number)
    elif action == 2:
        mupdf.pdf_deselect_layer_config_ui(pdf, number)
    else:
        mupdf.pdf_select_layer_config_ui(pdf, number)
def set_markinfo(self, markinfo: dict) -> bool:
    """Set the PDF MarkInfo values.

    Args:
        markinfo: dict with any of the keys "Marked", "UserProperties",
            "Suspects"; keys not given are written as false.
    Returns:
        False if `markinfo` is empty or not a dict, else True.
    Raises:
        ValueError: not a PDF, unknown keys, or a value whose lower-cased
            string form is neither "true" nor "false".
    """
    catalog_xref = self.pdf_catalog()
    if catalog_xref == 0:
        raise ValueError("not a PDF")
    if not markinfo or not isinstance(markinfo, dict):
        return False
    settings = {"Marked": False, "UserProperties": False, "Suspects": False}
    unknown = set(markinfo.keys()).difference(settings.keys())
    if unknown:
        raise ValueError(f"bad MarkInfo key(s): {unknown}")
    settings.update(markinfo)
    # Build the PDF dictionary source "<</Key value...>>".
    items = []
    for key, raw in settings.items():
        text = str(raw).lower()
        if text not in ("true", "false"):
            raise ValueError(f"bad key value '{key}': '{text}'")
        items.append(f"/{key} {text}")
    self.xref_set_key(catalog_xref, "MarkInfo", "<<" + "".join(items) + ">>")
    return True
def set_pagelayout(self, pagelayout: str):
    """Set the PDF PageLayout value.

    Args:
        pagelayout: one of "SinglePage", "OneColumn", "TwoColumnLeft",
            "TwoColumnRight", "TwoPageLeft", "TwoPageRight"; matched
            case-insensitively, a leading "/" is ignored.
    Returns:
        True.
    Raises:
        ValueError: not a PDF, or bad value.
    """
    valid = ("SinglePage", "OneColumn", "TwoColumnLeft", "TwoColumnRight", "TwoPageLeft", "TwoPageRight")
    catalog_xref = self.pdf_catalog()
    if catalog_xref == 0:
        raise ValueError("not a PDF")
    if not pagelayout:
        raise ValueError("bad PageLayout value")
    if pagelayout[0] == "/":
        pagelayout = pagelayout[1:]
    wanted = pagelayout.lower()
    # Pick the canonical spelling of the requested value.
    canonical = next((v for v in valid if v.lower() == wanted), None)
    if canonical is None:
        raise ValueError("bad PageLayout value")
    self.xref_set_key(catalog_xref, "PageLayout", f"/{canonical}")
    return True
def set_pagemode(self, pagemode: str):
    """Set the PDF PageMode value.

    Args:
        pagemode: one of "UseNone", "UseOutlines", "UseThumbs",
            "FullScreen", "UseOC", "UseAttachments"; matched
            case-insensitively, a leading "/" is ignored.
    Returns:
        True.
    Raises:
        ValueError: not a PDF, or bad value.
    """
    valid = ("UseNone", "UseOutlines", "UseThumbs", "FullScreen", "UseOC", "UseAttachments")
    catalog_xref = self.pdf_catalog()
    if catalog_xref == 0:
        raise ValueError("not a PDF")
    if not pagemode:
        raise ValueError("bad PageMode value")
    if pagemode[0] == "/":
        pagemode = pagemode[1:]
    wanted = pagemode.lower()
    # Pick the canonical spelling of the requested value.
    canonical = next((v for v in valid if v.lower() == wanted), None)
    if canonical is None:
        raise ValueError("bad PageMode value")
    self.xref_set_key(catalog_xref, "PageMode", f"/{canonical}")
    return True
def set_xml_metadata(self, metadata):
    """Store XML document level metadata.

    Args:
        metadata: (str) the XML; it is stored UTF-8 encoded.
    Raises:
        ValueError: document closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    root = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root'))
    if not root.m_internal:
        RAISEPY( MSG_BAD_PDFROOT, JM_Exc_FileDataError)
    payload = mupdf.fz_new_buffer_from_copied_data( metadata.encode('utf-8'))
    xml = mupdf.pdf_dict_get( root, PDF_NAME('Metadata'))
    if xml.m_internal:
        # A metadata stream exists already: replace its content.
        JM_update_stream( pdf, xml, payload, 0)
        return
    # No metadata yet: create the stream object and link it to the catalog.
    xml = mupdf.pdf_add_stream( pdf, payload, mupdf.PdfObj(), 0)
    mupdf.pdf_dict_put( xml, PDF_NAME('Type'), PDF_NAME('Metadata'))
    mupdf.pdf_dict_put( xml, PDF_NAME('Subtype'), PDF_NAME('XML'))
    mupdf.pdf_dict_put( root, PDF_NAME('Metadata'), xml)
def switch_layer(self, config, as_default=0):
    """Activate an OC layer.

    Args:
        config: index of the layer configuration to select; nothing
            happens for negative values.
        as_default: if true, also make the selected configuration the
            default.
    Raises:
        ValueError: `config` >= 1 although the PDF has no (or an empty)
            /Configs array.
    """
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    cfgs = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer( pdf),
            PDF_NAME('Root'),
            PDF_NAME('OCProperties'),
            PDF_NAME('Configs')
            )
    has_configs = mupdf.pdf_is_array( cfgs) and mupdf.pdf_array_len( cfgs)
    if not has_configs:
        # Without configurations only values below 1 are tolerated.
        if config >= 1:
            raise ValueError( MSG_BAD_OC_LAYER)
        return
    if config < 0:
        return
    mupdf.pdf_select_layer_config( pdf, config)
    if as_default:
        mupdf.pdf_set_layer_config_as_default( pdf)
        mupdf.ll_pdf_read_ocg( pdf.m_internal)
def update_object(self, xref, text, page=None):
    """Replace object definition source.

    Args:
        xref: xref number of the object, 1 .. xref length - 1.
        text: (str) the new object definition.
        page: if given, the links of this page are refreshed afterwards.
    Raises:
        ValueError: document closed or encrypted, or bad xref.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen-1):
        # Report a bad xref the same way the other xref methods do.
        raise ValueError( MSG_BAD_XREF)
    ENSURE_OPERATION(pdf)
    # create new object with passed-in string
    new_obj = JM_pdf_obj_from_str(pdf, text)
    mupdf.pdf_update_object(pdf, xref, new_obj)
    if page:
        JM_refresh_links( _as_pdf_page(page))
def update_stream(self, xref=0, stream=None, new=1, compress=1):
    """Replace xref stream part.

    Args:
        xref: xref number of the object, 1 .. xref length - 1. The
            object must be a PDF dictionary.
        stream: the new stream content, converted via
            JM_BufferFromBytes().
        new: not used by this method.
        compress: passed on to JM_update_stream().
    Raises:
        ValueError: document closed or encrypted, bad xref, or object is
            no dictionary.
        TypeError: `stream` could not be converted to a buffer.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    # The last valid xref is xreflen-1, as checked by the other xref
    # methods.
    if xref < 1 or xref >= xreflen:
        raise ValueError( MSG_BAD_XREF)
    # get the object
    obj = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not mupdf.pdf_is_dict(obj):
        raise ValueError( MSG_IS_NO_DICT)
    res = JM_BufferFromBytes(stream)
    if not res:
        raise TypeError( MSG_BAD_BUFFER)
    JM_update_stream(pdf, obj, res, compress)
    # NOTE(review): this sets `dirty` on the wrapper object, whereas
    # save() accesses document fields via `pdf.m_internal` - confirm
    # that this is the intended target.
    pdf.dirty = 1
@property
def version_count(self):
    '''
    Count versions of PDF document.

    Returns 0 if the PDF document has no internal pointer.
    '''
    pdf = _as_pdf_document(self)
    if not pdf.m_internal:
        return 0
    return mupdf.pdf_count_versions(pdf)
def write(
        self,
        garbage=False,
        clean=False,
        deflate=False,
        deflate_images=False,
        deflate_fonts=False,
        incremental=False,
        ascii=False,
        expand=False,
        linear=False,
        no_new_id=False,
        appearance=False,
        pretty=False,
        encryption=1,
        permissions=4095,
        owner_pw=None,
        user_pw=None,
        preserve_metadata=1,
        use_objstms=0,
        compression_effort=0,
        ):
    """Write the PDF to memory and return it as bytes.

    The document is saved via save() into an in-memory buffer; all
    arguments are passed through to save() unchanged.
    """
    save_options = dict(
        garbage=garbage,
        clean=clean,
        no_new_id=no_new_id,
        appearance=appearance,
        deflate=deflate,
        deflate_images=deflate_images,
        deflate_fonts=deflate_fonts,
        incremental=incremental,
        ascii=ascii,
        expand=expand,
        linear=linear,
        pretty=pretty,
        encryption=encryption,
        permissions=permissions,
        owner_pw=owner_pw,
        user_pw=user_pw,
        preserve_metadata=preserve_metadata,
        use_objstms=use_objstms,
        compression_effort=compression_effort,
    )
    buffer = io.BytesIO()
    self.save(buffer, **save_options)
    return buffer.getvalue()
@property
def xref(self):
    """PDF xref number of page.

    Looks up `self.number` via the parent's page_xref().
    """
    # NOTE(review): relies on `self.parent` and `self.number`, i.e. on
    # page-like attributes - confirm this accessor belongs in this class.
    CheckParent(self)
    owner = self.parent
    return owner.page_xref(self.number)
def xref_get_key(self, xref, key):
    """Get PDF dict key value of object at 'xref'.

    Args:
        xref: xref number of the object, or -1 for the PDF trailer.
        key: the dictionary key; it is looked up as a path.
    Returns:
        A tuple (type, value) of strings. The type is one of "xref",
        "array", "dict", "int", "float", "null", "bool", "name",
        "string" or "unknown". ("null", "null") is returned if the
        object or the key does not exist.
    Raises:
        ValueError: bad xref.
    """
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
        raise ValueError( MSG_BAD_XREF)
    obj = mupdf.pdf_load_object(pdf, xref) if xref > 0 else mupdf.pdf_trailer(pdf)
    if not obj.m_internal:
        return ("null", "null")
    subobj = mupdf.pdf_dict_getp(obj, key)
    if not subobj.m_internal:
        return ("null", "null")

    # `value` stays None for types that are rendered generically below.
    value = None
    # The test for indirect references must come first.
    if mupdf.pdf_is_indirect(subobj):
        kind = "xref"
        value = "%i 0 R" % mupdf.pdf_to_num(subobj)
    elif mupdf.pdf_is_array(subobj):
        kind = "array"
    elif mupdf.pdf_is_dict(subobj):
        kind = "dict"
    elif mupdf.pdf_is_int(subobj):
        kind = "int"
        value = "%i" % mupdf.pdf_to_int(subobj)
    elif mupdf.pdf_is_real(subobj):
        kind = "float"
    elif mupdf.pdf_is_null(subobj):
        kind = "null"
        value = "null"
    elif mupdf.pdf_is_bool(subobj):
        kind = "bool"
        value = "true" if mupdf.pdf_to_bool(subobj) else "false"
    elif mupdf.pdf_is_name(subobj):
        kind = "name"
        value = "/%s" % mupdf.pdf_to_name(subobj)
    elif mupdf.pdf_is_string(subobj):
        kind = "string"
        value = JM_UnicodeFromStr(mupdf.pdf_to_text_string(subobj))
    else:
        kind = "unknown"
    if value is None:
        # Fall back to the object's source text.
        value = JM_UnicodeFromBuffer(JM_object_to_buffer(subobj, 1, 0))
    return (kind, value)
def xref_get_keys(self, xref):
    """Get the keys of PDF dict object at 'xref'. Use -1 for the PDF trailer.

    Returns:
        List of key names; empty if the dictionary length is 0.
    Raises:
        ValueError: bad xref.
    """
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    xreflen = mupdf.pdf_xref_len( pdf)
    if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
        raise ValueError( MSG_BAD_XREF)
    obj = mupdf.pdf_load_object( pdf, xref) if xref > 0 else mupdf.pdf_trailer( pdf)
    key_count = mupdf.pdf_dict_len( obj)
    return [
        mupdf.pdf_to_name( mupdf.pdf_dict_get_key( obj, idx))
        for idx in range(key_count)
    ]
def xref_is_font(self, xref):
    """Check if xref is a font object.

    True if the object's "Type" value is "/Font".

    Raises:
        ValueError: document closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    type_value = self.xref_get_key(xref, "Type")[1]
    return type_value == "/Font"
def xref_is_image(self, xref):
    """Check if xref is an image object.

    True if the object's "Subtype" value is "/Image".

    Raises:
        ValueError: document closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    subtype = self.xref_get_key(xref, "Subtype")[1]
    return subtype == "/Image"
def xref_is_stream(self, xref=0):
    """Check if xref is a stream object.

    Returns False if no PDF document is available.
    """
    pdf = _as_pdf_document(self)
    if pdf:
        return bool(mupdf.pdf_obj_num_is_stream(pdf, xref))
    return False  # not a PDF
def xref_is_xobject(self, xref):
    """Check if xref is a form xobject.

    True if the object's "Subtype" value is "/Form".

    Raises:
        ValueError: document closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    subtype = self.xref_get_key(xref, "Subtype")[1]
    return subtype == "/Form"
def xref_length(self):
    """Get length of xref table.

    Returns 0 if no PDF document is available.
    """
    pdf = _as_pdf_document(self)
    if not pdf:
        return 0
    return mupdf.pdf_xref_len(pdf)
def xref_object(self, xref, compressed=0, ascii=0):
    """Get xref object source as a string.

    Args:
        xref: xref number of the object, or -1 for the PDF trailer.
        compressed, ascii: passed on to the object-to-text conversion.
    Raises:
        ValueError: document closed, or bad xref.
    """
    if self.is_closed:
        raise ValueError("document closed")
    if g_use_extra:
        return extra.xref_object( self.this, xref, compressed, ascii)
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
        raise ValueError( MSG_BAD_XREF)
    source = mupdf.pdf_load_object(pdf, xref) if xref > 0 else mupdf.pdf_trailer(pdf)
    buffer = JM_object_to_buffer(mupdf.pdf_resolve_indirect(source), compressed, ascii)
    return JM_EscapeStrFromBuffer(buffer)
def xref_set_key(self, xref, key, value):
    """Set the value of a PDF dictionary key.

    Args:
        xref: xref number of the object, or -1 for the PDF trailer.
        key: (str) non-empty key; apart from "/" it must not contain
            characters of INVALID_NAME_CHARS.
        value: (str) non-empty value in PDF syntax; if it starts with
            "/", the rest must not contain characters of
            INVALID_NAME_CHARS.
    Raises:
        ValueError: document closed, bad key, bad value or bad xref.
    """
    if self.is_closed:
        raise ValueError("document closed")
    if not (
        key
        and isinstance(key, str)
        and INVALID_NAME_CHARS.intersection(key) in (set(), {"/"})
    ):
        raise ValueError("bad 'key'")
    if not isinstance(value, str) or not value:
        raise ValueError("bad 'value'")
    if value[0] == "/" and INVALID_NAME_CHARS.intersection(value[1:]):
        raise ValueError("bad 'value'")

    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
        raise ValueError( MSG_BAD_XREF)
    is_trailer = xref == -1
    obj = mupdf.pdf_trailer(pdf) if is_trailer else mupdf.pdf_load_object(pdf, xref)
    new_obj = JM_set_object_value(obj, key, value)
    if not new_obj.m_internal:
        return  # did not work: skip update
    if not is_trailer:
        mupdf.pdf_update_object(pdf, xref, new_obj)
        return
    # Trailer: copy every item of the new object into the trailer
    # dictionary.
    for idx in range(mupdf.pdf_dict_len(new_obj)):
        mupdf.pdf_dict_put(
                obj,
                mupdf.pdf_dict_get_key(new_obj, idx),
                mupdf.pdf_dict_get_val(new_obj, idx),
                )
def xref_stream(self, xref):
    """Get decompressed xref stream.

    Returns:
        The stream content, or None if the object is no stream.
    Raises:
        ValueError: document closed or encrypted, or bad xref.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    xreflen = mupdf.pdf_xref_len( pdf)
    if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
        raise ValueError( MSG_BAD_XREF)
    if xref >= 0:
        obj = mupdf.pdf_new_indirect( pdf, xref, 0)
    else:
        obj = mupdf.pdf_trailer( pdf)
    if not mupdf.pdf_is_stream( obj):
        return None
    return JM_BinFromBuffer( mupdf.pdf_load_stream_number( pdf, xref))
def xref_stream_raw(self, xref):
    """Return the stream of object `xref` without decompressing it.

    Returns None if the object is not a stream.

    Raises:
        ValueError: document closed or encrypted, or bad xref.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    if xref >= 0:
        obj = mupdf.pdf_new_indirect(pdf, xref, 0)
    else:
        obj = mupdf.pdf_trailer(pdf)
    if not mupdf.pdf_is_stream(obj):
        return None
    buffer_ = mupdf.pdf_load_raw_stream_number(pdf, xref)
    return JM_BinFromBuffer(buffer_)
def xref_xml_metadata(self):
    """Return the xref of the /Metadata entry of the PDF root, or 0 if absent.

    Calls RAISEPY with MSG_BAD_PDFROOT / JM_Exc_FileDataError if the
    trailer has no /Root entry.
    """
    pdf = _as_pdf_document(self)
    ASSERT_PDF(pdf)
    trailer = mupdf.pdf_trailer(pdf)
    root = mupdf.pdf_dict_get(trailer, PDF_NAME('Root'))
    if not root.m_internal:
        RAISEPY(MSG_BAD_PDFROOT, JM_Exc_FileDataError)
    metadata = mupdf.pdf_dict_get(root, PDF_NAME('Metadata'))
    if not metadata.m_internal:
        return 0
    return mupdf.pdf_to_num(metadata)
# Declared slots; '__dict__' is listed as well, so instances can still
# receive arbitrary additional attributes.
__slots__ = ('this', 'page_count2', 'this_is_pdf', '__dict__')
# Read-only property exposing the private '_outline' attribute.
outline = property(lambda self: self._outline)
# Alternative names for 'write' and 'xref_is_stream'.
tobytes = write
is_stream = xref_is_stream
# 'open' is a second name for the Document class.
open = Document
class DocumentWriter:
    """Wrapper around mupdf.FzDocumentWriter, usable as a context manager.

    Leaving the `with` block calls close().
    """

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def __init__(self, path, options=''):
        """Create a writer for `path`.

        `path` may be a string, an object with an `absolute` attribute
        (converted via str()), an object with a `name` attribute (its
        `name` is used), or anything else, which is then handed to
        JM_new_output_fileptr().
        """
        if not isinstance(path, str):
            if hasattr(path, 'absolute'):
                path = str(path)
            elif hasattr(path, 'name'):
                path = path.name

        if isinstance(path, str):
            self.this = mupdf.FzDocumentWriter(path, options, mupdf.FzDocumentWriter.PathType_PDF)
            return

        # The Python output object must stay alive as long as this
        # DocumentWriter, otherwise calls to its Python-implemented
        # virtual methods fail; the asserts below check that the writer
        # holds it as '_out'.
        #
        # Independently, ownership of the underlying output is passed to
        # mupdf.FzDocumentWriter, which nulls the internal pointer of the
        # object we passed in.
        out = JM_new_output_fileptr(path)
        self.this = mupdf.FzDocumentWriter(out, options, mupdf.FzDocumentWriter.OutputType_PDF)
        assert out.m_internal_value() == 0
        assert hasattr(self.this, '_out')

    def begin_page(self, mediabox):
        """Start a new page with rectangle `mediabox`; return a DeviceWrapper."""
        rect = JM_rect_from_py(mediabox)
        return DeviceWrapper(mupdf.fz_begin_page(self.this, rect))

    def close(self):
        """Close the underlying document writer."""
        mupdf.fz_close_document_writer(self.this)

    def end_page(self):
        """Finish the page started by begin_page()."""
        mupdf.fz_end_page(self.this)
class Font:
    """Python handle for a MuPDF font; the MuPDF object is kept in `self.this`."""

    def __del__(self):
        if type(self) is not Font:
            return None

    def __init__(
            self,
            fontname=None,
            fontfile=None,
            fontbuffer=None,
            script=0,
            language=None,
            ordering=-1,
            is_bold=0,
            is_italic=0,
            is_serif=0,
            embed=1,
            ):
        """Create the font via JM_get_font().

        `fontbuffer` may be bytes, a bytearray or an object offering
        `getvalue()`; anything else raises ValueError.

        A string `fontname` is inspected case-insensitively: reserved CJK
        names select an `ordering`, names found in `fitz_fontdescriptors`
        are loaded from the optional `pymupdf_fonts` package, and
        otherwise (if no ordering was given) the name is mapped through
        `Base14_fontdict`.
        """
        if fontbuffer:
            if hasattr(fontbuffer, "getvalue"):
                fontbuffer = fontbuffer.getvalue()
            elif isinstance(fontbuffer, bytearray):
                fontbuffer = bytes(fontbuffer)
            if not isinstance(fontbuffer, bytes):
                raise ValueError("bad type: 'fontbuffer'")

        if isinstance(fontname, str):
            fname_lower = fontname.lower()
            if "/" in fname_lower or "\\" in fname_lower or "." in fname_lower:
                message("Warning: did you mean a fontfile?")

            if fname_lower in ("cjk", "china-t", "china-ts"):
                ordering = 0
            elif fname_lower.startswith("china-s"):
                ordering = 1
            elif fname_lower.startswith("korea"):
                ordering = 3
            elif fname_lower.startswith("japan"):
                ordering = 2
            elif fname_lower in fitz_fontdescriptors.keys():
                import pymupdf_fonts  # optional fonts
                fontbuffer = pymupdf_fonts.myfont(fname_lower)  # make a copy
                fontname = None  # ensure using fontbuffer only
                del pymupdf_fonts  # remove package again
            elif ordering < 0:
                fontname = Base14_fontdict.get(fontname, fontname)

        lang = mupdf.fz_text_language_from_string(language)
        font = JM_get_font(fontname, fontfile,
                fontbuffer, script, lang, ordering,
                is_bold, is_italic, is_serif, embed)
        self.this = font

    def __repr__(self):
        return "Font('%s')" % self.name

    def _valid_unicodes(self, arr):
        # fixme
        assert 0, 'Not implemented because implementation requires FT_Get_First_Char() etc.'
        #font = self.this
        #temp = arr[0]
        #ptr = temp
        #JM_valid_chars(font, ptr)

    @property
    def ascender(self):
        """Return the glyph ascender value."""
        return mupdf.fz_font_ascender(self.this)

    @property
    def bbox(self):
        """Return the font bbox as delivered by `fz_font_bbox()`."""
        return self.this.fz_font_bbox()

    @property
    def buffer(self):
        """Return a copy of the font's buffer content."""
        buffer_ = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(self.this.m_internal.buffer))
        return mupdf.fz_buffer_extract_copy(buffer_)

    def char_lengths(self, text, fontsize=11, language=None, script=0, wmode=0, small_caps=0):
        """Return a list with the advance of each character of 'text' under a fontsize."""
        lang = mupdf.fz_text_language_from_string(language)
        rc = []
        for ch in text:
            c = ord(ch)
            if small_caps:
                gid = mupdf.fz_encode_character_sc(self.this, c)
                if gid >= 0:
                    font = self.this
            else:
                gid, font = mupdf.fz_encode_character_with_fallback(self.this, c, script, lang)
            rc.append(fontsize * mupdf.fz_advance_glyph(font, gid, wmode))
        return rc

    @property
    def descender(self):
        """Return the glyph descender value."""
        return mupdf.fz_font_descender(self.this)

    @property
    def flags(self):
        """Return the font flags as a dict, or None if no flags are available.

        Keys: mono, serif, bold, italic, substitute, stretch, fake-bold,
        fake-italic, opentype, invalid-bbox, cjk, cjk-lang, embed,
        never-embed.
        """
        f = mupdf.ll_fz_font_flags(self.this.m_internal)
        if not f:
            return
        assert isinstance(f, mupdf.fz_font_flags_t)
        if not mupdf_cppyy:
            return {
                    "mono": f.is_mono,
                    "serif": f.is_serif,
                    "bold": f.is_bold,
                    "italic": f.is_italic,
                    "substitute": f.ft_substitute,
                    "stretch": f.ft_stretch,
                    "fake-bold": f.fake_bold,
                    "fake-italic": f.fake_italic,
                    "opentype": f.has_opentype,
                    "invalid-bbox": f.invalid_bbox,
                    'cjk': f.cjk,
                    'cjk-lang': f.cjk_lang,
                    'embed': f.embed,
                    'never-embed': f.never_embed,
                    }
        # cppyy includes the remaining higher bits when reading the first
        # bit-field, so we peel off one field at a time, lowest bits first.
        # Field widths follow the declaration order of fz_font_flags_t:
        # `cjk` is its own 1-bit field and `cjk_lang` is 2 bits wide.
        layout = (
                ("mono", 1),
                ("serif", 1),
                ("bold", 1),
                ("italic", 1),
                ("substitute", 1),
                ("stretch", 1),
                ("fake-bold", 1),
                ("fake-italic", 1),
                ("opentype", 1),
                ("invalid-bbox", 1),
                ("cjk", 1),
                ("cjk-lang", 2),
                ("embed", 1),
                ("never-embed", 1),
                )
        bits = f.is_mono
        ret = {}
        for key, width in layout:
            ret[key] = bits & ((1 << width) - 1)
            bits >>= width
        return ret

    def glyph_advance(self, chr_, language=None, script=0, wmode=0, small_caps=0):
        """Return the glyph width of a unicode (font size 1)."""
        lang = mupdf.fz_text_language_from_string(language)
        if small_caps:
            gid = mupdf.fz_encode_character_sc(self.this, chr_)
            if gid >= 0:
                font = self.this
        else:
            gid, font = mupdf.fz_encode_character_with_fallback(self.this, chr_, script, lang)
        return mupdf.fz_advance_glyph(font, gid, wmode)

    def glyph_bbox(self, char, language=None, script=0, small_caps=0):
        """Return the glyph bbox of a unicode (font size 1)."""
        lang = mupdf.fz_text_language_from_string(language)
        if small_caps:
            gid = mupdf.fz_encode_character_sc(self.this, char)
            if gid >= 0:
                font = self.this
        else:
            gid, font = mupdf.fz_encode_character_with_fallback(self.this, char, script, lang)
        return Rect(mupdf.fz_bound_glyph(font, gid, mupdf.FzMatrix()))

    @property
    def glyph_count(self):
        """Return the `glyph_count` member of the underlying font."""
        return self.this.m_internal.glyph_count

    def glyph_name_to_unicode(self, name):
        """Return the unicode for a glyph name."""
        return glyph_name_to_unicode(name)

    def has_glyph(self, chr, language=None, script=0, fallback=0, small_caps=0):
        """Check whether font has a glyph for this unicode.

        Returns the glyph id delivered by MuPDF's character encoding
        functions; with `fallback`, fallback fonts are consulted too.
        """
        if fallback:
            lang = mupdf.fz_text_language_from_string(language)
            gid, font = mupdf.fz_encode_character_with_fallback(self.this, chr, script, lang)
        else:
            if small_caps:
                gid = mupdf.fz_encode_character_sc(self.this, chr)
            else:
                gid = mupdf.fz_encode_character(self.this, chr)
        return gid

    @property
    def is_bold(self):
        return mupdf.fz_font_is_bold(self.this)

    @property
    def is_italic(self):
        return mupdf.fz_font_is_italic(self.this)

    @property
    def is_monospaced(self):
        return mupdf.fz_font_is_monospaced(self.this)

    @property
    def is_serif(self):
        return mupdf.fz_font_is_serif(self.this)

    @property
    def is_writable(self):
        """Always True.

        The former check (Type3 procs, substitute flag, font writing
        support) was disabled; see pymupdf commit ef4056ee4da2.
        """
        return True

    @property
    def name(self):
        """Return the font name as reported by MuPDF."""
        return mupdf.fz_font_name(self.this)

    def text_length(self, text, fontsize=11, language=None, script=0, wmode=0, small_caps=0):
        """Return length of unicode 'text' under a fontsize.

        Raises TypeError if `text` is not a string.
        """
        thisfont = self.this
        lang = mupdf.fz_text_language_from_string(language)
        rc = 0
        if not isinstance(text, str):
            raise TypeError(MSG_BAD_TEXT)
        for ch in text:
            c = ord(ch)
            if small_caps:
                gid = mupdf.fz_encode_character_sc(thisfont, c)
                if gid >= 0:
                    font = thisfont
            else:
                gid, font = mupdf.fz_encode_character_with_fallback(thisfont, c, script, lang)
            rc += mupdf.fz_advance_glyph(font, gid, wmode)
        rc *= fontsize
        return rc

    def unicode_to_glyph_name(self, ch):
        """Return the glyph name for a unicode."""
        return unicode_to_glyph_name(ch)

    def valid_codepoints(self):
        '''
        list of valid unicodes of a fz_font

        Currently always an empty list, because the required helper
        _valid_unicodes() is not implemented (fixme).
        '''
        return []
class Graftmap:
    """Holds a MuPDF graft map created for a target PDF document."""

    def __del__(self):
        if type(self) is not Graftmap:
            return
        self.thisown = False

    def __init__(self, doc):
        """Create a graft map for `doc`, which must be a PDF."""
        target_pdf = _as_pdf_document(doc)
        ASSERT_PDF(target_pdf)
        self.this = mupdf.pdf_new_graft_map(target_pdf)
        self.thisown = True
class Link:
    """Wrapper for a mupdf.FzLink.

    `parent` (the owning page), `xref` and `id` are attached from outside
    after construction.
    """

    def __del__(self):
        self._erase()

    def __init__(self, this):
        assert isinstance(this, mupdf.FzLink)
        self.this = this

    def __repr__(self):
        CheckParent(self)
        return "link on " + str(self.parent)

    def __str__(self):
        CheckParent(self)
        return "link on " + str(self.parent)

    def _border(self, doc, xref):
        """Return the border info of link object `xref`, or None if unavailable."""
        pdf = _as_pdf_document(doc)
        if not pdf:
            return
        link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
        if not link_obj.m_internal:
            return
        return JM_annot_border(link_obj)

    def _colors(self, doc, xref):
        """Return the color info of link object `xref`.

        Returns None for a non-PDF; raises ValueError for a bad xref.
        """
        pdf = _as_pdf_document(doc)
        if not pdf:
            return
        link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
        if not link_obj.m_internal:
            raise ValueError(MSG_BAD_XREF)
        return JM_annot_colors(link_obj)

    def _erase(self):
        """Detach this link from its page."""
        self.parent = None
        self.thisown = False

    def _setBorder(self, border, doc, xref):
        """Apply `border` to link object `xref`; None if not possible."""
        pdf = _as_pdf_document(doc)
        if not pdf:
            return
        link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
        if not link_obj.m_internal:
            return
        return JM_annot_set_border(border, pdf, link_obj)

    @property
    def border(self):
        return self._border(self.parent.parent.this, self.xref)

    @property
    def colors(self):
        return self._colors(self.parent.parent.this, self.xref)

    @property
    def dest(self):
        """Create link destination details."""
        if hasattr(self, "parent") and self.parent is None:
            raise ValueError("orphaned object: parent is None")
        doc = self.parent.parent
        if doc.is_closed or doc.is_encrypted:
            raise ValueError("document closed or encrypted")
        # External links and '#...' URIs are passed on unresolved.
        if self.is_external or self.uri.startswith("#"):
            resolved = None
        else:
            resolved = doc.resolve_link(self.uri)
        return linkDest(self, resolved, doc)

    @property
    def flags(self)->int:
        """Value of the link's "F" key; 0 if absent or the document is no PDF."""
        CheckParent(self)
        doc = self.parent.parent
        if not doc.is_pdf:
            return 0
        entry = doc.xref_get_key(self.xref, "F")
        if entry[1] == "null":
            return 0
        return int(entry[1])

    @property
    def is_external(self):
        """Flag the link as external."""
        CheckParent(self)
        if g_use_extra:
            return extra.Link_is_external(self.this)
        raw = self.this
        if not raw.m_internal or not raw.m_internal.uri:
            return False
        return bool(mupdf.fz_is_external_link(raw.m_internal.uri))

    @property
    def next(self):
        """Next link."""
        if not self.this.m_internal:
            return None
        CheckParent(self)
        successor = self.this.next()
        if not successor.m_internal:
            return None
        val = Link(successor)
        if val:
            val.thisown = True
            val.parent = self.parent  # the new link lives on the same page
            val.parent._annot_refs[id(val)] = val
            if self.xref > 0:
                # This link has an xref: the successor gets the xref and
                # id that follow ours among the page's link annotations.
                link_xrefs = [x[0] for x in self.parent.annot_xrefs() if x[1] == mupdf.PDF_ANNOT_LINK]
                link_ids = [x[2] for x in self.parent.annot_xrefs() if x[1] == mupdf.PDF_ANNOT_LINK]
                idx = link_xrefs.index(self.xref)
                val.xref = link_xrefs[idx + 1]
                val.id = link_ids[idx + 1]
            else:
                val.xref = 0
                val.id = ""
        return val

    @property
    def rect(self):
        """Rectangle ('hot area')."""
        CheckParent(self)
        # utils.py:getLinkDict() appears to rely on an exception being
        # raised here when the link is unusable.
        if not self.this or not self.this.m_internal:
            raise Exception( 'self.this.m_internal not available')
        return Rect(JM_py_from_rect(self.this.rect()))

    def set_border(self, border=None, width=0, dashes=None, style=None):
        """Set the link border.

        If `border` is not a dict, one is built from `width`, `style`
        and `dashes`.
        """
        if type(border) is not dict:
            border = {"width": width, "style": style, "dashes": dashes}
        return self._setBorder(border, self.parent.parent.this, self.xref)

    def set_colors(self, colors=None, stroke=None, fill=None):
        """Set border colors."""
        CheckParent(self)
        doc = self.parent.parent
        if type(colors) is not dict:
            colors = {"fill": fill, "stroke": stroke}
        fill = colors.get("fill")
        stroke = colors.get("stroke")
        if fill is not None:
            message("warning: links have no fill color")
        if stroke in ([], ()):
            # empty sequence: store an empty color array
            doc.xref_set_key(self.xref, "C", "[]")
            return
        if hasattr(stroke, "__float__"):
            stroke = [float(stroke)]
        CheckColor(stroke)
        assert len(stroke) in (1, 3, 4)
        color_array = f"[{_format_g(stroke)}]"
        doc.xref_set_key(self.xref, "C", color_array)

    def set_flags(self, flags):
        """Store integer `flags` under the link's "F" key (PDF only)."""
        CheckParent(self)
        doc = self.parent.parent
        if not doc.is_pdf:
            raise ValueError("is no PDF")
        if type(flags) is not int:
            raise ValueError("bad 'flags' value")
        doc.xref_set_key(self.xref, "F", str(flags))
        return None

    @property
    def uri(self):
        """Uri string."""
        #CheckParent(self)
        if g_use_extra:
            return extra.link_uri(self.this)
        raw = self.this
        if not raw.m_internal:
            return ''
        return raw.m_internal.uri

    page = -1
class Matrix:
    """Six-component matrix (a, b, c, d, e, f) that behaves like a sequence."""

    def __abs__(self):
        """Square root of the sum of the squared components."""
        return math.sqrt(sum(v * v for v in self))

    def __add__(self, m):
        """Add a number to every component, or add a 6-sequence item by item."""
        if hasattr(m, "__float__"):
            return Matrix(*[v + m for v in self])
        if len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        return Matrix(*[v + m[i] for i, v in enumerate(self)])

    def __bool__(self):
        """False only if all six components are zero."""
        largest = max(self)
        smallest = min(self)
        return not (largest == smallest and smallest == 0)

    def __eq__(self, mat):
        if not hasattr(mat, "__len__"):
            return False
        return len(mat) == 6 and bool(self - mat) is False

    def __getitem__(self, i):
        components = (self.a, self.b, self.c, self.d, self.e, self.f)
        return components[i]

    def __init__(self, *args, a=None, b=None, c=None, d=None, e=None, f=None):
        """
        Matrix() - all zeros
        Matrix(a, b, c, d, e, f)
        Matrix(zoom-x, zoom-y) - zoom
        Matrix(shear-x, shear-y, 1) - shear
        Matrix(degree) - rotate
        Matrix(Matrix) - new copy
        Matrix(sequence) - from 'sequence'
        Matrix(mupdf.FzMatrix) - from MuPDF class wrapper for fz_matrix.

        Keyword args a, b, c, d, e, f that are not None replace the
        corresponding component afterwards.
        """
        count = len(args)
        if count == 0:
            self.a = self.b = self.c = self.d = self.e = self.f = 0.0
        elif count == 1:
            # FzMatrix, rotation angle, or any 6-sequence
            first = args[0]
            if isinstance(first, mupdf.FzMatrix):
                self.a = first.a
                self.b = first.b
                self.c = first.c
                self.d = first.d
                self.e = first.e
                self.f = first.f
            elif hasattr(first, "__float__"):
                theta = math.radians(first)
                cos_ = round(math.cos(theta), 8)
                sin_ = round(math.sin(theta), 8)
                self.a = self.d = cos_
                self.b = sin_
                self.c = -sin_
                self.e = self.f = 0.0
            else:
                self.a, self.b, self.c, self.d, self.e, self.f = map(float, first)
        elif count == 2:
            self.a, self.b, self.c, self.d, self.e, self.f = (
                    float(args[0]), 0.0, 0.0, float(args[1]), 0.0, 0.0)
        elif count == 3:
            # third item selects zoom (0) or shear (1)
            if args[2] == 0:
                self.a, self.b, self.c, self.d, self.e, self.f = (
                        float(args[0]), 0.0, 0.0, float(args[1]), 0.0, 0.0)
            elif args[2] == 1:
                self.a, self.b, self.c, self.d, self.e, self.f = (
                        1.0, float(args[1]), float(args[0]), 1.0, 0.0, 0.0)
            else:
                raise ValueError("Matrix: bad args")
        elif count == 6:
            self.a, self.b, self.c, self.d, self.e, self.f = map(float, args)
        elif count > 6:
            raise ValueError("Matrix: bad seq len")
        else:
            raise ValueError("Matrix: bad args")

        # Explicit keyword values win over whatever was set above.
        if a is not None:
            self.a = a
        if b is not None:
            self.b = b
        if c is not None:
            self.c = c
        if d is not None:
            self.d = d
        if e is not None:
            self.e = e
        if f is not None:
            self.f = f

    def __invert__(self):
        """Calculate inverted matrix."""
        inverse = Matrix()
        inverse.invert(self)
        return inverse

    def __len__(self):
        return 6

    def __mul__(self, m):
        """Scale every component by a number, or concatenate with matrix `m`."""
        if hasattr(m, "__float__"):
            return Matrix(*[v * m for v in self])
        return Matrix(1, 1).concat(self, m)

    def __neg__(self):
        return Matrix(*[-v for v in self])

    def __nonzero__(self):
        largest = max(self)
        smallest = min(self)
        return not (largest == smallest and smallest == 0)

    def __pos__(self):
        return Matrix(self)

    def __repr__(self):
        return "Matrix" + str(tuple(self))

    def __setitem__(self, i, v):
        v = float(v)
        for position, attr in enumerate("abcdef"):
            if i == position:
                setattr(self, attr, v)
                return
        raise IndexError("index out of range")

    def __sub__(self, m):
        """Subtract a number from every component, or a 6-sequence item by item."""
        if hasattr(m, "__float__"):
            return Matrix(*[v - m for v in self])
        if len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        return Matrix(*[v - m[i] for i, v in enumerate(self)])

    def __truediv__(self, m):
        """Divide every component by a number, or multiply with the inverse of `m`."""
        if hasattr(m, "__float__"):
            return Matrix(*[v * 1. / m for v in self])
        inverse = util_invert_matrix(m)[1]
        if not inverse:
            raise ZeroDivisionError("matrix not invertible")
        return Matrix(1, 1).concat(self, inverse)

    def concat(self, one, two):
        """Multiply two matrices and replace current one."""
        if not len(one) == len(two) == 6:
            raise ValueError("Matrix: bad seq len")
        self.a, self.b, self.c, self.d, self.e, self.f = util_concat_matrix(one, two)
        return self

    def invert(self, src=None):
        """Invert `src` (default: this matrix) and store the result here.

        Returns 0 on success. Returns 1 and changes nothing if the
        inversion failed.
        """
        outcome = util_invert_matrix(self if src is None else src)
        if outcome[0] == 1:
            return 1
        self.a, self.b, self.c, self.d, self.e, self.f = outcome[1]
        return 0

    @property
    def is_rectilinear(self):
        """True if rectangles are mapped to rectangles."""
        if abs(self.b) < EPSILON and abs(self.c) < EPSILON:
            return True
        return abs(self.a) < EPSILON and abs(self.d) < EPSILON

    def prerotate(self, theta):
        """Calculate pre rotation and replace current matrix."""
        theta = float(theta)
        # bring the angle into [0, 360)
        while theta < 0:
            theta += 360
        while theta >= 360:
            theta -= 360

        if abs(0 - theta) < EPSILON:
            return self
        if abs(90.0 - theta) < EPSILON:
            self.a, self.b, self.c, self.d = self.c, self.d, -self.a, -self.b
        elif abs(180.0 - theta) < EPSILON:
            self.a, self.b, self.c, self.d = -self.a, -self.b, -self.c, -self.d
        elif abs(270.0 - theta) < EPSILON:
            self.a, self.b, self.c, self.d = -self.c, -self.d, self.a, self.b
        else:
            rad = math.radians(theta)
            sin_ = math.sin(rad)
            cos_ = math.cos(rad)
            a0, b0, c0, d0 = self.a, self.b, self.c, self.d
            self.a = cos_ * a0 + sin_ * c0
            self.b = cos_ * b0 + sin_ * d0
            self.c = -sin_ * a0 + cos_ * c0
            self.d = -sin_ * b0 + cos_ * d0
        return self

    def prescale(self, sx, sy):
        """Calculate pre scaling and replace current matrix."""
        sx = float(sx)
        sy = float(sy)
        self.a, self.b = self.a * sx, self.b * sx
        self.c, self.d = self.c * sy, self.d * sy
        return self

    def preshear(self, h, v):
        """Calculate pre shearing and replace current matrix."""
        h = float(h)
        v = float(v)
        a0, b0, c0, d0 = self.a, self.b, self.c, self.d
        self.a = a0 + v * c0
        self.b = b0 + v * d0
        self.c = c0 + h * a0
        self.d = d0 + h * b0
        return self

    def pretranslate(self, tx, ty):
        """Calculate pre translation and replace current matrix."""
        tx = float(tx)
        ty = float(ty)
        self.e += tx * self.a + ty * self.c
        self.f += tx * self.b + ty * self.d
        return self

    __inv__ = __invert__
    __div__ = __truediv__
    norm = __abs__
class IdentityMatrix(Matrix):
    """Identity matrix [1, 0, 0, 1, 0, 0]"""

    def __hash__(self):
        return hash((1, 0, 0, 1, 0, 0))

    def __init__(self):
        Matrix.__init__(self, 1.0, 1.0)

    def __repr__(self):
        return "IdentityMatrix(1.0, 0.0, 0.0, 1.0, 0.0, 0.0)"

    def __setattr__(self, name, value):
        # Assignments to the matrix components are overridden with the
        # identity values; any other attribute is stored as given.
        if name in "ad":
            stored = 1.0
        elif name in "bcef":
            stored = 0.0
        else:
            stored = value
        self.__dict__[name] = stored

    def checkargs(*args):
        raise NotImplementedError("Identity is readonly")


Identity = IdentityMatrix()
class linkDest:
    """link or outline destination details"""

    def __init__(self, obj, rlink, document=None):
        """Derive destination details from a link or outline object.

        Args:
            obj: object offering `is_external`, `page` and `uri`.
            rlink: resolved link; if truthy, items 0, 1 and 2 are used as
                page number and coordinates to build a '#page=...' URI
                when the object's URI does not start with '#'.
            document: used to resolve '#nameddest=' destinations.
        """
        isExt = obj.is_external
        isInt = not isExt
        self.dest = ""
        self.file_spec = ""
        self.flags = 0
        self.is_map = False
        self.is_uri = False
        self.kind = LINK_NONE
        self.lt = Point(0, 0)
        self.named = dict()
        self.new_window = ""
        self.page = obj.page
        self.rb = Point(0, 0)
        self.uri = obj.uri

        def uri_to_dict(uri):
            # Split 'k1=v1&k2=v2&k3' into {'k1': 'v1', 'k2': 'v2', 'k3': None}.
            # Works on the string handed in; callers strip a leading '#'.
            ret = dict()
            for item in uri.split('&'):
                eq = item.find('=')
                if eq >= 0:
                    ret[item[:eq]] = item[eq+1:]
                else:
                    ret[item] = None
            return ret

        if rlink and not self.uri.startswith("#"):
            self.uri = f"#page={rlink[0] + 1}&zoom=0,{_format_g(rlink[1])},{_format_g(rlink[2])}"
        if obj.is_external:
            self.page = -1
            self.kind = LINK_URI
        if not self.uri:
            self.page = -1
            self.kind = LINK_NONE
        if isInt and self.uri:
            self.uri = self.uri.replace("&zoom=nan", "&zoom=0")
            if self.uri.startswith("#"):
                self.kind = LINK_GOTO
                m = re.match('^#page=([0-9]+)&zoom=([0-9.]+),(-?[0-9.]+),(-?[0-9.]+)$', self.uri)
                if m:
                    self.page = int(m.group(1)) - 1
                    self.lt = Point(float((m.group(3))), float(m.group(4)))
                    self.flags = self.flags | LINK_FLAG_L_VALID | LINK_FLAG_T_VALID
                else:
                    m = re.match('^#page=([0-9]+)$', self.uri)
                    if m:
                        self.page = int(m.group(1)) - 1
                    else:
                        self.kind = LINK_NAMED
                        m = re.match('^#nameddest=(.*)', self.uri)
                        assert document
                        if document and m:
                            named = m.group(1)
                            self.named = document.resolve_names().get(named)
                            if self.named is None:
                                # document.resolve_names() does not contain an
                                # entry for `named` so use an empty dict.
                                self.named = dict()
                            self.named['nameddest'] = named
                        else:
                            self.named = uri_to_dict(self.uri[1:])
            else:
                # No leading '#': parse the complete URI.
                self.kind = LINK_NAMED
                self.named = uri_to_dict(self.uri)
        if obj.is_external:
            if not self.uri:
                pass
            elif self.uri.startswith("file:"):
                self.file_spec = self.uri[5:]
                if self.file_spec.startswith("//"):
                    self.file_spec = self.file_spec[2:]
                self.is_uri = False
                self.uri = ""
                self.kind = LINK_LAUNCH
                ftab = self.file_spec.split("#")
                if len(ftab) == 2:
                    if ftab[1].startswith("page="):
                        # 'file#page=n...': a page in another file
                        self.kind = LINK_GOTOR
                        self.file_spec = ftab[0]
                        self.page = int(ftab[1].split("&")[0][5:]) - 1
            elif ":" in self.uri:
                self.is_uri = True
                self.kind = LINK_URI
            else:
                self.is_uri = True
                self.kind = LINK_LAUNCH
        assert isinstance(self.named, dict)
class Widget:
    '''
    Class describing a PDF form field ("widget")
    '''

    def __init__(self):
        self.border_color = None
        self.border_style = "S"
        self.border_width = 0
        self.border_dashes = None
        self.choice_values = None  # choice fields only
        self.rb_parent = None  # radio buttons only: xref of owning parent

        self.field_name = None  # field name
        self.field_label = None  # field label
        self.field_value = None
        self.field_flags = 0
        self.field_display = 0
        self.field_type = 0  # valid range 1 through 7
        self.field_type_string = None  # field type as string

        self.fill_color = None
        self.button_caption = None  # button caption
        self.is_signed = None  # True / False if signature
        self.text_color = (0, 0, 0)
        self.text_font = "Helv"
        self.text_fontsize = 0
        self.text_maxlen = 0  # text fields only
        self.text_format = 0  # text fields only
        self._text_da = ""  # /DA = default appearance

        self.script = None  # JavaScript (/A)
        self.script_stroke = None  # JavaScript (/AA/K)
        self.script_format = None  # JavaScript (/AA/F)
        self.script_change = None  # JavaScript (/AA/V)
        self.script_calc = None  # JavaScript (/AA/C)
        self.script_blur = None  # JavaScript (/AA/Bl)
        self.script_focus = None  # JavaScript (/AA/Fo)

        self.rect = None  # annot value
        self.xref = 0  # annot value

    def __repr__(self):
        # There is no self.parent to report here.
        return f'Widget:(field_type={self.field_type_string} script={self.script})'

    def _adjust_font(self):
        """Ensure text_font is from our list and correctly spelled.

        Unknown or empty names fall back to "Helv".
        """
        if not self.text_font:
            self.text_font = "Helv"
            return
        valid_fonts = ("Cour", "TiRo", "Helv", "ZaDb")
        for f in valid_fonts:
            if self.text_font.lower() == f.lower():
                self.text_font = f
                return
        self.text_font = "Helv"
        return

    def _checker(self):
        """Any widget type checks.

        Raises ValueError for a field type outside 1..7.
        """
        if self.field_type not in range(1, 8):
            raise ValueError("bad field type")

        # if setting a radio button to ON, first set Off all buttons
        # in the group - this is not done by MuPDF:
        if self.field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON and self.field_value not in (False, "Off") and hasattr(self, "parent"):
            # so we are about setting this button to ON/True
            # check other buttons in same group and set them to 'Off'
            doc = self.parent.parent
            kids_type, kids_value = doc.xref_get_key(self.xref, "Parent/Kids")
            if kids_type == "array":
                xrefs = tuple(map(int, kids_value[1:-1].replace("0 R","").split()))
                for xref in xrefs:
                    if xref != self.xref:
                        doc.xref_set_key(xref, "AS", "/Off")
        # the calling method will now set the intended button to on and
        # will find everything prepared for correct functioning.

    def _parse_da(self):
        """Extract font name, size and color from default appearance string (/DA object).

        Equivalent to 'pdf_parse_default_appearance' function in MuPDF's 'pdf-annot.c'.
        Does nothing if the /DA string is empty; otherwise the string is
        cleared after parsing.
        """
        if not self._text_da:
            return
        font = "Helv"
        fsize = 0
        col = (0, 0, 0)
        dat = self._text_da.split()  # split on any whitespace
        for i, item in enumerate(dat):
            if item == "Tf":
                font = dat[i - 2][1:]
                fsize = float(dat[i - 1])
                dat[i] = dat[i-1] = dat[i-2] = ""
                continue
            if item == "g":  # unicolor text
                col = [(float(dat[i - 1]))]
                dat[i] = dat[i-1] = ""
                continue
            if item == "rg":  # RGB colored text
                col = [float(f) for f in dat[i - 3:i]]
                dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = ""
                continue
        self.text_font = font
        self.text_fontsize = fsize
        self.text_color = col
        self._text_da = ""
        return

    def _validate(self):
        """Validate the class entries.

        Raises ValueError for a bad rect, a missing field name or
        non-string script content. Empty values are normalized.
        """
        if (self.rect.is_infinite
            or self.rect.is_empty
           ):
            raise ValueError("bad rect")

        if not self.field_name:
            raise ValueError("field name missing")

        if self.field_label == "Unnamed":
            self.field_label = None
        CheckColor(self.border_color)
        CheckColor(self.fill_color)
        if not self.text_color:
            self.text_color = (0, 0, 0)
        CheckColor(self.text_color)

        if not self.border_width:
            self.border_width = 0

        if not self.text_fontsize:
            self.text_fontsize = 0

        self.border_style = self.border_style.upper()[0:1]

        # standardize content of JavaScript entries
        btn_type = self.field_type in (
            mupdf.PDF_WIDGET_TYPE_BUTTON,
            mupdf.PDF_WIDGET_TYPE_CHECKBOX,
            mupdf.PDF_WIDGET_TYPE_RADIOBUTTON,
        )
        if not self.script:
            self.script = None
        elif type(self.script) is not str:
            raise ValueError("script content must be a string")

        # buttons cannot have the following script actions
        for attr in (
                "script_calc",
                "script_change",
                "script_format",
                "script_stroke",
                "script_blur",
                "script_focus",
                ):
            content = getattr(self, attr)
            if btn_type or not content:
                setattr(self, attr, None)
            elif type(content) is not str:
                raise ValueError(f"{attr} content must be a string")

        self._checker()  # any field_type specific checks

    def button_states(self):
        """Return the on/off state names for button widgets.

        A button may have 'normal' or 'pressed down' appearances. While the 'Off'
        state is usually called like this, the 'On' state is often given a name
        relating to the functional context.

        Returns None for non-button types and for widgets that are not
        yet on a page. Otherwise a dict with keys "normal" and "down";
        each value is a list of state names, or None if the widget has
        no such appearance entry.
        """
        if self.field_type not in (2, 5):
            return None  # no button type
        if hasattr(self, "parent"):  # field already exists on page
            doc = self.parent.parent
        else:
            return
        xref = self.xref
        states = {"normal": None, "down": None}
        for label, ap_key in (("normal", "AP/N"), ("down", "AP/D")):
            entry = doc.xref_get_key(xref, ap_key)
            if entry[0] == "dict":
                # inline dictionary: strip the enclosing '<<' and '>>'
                source = entry[1][2:-2]
            elif entry[0] == "xref":
                # indirect reference: read the referenced object's source
                source = doc.xref_object(int(entry[1].split(" ")[0]))
            else:
                continue
            # every '/Name ...' part contributes its leading name
            states[label] = [part.split()[0] for part in source.split("/")[1:]]
        return states

    @property
    def next(self):
        return self._annot.next

    def on_state(self):
        """Return the "On" value for button widgets.

        This is useful for radio buttons mainly. Checkboxes will always return
        "Yes". Radio buttons will return the string that is unequal to "Off"
        as returned by method button_states().
        If the radio button is new / being created, it does not yet have an
        "On" value. In this case, a warning is shown and True is returned.
        """
        if self.field_type not in (2, 5):
            return None  # no checkbox or radio button
        if self.field_type == 2:
            return "Yes"
        bstate = self.button_states()
        if bstate is None:
            bstate = dict()
        for k in bstate.keys():
            # an appearance without states is reported as None
            for v in bstate[k] or ():
                if v != "Off":
                    return v
        message("warning: radio button has no 'On' value.")
        return True

    def reset(self):
        """Reset the field value to its default.
        """
        TOOLS._reset_widget(self._annot)

    def update(self):
        """Reflect Python object in the PDF.
        """
        self._validate()

        self._adjust_font()  # ensure valid text_font name

        # now create the /DA string
        self._text_da = ""
        if len(self.text_color) == 3:
            fmt = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf" + self._text_da
        elif len(self.text_color) == 1:
            fmt = "{:g} g /{f:s} {s:g} Tf" + self._text_da
        elif len(self.text_color) == 4:
            fmt = "{:g} {:g} {:g} {:g} k /{f:s} {s:g} Tf" + self._text_da
        self._text_da = fmt.format(*self.text_color, f=self.text_font,
                                    s=self.text_fontsize)

        # if widget has a '/AA/C' script, make sure it is in the '/CO'
        # array of the '/AcroForm' dictionary.
        if self.script_calc:  # there is a "calculation" script:
            # make sure we are in the /CO array
            util_ensure_widget_calc(self._annot)

        # finally update the widget
        TOOLS._save_widget(self._annot, self)
        self._text_da = ""
from . import _extra
class Outline:
    """Wrapper for a MuPDF outline item, kept in `self.this`."""

    __slots__ = [ 'this']

    def __init__(self, ol):
        self.this = ol

    @property
    def dest(self):
        '''outline destination details'''
        return linkDest(self, None, None)

    def destination(self, document):
        '''
        Same as the `dest` property, except that `document` is passed on
        so that destinations of kind=LINK_NAMED can be resolved.
        '''
        return linkDest(self, None, document)

    @property
    def down(self):
        """Outline returned by the item's down(), or None if there is none."""
        child = self.this.down()
        if not child.m_internal:
            return
        return Outline(child)

    @property
    def is_external(self):
        """Whether the item's uri is an external link."""
        if g_use_extra:
            # calling _extra.* here appears to save significant time in
            # test_toc.py:test_full_toc, 1.2s=>0.94s.
            #
            return _extra.Outline_is_external(self.this)
        item = self.this
        if not item.m_internal:
            return False
        uri = item.m_internal.uri
        if uri is None:
            return False
        return mupdf.fz_is_external_link(uri)

    @property
    def is_open(self):
        return self.this.m_internal.is_open

    @property
    def next(self):
        """Outline returned by the item's next(), or None if there is none."""
        sibling = self.this.next()
        if not sibling.m_internal:
            return
        return Outline(sibling)

    @property
    def page(self):
        return self.this.m_internal.page.page

    @property
    def title(self):
        return self.this.m_internal.title

    @property
    def uri(self):
        """The item's uri, or None if there is no underlying item."""
        item = self.this
        if not item.m_internal:
            return None
        return item.m_internal.uri

    @property
    def x(self):
        return self.this.m_internal.x

    @property
    def y(self):
        return self.this.m_internal.y
def _make_PdfFilterOptions(
        recurse=0,
        instance_forms=0,
        ascii=0,
        no_update=0,
        sanitize=0,
        sopts=None,
        ):
    '''
    Returns a mupdf.PdfFilterOptions instance.

    The plain option values are copied onto the instance. If `sanitize` is
    true, a filter factory that creates MuPDF's sanitize filter is added,
    configured by `sopts` (a default mupdf.PdfSanitizeFilterOptions is used
    if `sopts` is not given).
    '''
    filter_ = mupdf.PdfFilterOptions()
    filter_.recurse = recurse
    filter_.instance_forms = instance_forms
    filter_.ascii = ascii
    filter_.no_update = no_update

    if not sanitize:
        return filter_

    # We want to use a PdfFilterFactory whose `.filter` fn pointer is
    # set to MuPDF's `pdf_new_sanitize_filter()`. But not sure how to
    # get access to this raw fn in Python; and on Windows raw MuPDF
    # functions are not even available to C++.
    #
    # So we use SWIG Director to implement our own
    # PdfFilterFactory whose `filter()` method calls
    # `mupdf.ll_pdf_new_sanitize_filter()`.
    if sopts:
        assert isinstance(sopts, mupdf.PdfSanitizeFilterOptions)
    else:
        sopts = mupdf.PdfSanitizeFilterOptions()

    class Factory(mupdf.PdfFilterFactory2):
        def __init__(self):
            super().__init__()
            self.use_virtual_filter()
            self.sopts = sopts

        def filter(self, ctx, doc, chain, struct_parents, transform, options):
            if 0:
                log(f'sanitize filter.filter():')
                log(f'    {self=}')
                log(f'    {ctx=}')
                log(f'    {doc=}')
                log(f'    {chain=}')
                log(f'    {struct_parents=}')
                log(f'    {transform=}')
                log(f'    {options=}')
                log(f'    {self.sopts.internal()=}')
            return mupdf.ll_pdf_new_sanitize_filter(
                    doc,
                    chain,
                    struct_parents,
                    transform,
                    options,
                    self.sopts.internal(),
                    )

    sanitize_factory = Factory()
    filter_.add_factory(sanitize_factory.internal())
    # Store the Python factory on the options object.
    filter_._factory = sanitize_factory
    return filter_
class Page:
def __init__(self, page, document):
    """Wrap the MuPDF `page` (FzPage or PdfPage) belonging to `document`."""
    assert isinstance(page, (mupdf.FzPage, mupdf.PdfPage)), f'page is: {page}'
    self.this = page
    self.thisown = True
    self.last_point = None
    self.draw_cont = ''
    self._annot_refs = dict()
    self.parent = document
    # Cache the page number; a PdfPage keeps it in its `super` member.
    if not page.m_internal:
        self.number = None
    elif isinstance( page, mupdf.PdfPage):
        self.number = page.m_internal.super.number
    else:
        self.number = page.m_internal.number
def __repr__(self):
    """Return the same text as `str(self)`."""
    # The statements that used to follow this return were unreachable.
    return self.__str__()
def __str__(self):
    """Return 'page <n>' plus ' of <document>' when a parent is set."""
    #CheckParent(self)
    parent = getattr(self, 'parent', None)
    internal = self.this.m_internal
    if isinstance(internal, mupdf.pdf_page):
        number = internal.super.number
    else:
        number = internal.number
    ret = f'page {number}'
    if not parent:
        return ret
    # Describe the owning document: its name, or a placeholder for
    # in-memory / new documents.
    x = parent.name
    if parent.stream is not None:
        x = "<memory, doc# %i>" % (parent._graft_id,)
    if x == "":
        x = "<new PDF, doc# %i>" % parent._graft_id
    return ret + f' of {x}'
def _add_caret_annot(self, point):
    """Create a 'Caret' annotation and return the raw MuPDF annotation.

    If `point` is given, the annotation's rectangle is moved so that its
    top-left corner is at `point`, keeping its default size.
    """
    if g_use_extra:
        # Fast path implemented in the `extra` module.
        # NOTE(review): the original had a second, unreachable
        # `elif g_use_extra:` branch which has been removed. The extra
        # implementation is assumed to update the annotation and assign its
        # id itself - confirm against the C++ code.
        return extra._add_caret_annot( self.this, JM_point_from_py(point))

    page = self._pdf_page()
    annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_CARET)
    if point:
        p = JM_point_from_py(point)
        r = mupdf.pdf_annot_rect(annot)
        r = mupdf.FzRect(p.x, p.y, p.x + r.x1 - r.x0, p.y + r.y1 - r.y0)
        mupdf.pdf_set_annot_rect(annot, r)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return annot
def _add_file_annot(self, point, buffer_, filename, ufilename=None, desc=None, icon=None):
    """Create a 'FileAttachment' annotation at `point` embedding `buffer_`.

    `ufilename` and `desc` fall back to `filename` when not given.
    Raises TypeError if `buffer_` cannot be converted to a buffer.
    """
    page = self._pdf_page()
    uf = ufilename or filename
    d = desc or filename
    p = JM_point_from_py(point)
    ASSERT_PDF(page)
    filebuf = JM_BufferFromBytes(buffer_)
    if not filebuf.m_internal:
        raise TypeError( MSG_BAD_BUFFER)

    annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_FILE_ATTACHMENT)
    annot_obj = mupdf.pdf_annot_obj
    # Keep the default annotation size, but anchor it at the given point.
    default_rect = mupdf.pdf_annot_rect(annot)
    width = default_rect.x1 - default_rect.x0
    height = default_rect.y1 - default_rect.y0
    r = mupdf.fz_make_rect(p.x, p.y, p.x + width, p.y + height)
    mupdf.pdf_set_annot_rect(annot, r)
    flags = mupdf.PDF_ANNOT_IS_PRINT
    mupdf.pdf_set_annot_flags(annot, flags)

    if icon:
        mupdf.pdf_set_annot_icon_name(annot, icon)

    val = JM_embed_file(page.doc(), filebuf, filename, uf, d, 1)
    mupdf.pdf_dict_put(annot_obj(annot), PDF_NAME('FS'), val)
    mupdf.pdf_dict_put_text_string(annot_obj(annot), PDF_NAME('Contents'), filename)
    mupdf.pdf_update_annot(annot)
    # Rectangle and flags are set once more after the update.
    mupdf.pdf_set_annot_rect(annot, r)
    mupdf.pdf_set_annot_flags(annot, flags)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
def _add_freetext_annot(
        self, rect,
        text,
        fontsize=11,
        fontname=None,
        text_color=None,
        fill_color=None,
        border_color=None,
        align=0,
        rotate=0,
        ):
    """Create a 'FreeText' annotation in `rect` and rebuild its appearance.

    Raises ValueError if `rect` is infinite or empty.
    """
    page = self._pdf_page()
    nfcol, fcol = JM_color_FromSequence(fill_color)
    ntcol, tcol = JM_color_FromSequence(text_color)
    r = JM_rect_from_py(rect)
    if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r):
        raise ValueError( MSG_BAD_RECT)
    annot = mupdf.pdf_create_annot( page, mupdf.PDF_ANNOT_FREE_TEXT)
    annot_obj = mupdf.pdf_annot_obj( annot)
    mupdf.pdf_set_annot_contents( annot, text)
    mupdf.pdf_set_annot_rect( annot, r)
    mupdf.pdf_dict_put_int( annot_obj, PDF_NAME('Rotate'), rotate)
    mupdf.pdf_dict_put_int( annot_obj, PDF_NAME('Q'), align)

    if nfcol > 0:
        mupdf.pdf_set_annot_color( annot, fcol[:nfcol])

    # insert the default appearance string
    JM_make_annot_DA(annot, ntcol, tcol, fontname, fontsize)
    mupdf.pdf_update_annot( annot)
    JM_add_annot_id(annot, "A")
    val = Annot(annot)

    # Post-processing: keep only the text object (BT ... ET) of the
    # generated appearance stream and clip it to the annotation rectangle.
    stream = val._getAP()
    text_start = stream.find(b"BT")
    text_end = stream.rfind(b"ET") + 2
    stream = stream[text_start:text_end]
    w = rect[2]-rect[0]
    h = rect[3]-rect[1]
    if rotate in (90, -90, 270):
        w, h = h, w
    rect_cmd = f"0 0 {_format_g((w, h))} re".encode()
    stream = rect_cmd + b"\nW\nn\n" + stream

    # Optionally paint the rectangle first: fill, stroke (width 1) or both.
    fill_string = ColorCode(fill_color, "f").encode()
    stroke_string = ColorCode(border_color, "c").encode()
    bwidth = b""
    if fill_string:
        fill_string += b"\n"
    if stroke_string:
        stroke_string += b"\n"
        bwidth = b"1 w\n"
    if fill_string and stroke_string:
        ope = b"B"
    elif stroke_string:
        ope = b"S"
    elif fill_string:
        ope = b"f"
    else:
        ope = None
    if ope is not None:
        stream = bwidth + fill_string + stroke_string + rect_cmd + b"\n" + ope + b"\n" + stream
    val._setAP(stream)
    return val
def _add_ink_annot(self, list):
    """Create an 'Ink' annotation from a sequence of strokes.

    `list` must be a sequence of sequences of 2-item points; otherwise
    ValueError is raised.
    """
    page = mupdf.pdf_page_from_fz_page(self.this)
    ASSERT_PDF(page)
    if not PySequence_Check(list):
        raise ValueError( MSG_BAD_ARG_INK_ANNOT)
    # Points are transformed with the inverse of the page matrix.
    ctm = mupdf.FzMatrix()
    mupdf.pdf_page_transform(page, mupdf.FzRect(0), ctm)
    inv_ctm = mupdf.fz_invert_matrix(ctm)
    annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_INK)
    annot_obj = mupdf.pdf_annot_obj(annot)
    inklist = mupdf.pdf_new_array(page.doc(), len(list))
    for sublist in list:
        stroke = mupdf.pdf_new_array(page.doc(), 2 * len(sublist))
        for p in sublist:
            if not PySequence_Check(p) or PySequence_Size(p) != 2:
                raise ValueError( MSG_BAD_ARG_INK_ANNOT)
            point = mupdf.fz_transform_point(JM_point_from_py(p), inv_ctm)
            mupdf.pdf_array_push_real(stroke, point.x)
            mupdf.pdf_array_push_real(stroke, point.y)
        mupdf.pdf_array_push(inklist, stroke)
    mupdf.pdf_dict_put(annot_obj, PDF_NAME('InkList'), inklist)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
def _add_line_annot(self, p1, p2):
    """Create a 'Line' annotation running from `p1` to `p2`."""
    page = self._pdf_page()
    ASSERT_PDF(page)
    annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_LINE)
    start = JM_point_from_py(p1)
    end = JM_point_from_py(p2)
    mupdf.pdf_set_annot_line(annot, start, end)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    assert annot.m_internal
    return Annot(annot)
def _add_multiline(self, points, annot_type):
    """Create an annotation of `annot_type` with one vertex per point.

    Raises ValueError if fewer than two points are given or if a point
    does not have exactly two items.
    """
    page = self._pdf_page()
    if len(points) < 2:
        raise ValueError( MSG_BAD_ARG_POINTS)
    annot = mupdf.pdf_create_annot(page, annot_type)
    for p in points:
        if PySequence_Size(p) != 2:
            raise ValueError( MSG_BAD_ARG_POINTS)
        mupdf.pdf_add_annot_vertex(annot, JM_point_from_py(p))
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
def _add_redact_annot(self, quad, text=None, da_str=None, align=0, fill=None, text_color=None):
    """Create a 'Redact' annotation covering the rectangle of `quad`.

    `fill` (if truthy) is stored as the /IC color. If `text` is given, it
    is stored as overlay text together with `da_str` (/DA) and `align`
    (/Q). `text_color` is not used here.
    """
    page = self._pdf_page()
    annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_REDACT)
    r = mupdf.fz_rect_from_quad(JM_quad_from_py(quad))
    # TODO calculate de-rotated rect
    mupdf.pdf_set_annot_rect(annot, r)
    if fill:
        nfcol, fcol = JM_color_FromSequence(fill)
        arr = mupdf.pdf_new_array(page.doc(), nfcol)
        for component in fcol[:nfcol]:
            mupdf.pdf_array_push_real(arr, component)
        mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME('IC'), arr)
    if text:
        mupdf.pdf_dict_puts(
                mupdf.pdf_annot_obj(annot),
                "OverlayText",
                mupdf.pdf_new_text_string(text),
                )
        mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('DA'), da_str)
        mupdf.pdf_dict_put_int(mupdf.pdf_annot_obj(annot), PDF_NAME('Q'), align)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    # Take an extra reference and wrap it in a fresh PdfAnnot.
    kept = mupdf.ll_pdf_keep_annot(annot.m_internal)
    return Annot(mupdf.PdfAnnot( kept))
def _add_square_or_circle(self, rect, annot_type):
    """Create an annotation of `annot_type` with rectangle `rect`.

    Raises ValueError if `rect` is infinite or empty.
    """
    page = self._pdf_page()
    r = JM_rect_from_py(rect)
    unusable = mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r)
    if unusable:
        raise ValueError( MSG_BAD_RECT)
    annot = mupdf.pdf_create_annot(page, annot_type)
    mupdf.pdf_set_annot_rect(annot, r)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    assert annot.m_internal
    return Annot(annot)
def _add_stamp_annot(self, rect, stamp=0):
    """Create a 'Stamp' annotation in `rect`.

    `stamp` is an index into the list of stamp names below; values outside
    that list select the first entry ('Approved'). The chosen name is
    stored as /Name and also used as the annotation's contents.

    Raises ValueError if `rect` is infinite or empty.
    """
    page = self._pdf_page()
    stamp_id = [
            PDF_NAME('Approved'),
            PDF_NAME('AsIs'),
            PDF_NAME('Confidential'),
            PDF_NAME('Departmental'),
            PDF_NAME('Experimental'),
            PDF_NAME('Expired'),
            PDF_NAME('Final'),
            PDF_NAME('ForComment'),
            PDF_NAME('ForPublicRelease'),
            PDF_NAME('NotApproved'),
            PDF_NAME('NotForPublicRelease'),
            PDF_NAME('Sold'),
            PDF_NAME('TopSecret'),
            PDF_NAME('Draft'),
            ]
    n = len(stamp_id)
    name = stamp_id[0]
    ASSERT_PDF(page)
    r = JM_rect_from_py(rect)
    if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r):
        raise ValueError( MSG_BAD_RECT)
    if _INRANGE(stamp, 0, n-1):
        name = stamp_id[stamp]
    annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_STAMP)
    mupdf.pdf_set_annot_rect(annot, r)
    try:
        # (An unused `n = PDF_NAME('Name')` assignment was removed here.)
        mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME('Name'), name)
    except Exception:
        # Only adds diagnostics; the exception is always re-raised.
        if g_exceptions_verbose:    exception_info()
        raise
    mupdf.pdf_set_annot_contents(
            annot,
            mupdf.pdf_dict_get_name(mupdf.pdf_annot_obj(annot), PDF_NAME('Name')),
            )
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
def _add_text_annot(self, point, text, icon=None):
    """Create a 'Text' annotation at `point` with contents `text`.

    The annotation keeps its default size; `icon` (if given) sets the
    icon name.
    """
    page = self._pdf_page()
    p = JM_point_from_py( point)
    ASSERT_PDF(page)
    annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_TEXT)
    default_rect = mupdf.pdf_annot_rect(annot)
    width = default_rect.x1 - default_rect.x0
    height = default_rect.y1 - default_rect.y0
    r = mupdf.fz_make_rect(p.x, p.y, p.x + width, p.y + height)
    mupdf.pdf_set_annot_rect(annot, r)
    mupdf.pdf_set_annot_contents(annot, text)
    if icon:
        mupdf.pdf_set_annot_icon_name(annot, icon)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
def _add_text_marker(self, quads, annot_type):
    """Create a text marker annotation of `annot_type` for `quads`.

    Returns the new annotation (registered with this page), or None if
    `Page__add_text_marker()` returned nothing. Raises ValueError if the
    document is not a PDF.
    """
    CheckParent(self)
    if not self.parent.is_pdf:
        raise ValueError("is no PDF")
    marker = Page__add_text_marker(self, quads, annot_type)
    if not marker:
        return None
    # Link the annotation back to this page and remember it.
    marker.parent = weakref.proxy(self)
    self._annot_refs[id(marker)] = marker
    return marker
def _addAnnot_FromString(self, linklist):
    """Add links from list of object sources.

    `linklist` must be a tuple of PDF object source strings. Each item is
    added to the document as a new object and referenced from the page's
    /Annots array (which is created if missing). Items that cannot be
    converted are skipped with a message.

    Raises ValueError if a non-empty `linklist` is not a tuple.
    """
    CheckParent(self)
    if g_use_extra:
        self.__class__._addAnnot_FromString = extra.Page_addAnnot_FromString
        #log('Page._addAnnot_FromString() deferring to extra.Page_addAnnot_FromString().')
        return extra.Page_addAnnot_FromString( self.this, linklist)
    page = mupdf.pdf_page_from_fz_page(self.this)
    lcount = len(linklist)  # link count
    if lcount < 1:
        return
    # insert links from the provided sources
    ASSERT_PDF(page)
    if not isinstance(linklist, tuple):
        raise ValueError( "bad 'linklist' argument")
    if not mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots')).m_internal:
        mupdf.pdf_dict_put_array( page.obj(), PDF_NAME('Annots'), lcount)
    annots = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots'))
    assert annots.m_internal, f'{lcount=} {annots.m_internal=}'
    for i, txtpy in enumerate(linklist):
        text = JM_StrAsChar(txtpy)
        if not text:
            # message() accepts a single string, so format before calling
            # (passing `i` as a second argument raised TypeError).
            message("skipping bad link / annot item %i." % i)
            continue
        try:
            annot = mupdf.pdf_add_object( page.doc(), JM_pdf_obj_from_str( page.doc(), text))
            ind_obj = mupdf.pdf_new_indirect( page.doc(), mupdf.pdf_to_num( annot), 0)
            mupdf.pdf_array_push( annots, ind_obj)
        except Exception:
            if g_exceptions_verbose:    exception_info()
            message("skipping bad link / annot item %i.\n" % i)
def _addWidget(self, field_type, field_name):
    """Create a form field of `field_type` named `field_name` on this page.

    Raises RuntimeError if the widget could not be created.
    """
    pdf_page = self._pdf_page()
    widget = JM_create_widget(pdf_page.doc(), pdf_page, field_type, field_name)
    if not widget.m_internal:
        raise RuntimeError( "cannot create widget")
    JM_add_annot_id(widget, "W")
    return Annot(widget)
def _apply_redactions(self, text, images, graphics):
    """Apply this page's redactions and return MuPDF's result.

    `text`, `images` and `graphics` are passed through as the redaction
    options for text, images and line art respectively.
    """
    page = self._pdf_page()
    redact_opts = mupdf.PdfRedactOptions()
    redact_opts.black_boxes = 0  # no black boxes
    redact_opts.text = text  # how to treat text
    redact_opts.image_method = images  # how to treat images
    redact_opts.line_art = graphics  # how to treat vector graphics
    ASSERT_PDF(page)
    return mupdf.pdf_redact_page(page.doc(), page, redact_opts)
def _erase(self):
    """Detach this page from its document and clear its state."""
    self._reset_annot_refs()
    try:
        self.parent._forget_page(self)
    except Exception:
        # Best effort: report the problem and carry on clearing.
        exception_info()
    self.parent = None
    self.thisown = False
    self.number = None
    self.this = None
def _count_q_balance(self):
    """Count missing graphic state pushes and pops.

    Returns:
        A pair of integers (push, pop). Push is the number of missing
        PDF "q" commands, pop is the number of "Q" commands.
        A balanced graphics state for the page will be reached if its
        /Contents is prepended with 'push' copies of "q" (plus newline)
        and appended with 'pop' copies of "Q" (preceded by newline).
    """
    pdf_page = _as_pdf_page(self)  # need the underlying PDF page
    page_obj = pdf_page.obj
    resources = mupdf.pdf_dict_get(page_obj(), mupdf.PDF_ENUM_NAME_Resources)
    contents = mupdf.pdf_dict_get(page_obj(), mupdf.PDF_ENUM_NAME_Contents)
    pdf_doc = _as_pdf_document(self.parent)  # need underlying PDF document
    # return value of MuPDF function
    return mupdf.pdf_count_q_balance_outparams_fn(pdf_doc, resources, contents)
def _get_optional_content(self, oc: OptInt) -> OptStr:
    """Return the resource property name referring to xref `oc`.

    Returns None if `oc` is None or 0. If the page has no property for
    `oc` yet, a new one named "MC<i>" (first unused i) is created.
    Raises ValueError if the object is neither an OCG nor an OCMD.
    """
    if oc is None or oc == 0:
        return None
    check = self.parent.xref_object(oc, compressed=True)
    if "/Type/OCG" not in check and "/Type/OCMD" not in check:
        #log( 'raising "bad optional content"')
        raise ValueError("bad optional content: 'oc'")
    #log( 'Looking at self._get_resource_properties()')
    # Map xref -> property name for the existing properties.
    props = {x: p for p, x in self._get_resource_properties()}
    if oc in props:
        return props[oc]
    # Find the first property name that is not in use yet.
    used_names = props.values()
    i = 0
    while "MC%i" % i in used_names:
        i += 1
    mc = "MC%i" % i
    self._set_resource_property(mc, oc)
    #log( 'returning {mc=}')
    return mc
def _get_resource_properties(self):
    '''
    Return the page's Resources/Properties as delivered by
    JM_get_resource_properties().
    '''
    pdf_page = self._pdf_page()
    ASSERT_PDF(pdf_page)
    return JM_get_resource_properties(pdf_page.obj())
def _get_textpage(self, clip=None, flags=0, matrix=None):
    """Run the page through a structured-text device.

    Returns a mupdf.FzStextPage. `clip` restricts the text page's
    rectangle (default: the page's bounds), `flags` are the stext options
    and `matrix` is the transformation used when running the page.
    """
    if g_use_extra:
        ll_tpage = extra.page_get_textpage(self.this, clip, flags, matrix)
        return mupdf.FzStextPage(ll_tpage)
    page = self.this
    options = mupdf.FzStextOptions(flags)
    # Default to page's rect if `clip` not specified, for #2048.
    # (A redundant earlier conversion of `clip`, whose result was
    # immediately overwritten, has been removed.)
    rect = mupdf.fz_bound_page(page) if clip is None else JM_rect_from_py(clip)
    ctm = JM_matrix_from_py(matrix)
    tpage = mupdf.FzStextPage(rect)
    dev = mupdf.fz_new_stext_device(tpage, options)
    if _globals.no_device_caching:
        mupdf.fz_enable_device_hints( dev, mupdf.FZ_NO_CACHE)
    # fz_run_page() is given an FzPage; convert a PdfPage via super().
    if isinstance(page, mupdf.FzPage):
        pass
    elif isinstance(page, mupdf.PdfPage):
        page = page.super()
    else:
        assert 0, f'Unrecognised {type(page)=}'
    mupdf.fz_run_page(page, dev, ctm, mupdf.FzCookie())
    mupdf.fz_close_device(dev)
    return tpage
def _insert_image(self,
        filename=None, pixmap=None, stream=None, imask=None, clip=None,
        overlay=1, rotate=0, keep_proportion=1, oc=0, width=0, height=0,
        xref=0, alpha=-1, _imgname=None, digests=None
        ):
    """Insert an image into the page and return `(xref, digests)`.

    The image source is, in order of precedence: an existing image `xref`,
    `stream` (image bytes, optionally with `imask` mask bytes), `filename`,
    or `pixmap`. `digests` maps MD5 digests to image xrefs and is used to
    re-use an image that was inserted before.

    The function emulates the `goto`-based control flow of its C origin
    with the `do_*` flags: each stage below runs only if its flag is
    still set.

    Returns:
        `(img_xref, digests)` if a new image object was added to the PDF
        (in which case `digests` has been updated), else `(img_xref, None)`.

    Raises:
        ValueError: if `xref` does not refer to an image, or if a mask is
            requested for an image without compressed buffer.
    """
    maskbuf = mupdf.FzBuffer()
    page = self._pdf_page()
    # This will create an empty PdfDocument with a call to
    # pdf_new_document() then assign page.doc()'s return value to it (which
    # drop the original empty pdf_document).
    pdf = page.doc()
    w = width
    h = height
    img_xref = xref
    rc_digest = 0   # set to 1 once a new image has been stored

    # Stage flags; clearing a flag skips the corresponding stage below.
    do_process_pixmap = 1
    do_process_stream = 1
    do_have_imask = 1
    do_have_image = 1
    do_have_xref = 1

    if xref > 0:
        # Re-use an existing image object: only the final stage is needed.
        ref = mupdf.pdf_new_indirect(pdf, xref, 0)
        w = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Width'), PDF_NAME('W')))
        h = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Height'), PDF_NAME('H')))
        if w + h == 0:
            raise ValueError( MSG_IS_NO_IMAGE)
        #goto have_xref()
        do_process_pixmap = 0
        do_process_stream = 0
        do_have_imask = 0
        do_have_image = 0
    else:
        if stream:
            imgbuf = JM_BufferFromBytes(stream)
            do_process_pixmap = 0
        else:
            if filename:
                imgbuf = mupdf.fz_read_file(filename)
                #goto have_stream()
                do_process_pixmap = 0

    if do_process_pixmap:
        #log( 'do_process_pixmap')
        # process pixmap ---------------------------------
        arg_pix = pixmap.this
        w = arg_pix.w()
        h = arg_pix.h()
        digest = mupdf.fz_md5_pixmap2(arg_pix)
        md5_py = digest
        temp = digests.get(md5_py, None)
        if temp is not None:
            # Same pixmap was inserted before: re-use its xref.
            img_xref = temp
            ref = mupdf.pdf_new_indirect(page.doc(), img_xref, 0)
            #goto have_xref()
            do_process_stream = 0
            do_have_imask = 0
            do_have_image = 0
        else:
            if arg_pix.alpha() == 0:
                image = mupdf.fz_new_image_from_pixmap(arg_pix, mupdf.FzImage())
            else:
                # Pixmap has an alpha channel: build a mask image from a
                # converted copy and attach it to the image.
                pm = mupdf.fz_convert_pixmap(
                        arg_pix,
                        mupdf.FzColorspace(),
                        mupdf.FzColorspace(),
                        mupdf.FzDefaultColorspaces(None),
                        mupdf.FzColorParams(),
                        1,
                        )
                pm.alpha = 0
                pm.colorspace = None
                mask = mupdf.fz_new_image_from_pixmap(pm, mupdf.FzImage())
                image = mupdf.fz_new_image_from_pixmap(arg_pix, mask)
            #goto have_image()
            do_process_stream = 0
            do_have_imask = 0

    if do_process_stream:
        #log( 'do_process_stream')
        # process stream ---------------------------------
        # The digest covers the image bytes and, if present, the mask bytes.
        state = mupdf.FzMd5()
        if mupdf_cppyy:
            mupdf.fz_md5_update_buffer( state, imgbuf)
        else:
            mupdf.fz_md5_update(state, imgbuf.m_internal.data, imgbuf.m_internal.len)
        if imask:
            maskbuf = JM_BufferFromBytes(imask)
            if mupdf_cppyy:
                mupdf.fz_md5_update_buffer( state, maskbuf)
            else:
                mupdf.fz_md5_update(state, maskbuf.m_internal.data, maskbuf.m_internal.len)
        digest = mupdf.fz_md5_final2(state)
        md5_py = bytes(digest)
        temp = digests.get(md5_py, None)
        if temp is not None:
            # Same image was inserted before: re-use its xref.
            img_xref = temp
            ref = mupdf.pdf_new_indirect(page.doc(), img_xref, 0)
            w = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Width'), PDF_NAME('W')))
            h = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Height'), PDF_NAME('H')))
            #goto have_xref()
            do_have_imask = 0
            do_have_image = 0
        else:
            image = mupdf.fz_new_image_from_buffer(imgbuf)
            w = image.w()
            h = image.h()
            if not imask:
                #goto have_image()
                do_have_imask = 0

    if do_have_imask:
        if mupdf_version_tuple >= (1, 24):
            # `fz_compressed_buffer` is reference counted and
            # `mupdf.fz_new_image_from_compressed_buffer2()`
            # is provided as a Swig-friendly wrapper for
            # `fz_new_image_from_compressed_buffer()`, so we can do things
            # straightforwardly.
            #
            cbuf1 = mupdf.fz_compressed_image_buffer( image)
            if not cbuf1.m_internal:
                raise ValueError( "uncompressed image cannot have mask")
            bpc = image.bpc()
            colorspace = image.colorspace()
            xres, yres = mupdf.fz_image_resolution(image)
            mask = mupdf.fz_new_image_from_buffer(maskbuf)
            image = mupdf.fz_new_image_from_compressed_buffer2(
                    w,
                    h,
                    bpc,
                    colorspace,
                    xres,
                    yres,
                    1,  # interpolate
                    0,  # imagemask,
                    list(), # decode
                    list(), # colorkey
                    cbuf1,
                    mask,
                    )
        else:
            #log( 'do_have_imask')
            # mupdf.FzCompressedBuffer is not copyable, so
            # mupdf.fz_compressed_image_buffer() does not work - it cannot
            # return by value. And sharing a fz_compressed_buffer between two
            # `fz_image`'s doesn't work, so we use a raw fz_compressed_buffer
            # here, not a mupdf.FzCompressedBuffer.
            #
            cbuf1 = mupdf.ll_fz_compressed_image_buffer( image.m_internal)
            if not cbuf1:
                raise ValueError( "uncompressed image cannot have mask")
            bpc = image.bpc()
            colorspace = image.colorspace()
            xres, yres = mupdf.fz_image_resolution(image)
            mask = mupdf.fz_new_image_from_buffer(maskbuf)

            # mupdf.ll_fz_new_image_from_compressed_buffer() is not usable.
            zimg = extra.fz_new_image_from_compressed_buffer(
                    w,
                    h,
                    bpc,
                    colorspace.m_internal,
                    xres,
                    yres,
                    1,  # interpolate
                    0,  # imagemask,
                    cbuf1,
                    mask.m_internal,
                    )

            zimg = mupdf.FzImage(zimg)

            # `image` and `zimage` both have pointers to the same
            # `fz_compressed_buffer`, which is not reference counted, and they
            # both think that they own it.
            #
            # So we do what the classic implementation does, and simply ensure
            # that `fz_drop_image(image)` is never called. This will leak
            # some of `image`'s allocations (for example the main `fz_image`
            # allocation), but it's not trivial to avoid this.
            #
            # Perhaps we could manually set `fz_image`'s
            # `fz_compressed_buffer*` to null? Trouble is we'd have to
            # cast the `fz_image*` to a `fz_compressed_image*` to see the
            # `fz_compressed_buffer*`, which is probably not possible from
            # Python?
            #
            image.m_internal = None
            image = zimg

    if do_have_image:
        #log( 'do_have_image')
        # Store the new image in the PDF and remember its digest.
        ref = mupdf.pdf_add_image(pdf, image)
        if oc:
            JM_add_oc_object(pdf, ref, oc)
        img_xref = mupdf.pdf_to_num(ref)
        digests[md5_py] = img_xref
        rc_digest = 1

    if do_have_xref:
        #log( 'do_have_xref')
        # Reference the image in the page's /Resources/XObject (creating
        # the dictionaries if missing) and append the drawing commands.
        resources = mupdf.pdf_dict_get_inheritable(page.obj(), PDF_NAME('Resources'))
        if not resources.m_internal:
            resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 2)
        xobject = mupdf.pdf_dict_get(resources, PDF_NAME('XObject'))
        if not xobject.m_internal:
            xobject = mupdf.pdf_dict_put_dict(resources, PDF_NAME('XObject'), 2)
        mat = calc_image_matrix(w, h, clip, rotate, keep_proportion)
        mupdf.pdf_dict_puts(xobject, _imgname, ref)
        nres = mupdf.fz_new_buffer(50)
        s = f"\nq\n{_format_g((mat.a, mat.b, mat.c, mat.d, mat.e, mat.f))} cm\n/{_imgname} Do\nQ\n"
        #s = s.replace('\n', '\r\n')
        mupdf.fz_append_string(nres, s)
        JM_insert_contents(pdf, page.obj(), nres, overlay)

    # Only hand back `digests` if it was modified.
    if rc_digest:
        return img_xref, digests
    else:
        return img_xref, None
def _insertFont(self, fontname, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering):
    """Insert a font into the PDF and reference it as `fontname` on this page.

    Returns the value delivered by JM_insert_font(). Raises RuntimeError
    if that value carries no font xref.
    """
    page = self._pdf_page()
    ASSERT_PDF(page)
    pdf = page.doc()

    value = JM_insert_font(pdf, bfname, fontfile,fontbuffer, set_simple, idx, wmode, serif, encoding, ordering)
    # get the objects /Resources, /Resources/Font
    page_obj = page.obj
    resources = mupdf.pdf_dict_get_inheritable( page_obj(), PDF_NAME('Resources'))
    fonts = mupdf.pdf_dict_get(resources, PDF_NAME('Font'))
    if not fonts.m_internal:
        # page has no fonts yet
        fonts = mupdf.pdf_new_dict(pdf, 5)
        mupdf.pdf_dict_putl(page_obj(), fonts, PDF_NAME('Resources'), PDF_NAME('Font'))
    # store font in resources and fonts objects will contain named reference to font
    _, xref = JM_INT_ITEM(value, 0)
    if not xref:
        raise RuntimeError( "cannot insert font")
    mupdf.pdf_dict_puts(fonts, fontname, mupdf.pdf_new_indirect(pdf, xref, 0))
    return value
def _load_annot(self, name, xref):
    """Look up an annotation by `xref`, or by `name` if `xref` is 0.

    Returns an Annot, or None if the lookup result is falsy.
    """
    page = self._pdf_page()
    ASSERT_PDF(page)
    if xref != 0:
        found = JM_get_annot_by_xref(page, xref)
    else:
        found = JM_get_annot_by_name(page, name)
    if not found:
        return None
    return Annot(found)
def _makePixmap(self, doc, ctm, cs, alpha=0, annots=1, clip=None):
    """Render this page via JM_pixmap_from_page() and wrap it in a Pixmap."""
    return Pixmap(
            JM_pixmap_from_page(doc, self.this, ctm, cs, alpha, annots, clip)
            )
def _other_box(self, boxtype):
    """Return the page's `boxtype` entry as a rectangle.

    Returns None if this is no PDF page or if the entry is not an array.
    """
    # Start with an infinite rectangle as the "not found" marker.
    box = mupdf.FzRect( mupdf.FzRect.Fixed_INFINITE)
    pdf_page = mupdf.pdf_page_from_fz_page( self.this)
    if pdf_page.m_internal:
        entry = mupdf.pdf_dict_gets( pdf_page.obj(), boxtype)
        if mupdf.pdf_is_array(entry):
            box = mupdf.pdf_to_rect(entry)
    if mupdf.fz_is_infinite_rect( box):
        return
    return JM_py_from_rect(box)
def _pdf_page(self):
    '''
    Returns self.this as a mupdf.PdfPage, converting it with
    pdf_page_from_fz_page() if it is not one already.
    '''
    this = self.this
    if not isinstance(this, mupdf.PdfPage):
        this = mupdf.pdf_page_from_fz_page(this)
    return this
def _reset_annot_refs(self):
    """Forget all annotations registered with this page."""
    refs = self._annot_refs
    refs.clear()
def _set_opacity(self, gstate=None, CA=1, ca=1, blendmode=None):
    """Make sure the page has an /ExtGState entry for the given opacities.

    Returns the name of the graphics state ("fitzca" followed by two
    2-digit percentages), or None if `CA` and `ca` are both >= 1 and no
    `blendmode` is given. The incoming `gstate` value is ignored; the name
    is always derived from `CA` and `ca`.
    """
    if CA >= 1 and ca >= 1 and blendmode is None:
        return
    # Percentages are clamped to the range 0..99 so they fit in 2 digits.
    tCA = min(int(round(max(CA, 0) * 100)), 99)
    tca = min(int(round(max(ca, 0) * 100)), 99)
    # The name is never empty, so the former `if not gstate: return`
    # check was unreachable and has been removed.
    gstate = "fitzca%02i%02i" % (tCA, tca)

    page = mupdf.pdf_page_from_fz_page(self.this)
    ASSERT_PDF(page)
    resources = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Resources'))
    if not resources.m_internal:
        resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 2)
    extg = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState'))
    if not extg.m_internal:
        extg = mupdf.pdf_dict_put_dict(resources, PDF_NAME('ExtGState'), 2)
    # Nothing to do if a graphics state of this name exists already.
    for i in range(mupdf.pdf_dict_len(extg)):
        key = mupdf.pdf_dict_get_key(extg, i)
        if mupdf.pdf_to_name(key) == gstate:
            return gstate
    opa = mupdf.pdf_new_dict(page.doc(), 3)
    mupdf.pdf_dict_put_real(opa, PDF_NAME('CA'), CA)
    mupdf.pdf_dict_put_real(opa, PDF_NAME('ca'), ca)
    mupdf.pdf_dict_puts(extg, gstate, opa)
    return gstate
def _set_pagebox(self, boxtype, rect):
    """Set the page's `boxtype` entry to `rect`.

    `boxtype` must be one of "CropBox", "BleedBox", "TrimBox", "ArtBox".
    Raises ValueError for an orphaned page, a non-PDF document, a bad
    `boxtype` or a rectangle that is not inside the MediaBox.
    """
    doc = self.parent
    if doc is None:
        raise ValueError("orphaned object: parent is None")
    if not doc.is_pdf:
        raise ValueError("is no PDF")
    if boxtype not in ("CropBox", "BleedBox", "TrimBox", "ArtBox"):
        raise ValueError("bad boxtype")
    given = Rect(rect)
    mb = self.mediabox
    # Flip the y coordinates relative to the MediaBox's y1.
    box = Rect(given[0], mb.y1 - given[3], given[2], mb.y1 - given[1])
    x_ok = mb.x0 <= box.x0 < box.x1 <= mb.x1
    y_ok = mb.y0 <= box.y0 < box.y1 <= mb.y1
    if not (x_ok and y_ok):
        raise ValueError(f"{boxtype} not in MediaBox")
    doc.xref_set_key(self.xref, boxtype, f"[{_format_g(tuple(box))}]")
def _set_resource_property(self, name, xref):
    """Store `xref` under `name` via JM_set_resource_property()."""
    pdf_page = self._pdf_page()
    ASSERT_PDF(pdf_page)
    JM_set_resource_property(pdf_page.obj(), name, xref)
def _show_pdf_page(self, fz_srcpage, overlay=1, matrix=None, xref=0, oc=0, clip=None, graftmap=None, _imgname=None):
    """Display source page `fz_srcpage` on this page as a Form XObject.

    The source page is converted to an XObject (re-using `xref` if given),
    wrapped in a second XObject that applies `clip` and `matrix`, and that
    one is invoked under the name `_imgname` from new page contents.

    Returns the xref of the XObject made from the source page.
    """
    cropbox = JM_rect_from_py(clip)
    mat = JM_matrix_from_py(matrix)
    rc_xref = xref
    tpage = mupdf.pdf_page_from_fz_page(self.this)
    tpageref = tpage.obj()
    pdfout = tpage.doc()    # target PDF
    ENSURE_OPERATION(pdfout)
    #-------------------------------------------------------------
    # convert the source page to a Form XObject
    #-------------------------------------------------------------
    xobj1 = JM_xobject_from_page(pdfout, fz_srcpage, xref, graftmap.this)
    if not rc_xref:
        rc_xref = mupdf.pdf_to_num(xobj1)

    #-------------------------------------------------------------
    # create referencing XObject (controls display on target page)
    #-------------------------------------------------------------
    # fill reference to xobj1 into the /Resources
    #-------------------------------------------------------------
    subres1 = mupdf.pdf_new_dict(pdfout, 5)
    mupdf.pdf_dict_puts(subres1, "fullpage", xobj1)
    subres = mupdf.pdf_new_dict(pdfout, 5)
    mupdf.pdf_dict_put(subres, PDF_NAME('XObject'), subres1)

    res = mupdf.fz_new_buffer(20)
    mupdf.fz_append_string(res, "/fullpage Do")

    xobj2 = mupdf.pdf_new_xobject(pdfout, cropbox, mat, subres, res)
    if oc > 0:
        JM_add_oc_object(pdfout, mupdf.pdf_resolve_indirect(xobj2), oc)

    #-------------------------------------------------------------
    # update target page with xobj2:
    #-------------------------------------------------------------
    # 1. insert Xobject in Resources
    #-------------------------------------------------------------
    resources = mupdf.pdf_dict_get_inheritable(tpageref, PDF_NAME('Resources'))
    if not resources.m_internal:
        # Target page has no /Resources yet: create it, as _insert_image()
        # does, instead of writing into a null object.
        resources = mupdf.pdf_dict_put_dict(tpageref, PDF_NAME('Resources'), 5)
    subres = mupdf.pdf_dict_get(resources, PDF_NAME('XObject'))
    if not subres.m_internal:
        subres = mupdf.pdf_dict_put_dict(resources, PDF_NAME('XObject'), 5)

    mupdf.pdf_dict_puts(subres, _imgname, xobj2)

    #-------------------------------------------------------------
    # 2. make and insert new Contents object
    #-------------------------------------------------------------
    nres = mupdf.fz_new_buffer(50) # buffer for Do-command
    mupdf.fz_append_string(nres, " q /")   # Do-command
    mupdf.fz_append_string(nres, _imgname)
    mupdf.fz_append_string(nres, " Do Q ")

    JM_insert_contents(pdfout, tpageref, nres, overlay)
    return rc_xref
def add_caret_annot(self, point: point_like) -> Annot:
    """Add a 'Caret' annotation at `point`."""
    saved_rotation = annot_preprocess(self)
    try:
        raw_annot = self._add_caret_annot(point)
    finally:
        # Re-apply the rotation reported by annot_preprocess().
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    caret = Annot( raw_annot)
    annot_postprocess(self, caret)
    assert hasattr( caret, 'parent')
    return caret
def add_circle_annot(self, rect: rect_like) -> Annot:
    """Add a 'Circle' (ellipse, oval) annotation inside `rect`."""
    saved_rotation = annot_preprocess(self)
    try:
        circle = self._add_square_or_circle(rect, mupdf.PDF_ANNOT_CIRCLE)
    finally:
        # Re-apply the rotation reported by annot_preprocess().
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, circle)
    return circle
def add_file_annot(
        self,
        point: point_like,
        buffer_: typing.ByteString,
        filename: str,
        ufilename: OptStr =None,
        desc: OptStr =None,
        icon: OptStr =None
        ) -> Annot:
    """Add a 'FileAttachment' annotation at `point` embedding `buffer_`."""
    saved_rotation = annot_preprocess(self)
    try:
        attachment = self._add_file_annot(
                point,
                buffer_,
                filename,
                ufilename=ufilename,
                desc=desc,
                icon=icon,
                )
    finally:
        # Re-apply the rotation reported by annot_preprocess().
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, attachment)
    return attachment
def add_freetext_annot(
        self,
        rect: rect_like,
        text: str,
        fontsize: float =11,
        fontname: OptStr =None,
        border_color: OptSeq =None,
        text_color: OptSeq =None,
        fill_color: OptSeq =None,
        align: int =0,
        rotate: int =0
        ) -> Annot:
    """Add a 'FreeText' annotation showing `text` inside `rect`."""
    saved_rotation = annot_preprocess(self)
    options = dict(
            fontsize=fontsize,
            fontname=fontname,
            border_color=border_color,
            text_color=text_color,
            fill_color=fill_color,
            align=align,
            rotate=rotate,
            )
    try:
        freetext = self._add_freetext_annot(rect, text, **options)
    finally:
        # Re-apply the rotation reported by annot_preprocess().
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, freetext)
    return freetext
def add_highlight_annot(self, quads=None, start=None,
        stop=None, clip=None) -> Annot:
    """Add a 'Highlight' annotation.

    If `quads` is None, the area is determined by
    get_highlight_selection() from `start`, `stop` and `clip`.
    """
    q = (
        get_highlight_selection(self, start=start, stop=stop, clip=clip)
        if quads is None
        else CheckMarkerArg(quads)
    )
    return self._add_text_marker(q, mupdf.PDF_ANNOT_HIGHLIGHT)
def add_ink_annot(self, handwriting: list) -> Annot:
    """Add an 'Ink' ('handwriting') annotation.

    `handwriting` must be a list of lists of point_likes.
    """
    saved_rotation = annot_preprocess(self)
    try:
        ink = self._add_ink_annot(handwriting)
    finally:
        # Re-apply the rotation reported by annot_preprocess().
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, ink)
    return ink
def add_line_annot(self, p1: point_like, p2: point_like) -> Annot:
    """Add a 'Line' annotation from `p1` to `p2`."""
    saved_rotation = annot_preprocess(self)
    try:
        line = self._add_line_annot(p1, p2)
    finally:
        # Re-apply the rotation reported by annot_preprocess().
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, line)
    return line
def add_polygon_annot(self, points: list) -> Annot:
    """Add a 'Polygon' annotation with the given vertices."""
    saved_rotation = annot_preprocess(self)
    try:
        polygon = self._add_multiline(points, mupdf.PDF_ANNOT_POLYGON)
    finally:
        # Re-apply the rotation reported by annot_preprocess().
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, polygon)
    return polygon
def add_polyline_annot(self, points: list) -> Annot:
    """Add a 'PolyLine' annotation with the given vertices."""
    saved_rotation = annot_preprocess(self)
    try:
        polyline = self._add_multiline(points, mupdf.PDF_ANNOT_POLY_LINE)
    finally:
        # Re-apply the rotation reported by annot_preprocess().
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, polyline)
    return polyline
def add_rect_annot(self, rect: rect_like) -> Annot:
    """Add a 'Square' (rectangle) annotation covering `rect`."""
    saved_rotation = annot_preprocess(self)
    try:
        square = self._add_square_or_circle(rect, mupdf.PDF_ANNOT_SQUARE)
    finally:
        # Re-apply the rotation reported by annot_preprocess().
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, square)
    return square
def add_redact_annot(
    self,
    quad,
    text: OptStr =None,
    fontname: OptStr =None,
    fontsize: float =11,
    align: int =0,
    fill: OptSeq =None,
    text_color: OptSeq =None,
    cross_out: bool =True,
) -> Annot:
    """Create a 'Redact' annotation.

    When 'text' is given, a default-appearance string is built from
    'text_color', 'fontname' and 'fontsize' (falling back to black,
    "Helv" and 11), and 'fill' is normalized to at most three components
    (white if None). With 'cross_out' true, the annotation's appearance
    stream is extended by two diagonals.
    """
    appearance = None
    if text:
        CheckColor(fill)
        CheckColor(text_color)
        fontname = fontname or "Helv"
        fontsize = fontsize or 11
        text_color = text_color or (0, 0, 0)
        if hasattr(text_color, "__float__"):
            # a single number is used for all three components
            text_color = (text_color,) * 3
        if len(text_color) > 3:
            text_color = text_color[:3]
        appearance = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf".format(
            *text_color, f=fontname, s=fontsize
        )
        if fill is None:
            fill = (1, 1, 1)
        if fill:
            if hasattr(fill, "__float__"):
                fill = (fill,) * 3
            if len(fill) > 3:
                fill = fill[:3]

    saved_rotation = annot_preprocess(self)
    try:
        redact = self._add_redact_annot(
            quad, text=text, da_str=appearance, align=align, fill=fill
        )
    finally:
        # re-apply the rotation value reported by annot_preprocess()
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, redact)

    if cross_out:
        # change appearance to show a crossed-out rectangle:
        # keep all but the last line of the current appearance stream,
        # which must consist of exactly five lines
        commands = redact._getAP().splitlines()[:-1]
        _, LL, LR, UR, UL = commands
        commands.extend([LR, LL, UR, LL, UL, b"S"])
        redact._setAP(b"\n".join(commands), 0)
    return redact
def add_squiggly_annot(
    self,
    quads=None,
    start=None,
    stop=None,
    clip=None,
) -> Annot:
    """Create a 'Squiggly' text marker annotation.

    If 'quads' is None, the area to mark is obtained from
    get_highlight_selection() using 'start', 'stop' and 'clip';
    otherwise 'quads' is passed through CheckMarkerArg().
    """
    if quads is not None:
        marker_quads = CheckMarkerArg(quads)
    else:
        marker_quads = get_highlight_selection(self, start=start, stop=stop, clip=clip)
    return self._add_text_marker(marker_quads, mupdf.PDF_ANNOT_SQUIGGLY)
def add_stamp_annot(self, rect: rect_like, stamp: int =0) -> Annot:
    """Create a ('rubber') 'Stamp' annotation in 'rect'."""
    saved_rotation = annot_preprocess(self)
    try:
        stamp_annot = self._add_stamp_annot(rect, stamp)
    finally:
        # re-apply the rotation value reported by annot_preprocess()
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, stamp_annot)
    return stamp_annot
def add_strikeout_annot(self, quads=None, start=None, stop=None, clip=None) -> Annot:
    """Create a 'StrikeOut' text marker annotation.

    If 'quads' is None, the area to mark is obtained from
    get_highlight_selection() using 'start', 'stop' and 'clip';
    otherwise 'quads' is passed through CheckMarkerArg().
    """
    if quads is not None:
        marker_quads = CheckMarkerArg(quads)
    else:
        marker_quads = get_highlight_selection(self, start=start, stop=stop, clip=clip)
    return self._add_text_marker(marker_quads, mupdf.PDF_ANNOT_STRIKE_OUT)
def add_text_annot(self, point: point_like, text: str, icon: str ="Note") -> Annot:
    """Create a 'Text' (sticky note) annotation at 'point'."""
    saved_rotation = annot_preprocess(self)
    try:
        note = self._add_text_annot(point, text, icon=icon)
    finally:
        # re-apply the rotation value reported by annot_preprocess()
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, note)
    return note
def add_underline_annot(self, quads=None, start=None, stop=None, clip=None) -> Annot:
    """Create an 'Underline' text marker annotation.

    If 'quads' is None, the area to mark is obtained from
    get_highlight_selection() using 'start', 'stop' and 'clip';
    otherwise 'quads' is passed through CheckMarkerArg().
    """
    if quads is not None:
        marker_quads = CheckMarkerArg(quads)
    else:
        marker_quads = get_highlight_selection(self, start=start, stop=stop, clip=clip)
    return self._add_text_marker(marker_quads, mupdf.PDF_ANNOT_UNDERLINE)
def add_widget(self, widget: Widget) -> Annot:
    """Create a 'Widget' (form field) on this page.

    Validates 'widget', creates the underlying annotation via
    _addWidget(), links both objects and calls widget.update().

    Returns:
        The new annotation, or None if _addWidget() returned a falsy value.
    Raises:
        ValueError: the owning document is not a PDF.
    """
    CheckParent(self)
    if not self.parent.is_pdf:
        raise ValueError("is no PDF")
    widget._validate()
    field_annot = self._addWidget(widget.field_type, widget.field_name)
    if not field_annot:
        return None
    field_annot.thisown = True
    field_annot.parent = weakref.proxy(self)  # owning page object
    self._annot_refs[id(field_annot)] = field_annot
    # bind the widget to the same page reference and its annotation
    widget.parent = field_annot.parent
    widget._annot = field_annot
    widget.update()
    return field_annot
def annot_names(self):
    """List of names of annotations, fields and links.

    Returns:
        The result of JM_get_annot_id_list() for the page, or an empty
        list if the page has no underlying PDF page object.
    """
    CheckParent(self)
    page = self._pdf_page()
    if not page.m_internal:
        # not a PDF page: nothing to report
        return []
    return JM_get_annot_id_list(page)
def annot_xrefs(self):
    '''
    Xref information of the page's annotations, fields and links,
    as delivered by JM_get_annot_xref_list2().
    '''
    xref_items = JM_get_annot_xref_list2(self)
    return xref_items
def annots(self, types=None):
    """ Generator over the annotations of a page.

    Links, popups and widgets are never yielded.

    Args:
        types: (list) annotation types to subselect from. If it does not
            support item access (e.g. None), all annotations are returned.
            E.g. types=[PDF_ANNOT_LINE] will only yield line annotations.
    """
    excluded = (mupdf.PDF_ANNOT_LINK, mupdf.PDF_ANNOT_POPUP, mupdf.PDF_ANNOT_WIDGET)
    subselect = hasattr(types, "__getitem__")
    selected = []
    for entry in self.annot_xrefs():
        kind = entry[1]
        if subselect and kind not in types:
            continue
        if kind in excluded:
            continue
        selected.append(entry[0])
    # the xref list is complete before the first annotation is loaded
    for annot_xref in selected:
        loaded = self.load_annot(annot_xref)
        loaded._yielded = True
        yield loaded
@property
def artbox(self):
    """The ArtBox.

    Falls back to the cropbox if _other_box() reports None; otherwise the
    y-coordinates are flipped against the mediabox's y1.
    """
    box = self._other_box("ArtBox")
    if box is None:
        return self.cropbox
    top = self.mediabox.y1
    return Rect(box[0], top - box[3], box[2], top - box[1])
@property
def bleedbox(self):
    """The BleedBox.

    Falls back to the cropbox if _other_box() reports None; otherwise the
    y-coordinates are flipped against the mediabox's y1.
    """
    box = self._other_box("BleedBox")
    if box is None:
        return self.cropbox
    top = self.mediabox.y1
    return Rect(box[0], top - box[3], box[2], top - box[1])
def bound(self):
    """Get page rectangle.

    Returns:
        Rect of the page as reported by fz_bound_page(). If that
        rectangle is infinite and the document is a PDF, a rectangle
        (0, 0, w, h) derived from the cropbox is returned instead, with
        width and height swapped unless the rotation is 0 or 180.
    """
    CheckParent(self)
    page = _as_fz_page(self.this)
    val = Rect(mupdf.fz_bound_page(page))

    if val.is_infinite and self.parent.is_pdf:
        cb = self.cropbox
        w, h = cb.width, cb.height
        if self.rotation not in (0, 180):
            w, h = h, w
        val = Rect(0, 0, w, h)
        # Forward the most recent stored MuPDF warning to the user.
        # The warnings store may be empty, so do not index blindly.
        warn_lines = TOOLS.mupdf_warnings(reset=False).splitlines()
        if warn_lines:
            message(warn_lines[-1])

    return val
def clean_contents(self, sanitize=1):
    """Run the page's contents through the MuPDF content filter.

    If 'sanitize' is falsy and the page is not wrapped, wrap_contents()
    is called first. Does nothing more if the page is not a PDF page.
    """
    if not (sanitize or self.is_wrapped):
        self.wrap_contents()
    pdf_page = mupdf.pdf_page_from_fz_page(self.this)
    if pdf_page.m_internal:
        options = _make_PdfFilterOptions(recurse=1, sanitize=sanitize)
        mupdf.pdf_filter_page_contents(pdf_page.doc(), pdf_page, options)
@property
def cropbox(self):
    """The CropBox.

    For non-PDF pages, the rectangle from fz_bound_page() is used.
    """
    CheckParent(self)
    pdf_page = self._pdf_page()
    if pdf_page.m_internal:
        raw = JM_cropbox(pdf_page.obj())
    else:
        raw = mupdf.fz_bound_page(self.this)
    return Rect(raw)
@property
def cropbox_position(self):
    """Top-left point ('tl') of the cropbox."""
    crop = self.cropbox
    return crop.tl
def delete_annot(self, annot):
    """Delete annot and return next one.

    Args:
        annot: the annotation to delete; it must pass CheckParent().
    Returns:
        An Annot wrapping what pdf_next_annot() delivered for 'annot'
        before it was deleted. It is only bound to this page if truthy.
    """
    CheckParent(self)
    CheckParent(annot)

    page = self._pdf_page()
    while 1:
        # first loop through all /IRT annots and remove them
        irt_annot = JM_find_annot_irt(annot.this)
        if not irt_annot:    # no more there
            break
        mupdf.pdf_delete_annot(page, irt_annot.this)
    # the successor must be fetched before 'annot' itself is deleted
    nextannot = mupdf.pdf_next_annot(annot.this)   # store next
    mupdf.pdf_delete_annot(page, annot.this)
    val = Annot(nextannot)

    if val:
        val.thisown = True
        val.parent = weakref.proxy(self) # owning page object
        val.parent._annot_refs[id(val)] = val
    # invalidate the Python object of the deleted annotation
    annot._erase()
    return val
def delete_link(self, linkdict):
    """Delete a Link.

    Args:
        linkdict: a link dictionary; its "xref" entry identifies the
            link object and its "id" entry the Python-side reference.
            Anything that is not a dict is silently ignored.
    """
    CheckParent(self)
    if not isinstance( linkdict, dict):
        return  # have no dictionary

    def finished():
        # Common exit: erase the Python object registered under the
        # link's id, if there is one. Failures are deliberately ignored.
        if linkdict["xref"] == 0: return
        try:
            linkid = linkdict["id"]
            linkobj = self._annot_refs[linkid]
            linkobj._erase()
        except Exception:
            # Don't print this exception, to match classic. Issue #2841.
            if g_exceptions_verbose > 1: exception_info()
            pass

    page = mupdf.pdf_page_from_fz_page( self.this)
    if not page.m_internal:
        return finished()   # have no PDF
    xref = linkdict[dictkey_xref]
    if xref < 1:
        return finished()   # invalid xref
    annots = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots'))
    if not annots.m_internal:
        return finished()   # have no annotations
    len_ = mupdf.pdf_array_len( annots)
    if len_ == 0:
        return finished()
    oxref = 0
    # after the loop, 'i' is the position of the matching entry (if any)
    for i in range( len_):
        oxref = mupdf.pdf_to_num( mupdf.pdf_array_get( annots, i))
        if xref == oxref:
            break   # found xref in annotations

    if xref != oxref:
        return finished()   # xref not in annotations
    mupdf.pdf_array_delete( annots, i)   # delete entry in annotations
    mupdf.pdf_delete_object( page.doc(), xref)   # delete link object
    mupdf.pdf_dict_put( page.obj(), PDF_NAME('Annots'), annots)
    JM_refresh_links( page)

    return finished()
@property
def derotation_matrix(self) -> Matrix:
    """Reflects page de-rotation.

    Returns:
        The matrix delivered by the 'extra' module when g_use_extra is
        set, otherwise the result of JM_derotate_page_matrix(). For pages
        without an underlying PDF page the identity matrix is returned.
    """
    if g_use_extra:
        return Matrix(extra.Page_derotate_matrix( self.this))
    pdfpage = self._pdf_page()
    if not pdfpage.m_internal:
        # Non-PDF pages have no rotation: answer with the identity.
        # (The previous code tried to build a Matrix from a unit *rect*.)
        return Matrix(1, 1)
    return Matrix(JM_derotate_page_matrix(pdfpage))
def extend_textpage(self, tpage, flags=0, matrix=None):
    """Run this page through a text device that writes into 'tpage'.

    Args:
        tpage: object whose 'this' attribute is a mupdf.FzStextPage.
        flags: value stored in the text options' 'flags'.
        matrix: converted via JM_matrix_from_py() and used for the run.
    """
    stext_page = tpage.this
    assert isinstance( stext_page, mupdf.FzStextPage)
    stext_options = mupdf.FzStextOptions()
    stext_options.flags = flags
    ctm = JM_matrix_from_py(matrix)
    device = mupdf.FzDevice(stext_page, stext_options)
    mupdf.fz_run_page(self.this, device, ctm, mupdf.FzCookie())
    mupdf.fz_close_device(device)
@property
def first_annot(self):
    """First annotation.

    Returns:
        An Annot bound to this page, or None if the page is not a PDF
        page or pdf_first_annot() delivers no annotation.
    """
    CheckParent(self)
    page = self._pdf_page()
    # A wrapper object is returned even for non-PDF pages, so the
    # underlying pointer must be checked (as the sibling methods do).
    if not page or not page.m_internal:
        return
    annot = mupdf.pdf_first_annot(page)
    if not annot.m_internal:
        return
    val = Annot(annot)
    val.thisown = True
    val.parent = weakref.proxy(self) # owning page object
    self._annot_refs[id(val)] = val
    return val
@property
def first_link(self):
    '''
    First link on page, as delivered by load_links().
    '''
    link = self.load_links()
    return link
@property
def first_widget(self):
    """First widget/field.

    Returns:
        A Widget filled from the first widget annotation, or None if the
        page is not a PDF page or pdf_first_widget() delivers nothing.
    """
    CheckParent(self)
    page = self._pdf_page()
    # A wrapper object is returned even for non-PDF pages, so the
    # underlying pointer must be checked (as the sibling methods do).
    if not page or not page.m_internal:
        return
    annot = mupdf.pdf_first_widget(page)
    if not annot.m_internal:
        return
    val = Annot(annot)
    val.thisown = True
    val.parent = weakref.proxy(self) # owning page object
    self._annot_refs[id(val)] = val
    # hand out a Widget populated from the annotation
    widget = Widget()
    TOOLS._fill_widget(val, widget)
    return widget
def get_bboxlog(self, layers=None):
    """Run the page through a bbox device and return the collected list.

    The page is processed with rotation 0; the original rotation is
    restored afterwards, also if running the page fails.

    Args:
        layers: if truthy, the device is created with layers included.
    """
    CheckParent(self)
    old_rotation = self.rotation
    if old_rotation != 0:
        self.set_rotation(0)
    try:
        page = self.this
        rc = []
        inc_layers = True if layers else False
        dev = JM_new_bbox_device( rc, inc_layers)
        mupdf.fz_run_page( page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
        mupdf.fz_close_device( dev)
    finally:
        # never leave the page in the temporary unrotated state
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    return rc
def get_cdrawings(self, extended=None, callback=None, method=None):
    """Extract vector graphics ("line art") from the page.

    The page is processed with rotation 0; the original rotation is
    restored afterwards, also if processing fails.

    Args:
        extended: if truthy, clips are included.
        callback, method: if 'callback' is callable or 'method' is not
            None, results go to these instead of the returned list.
    Returns:
        The list of collected paths, or None when 'callback' / 'method'
        are in use.
    """
    CheckParent(self)
    old_rotation = self.rotation
    if old_rotation != 0:
        self.set_rotation(0)
    try:
        page = self.this
        if isinstance(page, mupdf.PdfPage):
            # Downcast pdf_page to fz_page.
            page = mupdf.FzPage(page)
        assert isinstance(page, mupdf.FzPage), f'{self.this=}'
        clips = True if extended else False
        prect = mupdf.fz_bound_page(page)
        if g_use_extra:
            rc = extra.get_cdrawings(page, extended, callback, method)
        else:
            rc = list()
            if callable(callback) or method is not None:
                dev = JM_new_lineart_device_Device(callback, clips, method)
            else:
                dev = JM_new_lineart_device_Device(rc, clips, method)
            # flip the y-axis relative to the page height
            dev.ptm = mupdf.FzMatrix(1, 0, 0, -1, 0, prect.y1)
            mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
            mupdf.fz_close_device(dev)
    finally:
        # never leave the page in the temporary unrotated state
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    if callable(callback) or method is not None:
        return
    return rc
def get_contents(self):
    """Get xrefs of /Contents objects.

    Returns:
        A list with one xref per array entry if /Contents is an array,
        a one-item list if it is a single object, else an empty list.
    """
    CheckParent(self)
    pdf_page = mupdf.pdf_page_from_fz_page(self.this)
    contents = mupdf.pdf_dict_get(pdf_page.obj(), mupdf.PDF_ENUM_NAME_Contents)
    if mupdf.pdf_is_array(contents):
        return [
            mupdf.pdf_to_num(mupdf.pdf_array_get(contents, idx))
            for idx in range(mupdf.pdf_array_len(contents))
        ]
    if contents.m_internal:
        return [mupdf.pdf_to_num(contents)]
    return []
def get_displaylist(self, annots=1):
    '''
    Build a DisplayList of the page for Pixmap generation.
    Annotations are included by default; pass a falsy 'annots' to use
    the page contents only.
    '''
    CheckParent(self)
    if annots:
        make_list = mupdf.fz_new_display_list_from_page
    else:
        make_list = mupdf.fz_new_display_list_from_page_contents
    return DisplayList(make_list(self.this))
def get_drawings(self, extended: bool=False) -> list:
    """Retrieve vector graphics. The extended version includes clips.

    Note:
        The path dictionaries from get_cdrawings() are modified in place:
        point-likes, rect-likes and quad-likes are converted to
        Point / Rect / Quad objects, and for paths of type 'f' or 's'
        missing keys are added with value None.
    """
    default_keys = (
        'closePath',
        'fill',
        'color',
        'width',
        'lineCap',
        'lineJoin',
        'dashes',
        'stroke_opacity',
        'fill_opacity',
        'even_odd',
    )

    def convert(entry):
        # turn one raw draw command into its object-based equivalent
        cmd, args = entry[0], entry[1:]
        if cmd == "re":
            return ("re", Rect(args[0]).normalize(), args[1])
        if cmd == "qu":
            return ("qu", Quad(args[0]))
        return tuple([cmd] + [Point(p) for p in args])

    paths = self.get_cdrawings(extended=extended)
    for path in paths:
        kind = path["type"]
        if kind.startswith("clip"):
            path["scissor"] = Rect(path["scissor"])
        else:
            path["rect"] = Rect(path["rect"])
        if kind != "group":
            path["items"] = [convert(entry) for entry in path["items"]]
        if kind in ('f', 's'):
            for key in default_keys:
                path.setdefault(key, None)
    return paths
class Drawpath(object):
    """Attribute-style view of one path dictionary from get_cdrawings()."""

    def __init__(self, **args):
        # every keyword becomes an instance attribute
        vars(self).update(args)
class Drawpathlist(object):
    """List of Path objects representing get_cdrawings() output.

    Besides the paths themselves, counters per path type are maintained:
    path_count (all), clip_count, group_count, fill_count ('f'),
    stroke_count ('s') and fillstroke_count ('fs').
    """

    def __getitem__(self, item):
        return self.paths.__getitem__(item)

    def __init__(self):
        self.paths = []
        self.path_count = 0
        self.group_count = 0
        self.clip_count = 0
        self.fill_count = 0
        self.stroke_count = 0
        self.fillstroke_count = 0

    def __len__(self):
        return self.paths.__len__()

    def append(self, path):
        """Add a path and update the counter matching its 'type'."""
        self.paths.append(path)
        self.path_count += 1
        if path.type == "clip":
            self.clip_count += 1
        elif path.type == "group":
            self.group_count += 1
        elif path.type == "f":
            self.fill_count += 1
        elif path.type == "s":
            self.stroke_count += 1
        elif path.type == "fs":
            self.fillstroke_count += 1

    def _parents(self, i, kind):
        """Return the enclosing paths of type 'kind' for path number i.

        Walks backwards from path i and keeps each path of the wanted
        type whose level is smaller than that of the last accepted one
        (initially: smaller than the level of path i).
        """
        count = self.path_count
        # An empty list has no valid index. Without this check a negative
        # index would never become non-negative below.
        if count == 0 or i >= count:
            raise IndexError("bad path index")
        if i < 0:
            i %= count  # same result as repeatedly adding 'count'
        limit = self.paths[i].level
        parents = []
        for p in reversed(self.paths[:i]):
            if p.type != kind or p.level >= limit:
                continue
            if parents and p.level >= parents[-1].level:
                continue  # only accept smaller levels
            parents.append(p)
        return parents

    def clip_parents(self, i):
        """Return list of parent clip paths.

        Args:
            i: (int) return parents of this path.
        Returns:
            List of the clip parents.
        Raises:
            IndexError: i is too large or the list is empty."""
        return self._parents(i, "clip")

    def group_parents(self, i):
        """Return list of parent group paths.

        Args:
            i: (int) return parents of this path.
        Returns:
            List of the group parents.
        Raises:
            IndexError: i is too large or the list is empty."""
        return self._parents(i, "group")
def get_lineart(self) -> object:
    """Get page drawings paths.

    Note:
        Calls get_cdrawings(extended=True) and wraps every path dictionary
        in a Drawpath; point-like, rect-like, quad-like tuples become
        Point / Rect / Quad objects. For paths of type 'f', a number of
        stroke-related attributes are set to None.
        In contrast to get_drawings(), this output is a Drawpathlist.
    """

    def convert(entry):
        # turn one raw draw command into its object-based equivalent
        cmd, args = entry[0], entry[1:]
        if cmd == "re":
            return ("re", Rect(args[0]).normalize(), args[1])
        if cmd == "qu":
            return ("qu", Quad(args[0]))
        return tuple([cmd] + [Point(p) for p in args])

    result = self.Drawpathlist()
    for raw in self.get_cdrawings(extended=True):
        entry = self.Drawpath(**raw)
        if entry.type == "clip":
            entry.scissor = Rect(raw["scissor"])
        else:
            entry.rect = Rect(raw["rect"])
        if entry.type != "group":
            entry.items = [convert(cmd_item) for cmd_item in raw["items"]]
        if entry.type == "f":
            entry.stroke_opacity = None
            entry.dashes = None
            entry.line_join = None
            entry.line_cap = None
            entry.color = None
            entry.width = None
        result.append(entry)
    return result
def remove_rotation(self):
    """Set page rotation to 0 while maintaining visual appearance.

    Prepends a transformation to the page contents, adjusts the mediabox
    for rotations 90 / 270, sets the rotation to 0 and then transforms
    the rectangles of annotations, links and widgets.

    Returns:
        Identity if the page is not rotated, else the inverse of the
        matrix that was prepended to the page contents.
    """
    rot = self.rotation  # normalized rotation value
    if rot == 0:
        return Identity  # nothing to do

    # need to derotate the page's content
    mb = self.mediabox  # current mediabox

    if rot == 90:
        # before derotation, shift content horizontally
        mat0 = Matrix(1, 0, 0, 1, mb.y1 - mb.x1 - mb.x0 - mb.y0, 0)
    elif rot == 270:
        # before derotation, shift content vertically
        mat0 = Matrix(1, 0, 0, 1, 0, mb.x1 - mb.y1 - mb.y0 - mb.x0)
    else:  # rot = 180
        mat0 = Matrix(1, 0, 0, 1, -2 * mb.x0, -2 * mb.y0)

    # prefix with derotation matrix
    mat = mat0 * self.derotation_matrix
    cmd = b"%g %g %g %g %g %g cm " % tuple(mat)
    _ = TOOLS._insert_contents(self, cmd, False)  # prepend to page contents

    # swap x- and y-coordinates
    if rot in (90, 270):
        x0, y0, x1, y1 = mb
        mb.x0 = y0
        mb.y0 = x0
        mb.x1 = y1
        mb.y1 = x1
        self.set_mediabox(mb)

    self.set_rotation(0)
    # from here on, 'rot' no longer holds the angle but a matrix
    rot = ~mat  # inverse of the derotation matrix
    for annot in self.annots():  # modify rectangles of annotations
        r = annot.rect * rot
        annot.set_rect(r)
    for link in self.get_links():  # modify 'from' rectangles of links
        r = link["from"] * rot
        # links are replaced: delete, then re-insert with the new rectangle
        self.delete_link(link)
        link["from"] = r
        self.insert_link(link)
    for widget in self.widgets():  # modify field rectangles
        r = widget.rect * rot
        widget.rect = r
        widget.update()
    return rot  # the inverse of the generated derotation matrix
def cluster_drawings(
    self, clip=None, drawings=None, x_tolerance: float = 3, y_tolerance: float = 3
) -> list:
    """Join rectangles of neighboring vector graphic items.

    Args:
        clip: optional rect-like to restrict the page area to consider.
        drawings: (optional) output of a previous "get_drawings()".
        x_tolerance: horizontal neighborhood threshold.
        y_tolerance: vertical neighborhood threshold.

    Notes:
        Vector graphics (also called line-art or drawings) usually consist
        of independent items like rectangles, lines or curves to jointly
        form table grid lines or bar, line, pie charts and similar.
        This method identifies rectangles wrapping these disparate items.

    Returns:
        A list of Rect items, each wrapping line-art items that are close
        enough to be considered forming a common vector graphic.
        Only "significant" rectangles will be returned, i.e. having both,
        width and height larger than the tolerance values.
    """
    CheckParent(self)
    parea = self.rect  # the default clipping area
    if clip is not None:
        parea = Rect(clip)
    delta_x = x_tolerance  # shorter local name
    delta_y = y_tolerance  # shorter local name
    if drawings is None:  # if we cannot re-use a previous output
        drawings = self.get_drawings()

    def are_neighbors(r1, r2):
        """Detect whether r1, r2 are "neighbors".

        Items r1, r2 are called neighbors if the minimum distance between
        their points is less-equal delta.

        Both parameters must be (potentially invalid) rectangles.
        """
        # normalize rectangles as needed
        rr1_x0, rr1_x1 = (r1.x0, r1.x1) if r1.x1 > r1.x0 else (r1.x1, r1.x0)
        rr1_y0, rr1_y1 = (r1.y0, r1.y1) if r1.y1 > r1.y0 else (r1.y1, r1.y0)
        rr2_x0, rr2_x1 = (r2.x0, r2.x1) if r2.x1 > r2.x0 else (r2.x1, r2.x0)
        rr2_y0, rr2_y1 = (r2.y0, r2.y1) if r2.y1 > r2.y0 else (r2.y1, r2.y0)
        # r2 is enlarged by the tolerances on every side before the test
        if (
            0
            or rr1_x1 < rr2_x0 - delta_x
            or rr1_x0 > rr2_x1 + delta_x
            or rr1_y1 < rr2_y0 - delta_y
            or rr1_y0 > rr2_y1 + delta_y
        ):
            # Rects do not overlap.
            return False
        else:
            # Rects overlap.
            return True

    # exclude graphics not contained in the clip
    paths = [
        p
        for p in drawings
        if 1
        and p["rect"].x0 >= parea.x0
        and p["rect"].x1 <= parea.x1
        and p["rect"].y0 >= parea.y0
        and p["rect"].y1 <= parea.y1
    ]

    # list of all vector graphic rectangles
    prects = sorted([p["rect"] for p in paths], key=lambda r: (r.y1, r.x0))

    new_rects = []  # the final list of the joined rectangles

    # -------------------------------------------------------------------------
    # The strategy is to identify and join all rects that are neighbors
    # -------------------------------------------------------------------------
    while prects:  # the algorithm will empty this list
        r = +prects[0]  # copy of first rectangle
        repeat = True
        while repeat:
            # keep scanning until a full pass joins nothing: every join
            # enlarges r and may create new neighbors
            repeat = False
            for i in range(len(prects) - 1, 0, -1):  # from back to front
                if are_neighbors(prects[i], r):
                    r |= prects[i].tl  # include in first rect
                    r |= prects[i].br  # include in first rect
                    del prects[i]  # delete this rect
                    repeat = True

        new_rects.append(r)
        del prects[0]
        prects = sorted(set(prects), key=lambda r: (r.y1, r.x0))

    new_rects = sorted(set(new_rects), key=lambda r: (r.y1, r.x0))
    return [r for r in new_rects if r.width > delta_x and r.height > delta_y]
def get_fonts(self, full=False):
    """Fonts of this page, as reported by the document's get_page_fonts()."""
    CheckParent(self)
    owner = self.parent
    return owner.get_page_fonts(self.number, full=full)
def get_image_bbox(self, name, transform=0):
    """Get rectangle occupied by image 'name'.

    'name' is either an item of the image list, or the referencing
    name string - elem[7] of the resp. item.
    Option 'transform' also returns the image transformation matrix.

    Returns:
        The image's Rect, or the tuple (Rect, Matrix) if 'transform' is
        truthy. If the image cannot be located, the rectangle is
        Rect(1, 1, -1, -1) and the matrix is Matrix().
    Raises:
        ValueError: document closed or encrypted, 'name' is a list/tuple
            whose last item is no int, or 'name' matches no image or
            more than one image.
    """
    CheckParent(self)
    doc = self.parent
    if doc.is_closed or doc.is_encrypted:
        raise ValueError('document closed or encrypted')

    # default result, shaped according to 'transform'
    inf_rect = Rect(1, 1, -1, -1)
    null_mat = Matrix()
    if transform:
        rc = (inf_rect, null_mat)
    else:
        rc = inf_rect

    if type(name) in (list, tuple):
        if not type(name[-1]) is int:
            raise ValueError('need item of full page image list')
        item = name
    else:
        imglist = [i for i in doc.get_page_images(self.number, True) if name == i[7]]
        if len(imglist) == 1:
            item = imglist[0]
        elif imglist == []:
            raise ValueError('bad image name')
        else:
            raise ValueError("found multiple images named '%s'." % name)
    xref = item[-1]
    if xref != 0 or transform is True:
        try:
            return self.get_image_rects(item, transform=transform)[0]
        except Exception:
            exception_info()
            # Return the default in the shape the caller asked for.
            # (Previously always the bare rectangle, even with 'transform'.)
            return rc
    pdf_page = self._pdf_page()
    val = JM_image_reporter(pdf_page)

    if not bool(val):
        return rc

    for v in val:
        if v[0] != item[-3]:
            continue
        q = Quad(v[1])
        bbox = q.rect
        if transform == 0:
            rc = bbox
            break

        # derive the matrix from the image's quad
        hm = Matrix(util_hor_matrix(q.ll, q.lr))
        h = abs(q.ll - q.ul)
        w = abs(q.ur - q.ul)
        m0 = Matrix(1 / w, 0, 0, 1 / h, 0, 0)
        m = ~(hm * m0)
        rc = (bbox, m)
        break
    return rc
def get_images(self, full=False):
    """Images of this page, as reported by the document's get_page_images()."""
    CheckParent(self)
    owner = self.parent
    return owner.get_page_images(self.number, full=full)
def get_oc_items(self) -> list:
    """Get OCGs and OCMDs used in the page's contents.

    Each resource property is classified by looking for "/Type/OCG" or
    "/Type/OCMD" in the compressed source of its object; properties
    matching neither are skipped.

    Returns:
        List of items (name, xref, type), where type is one of "ocg" / "ocmd",
        and name is the property name.
    """
    markers = (("/Type/OCG", "ocg"), ("/Type/OCMD", "ocmd"))
    found = []
    for pname, xref in self._get_resource_properties():
        source = self.parent.xref_object(xref, compressed=True)
        octype = next((label for token, label in markers if token in source), None)
        if octype is not None:
            found.append((pname, xref, octype))
    return found
def get_svg_image(self, matrix=None, text_as_path=1):
    """Make SVG image from page.

    Args:
        matrix: converted via JM_matrix_from_py(); applied to the page
            bounds and used when running the page.
        text_as_path: 1 selects FZ_SVG_TEXT_AS_PATH, any other value
            FZ_SVG_TEXT_AS_TEXT.
    Returns:
        The SVG output, converted by JM_EscapeStrFromBuffer().
    """
    CheckParent(self)
    page_bounds = mupdf.fz_bound_page(self.this)
    ctm = JM_matrix_from_py(matrix)
    if text_as_path == 1:
        text_option = mupdf.FZ_SVG_TEXT_AS_PATH
    else:
        text_option = mupdf.FZ_SVG_TEXT_AS_TEXT
    bounds = mupdf.fz_transform_rect(page_bounds, ctm)
    width = bounds.x1 - bounds.x0
    height = bounds.y1 - bounds.y0

    buffer_ = mupdf.fz_new_buffer(1024)
    output = mupdf.FzOutput(buffer_)
    device = mupdf.fz_new_svg_device(output, width, height, text_option, 1)
    mupdf.fz_run_page(self.this, device, ctm, mupdf.FzCookie())
    mupdf.fz_close_device(device)
    output.fz_close_output()
    return JM_EscapeStrFromBuffer(buffer_)
def get_textbox(
    page: Page,
    rect: rect_like,
    textpage=None,  #: TextPage = None,
) -> str:
    """Return the result of extractTextbox(rect) for this page.

    Args:
        rect: the rectangle handed to extractTextbox().
        textpage: a text page to re-use; its 'parent' must equal this
            page. If None, a temporary one is made via get_textpage().
    Raises:
        ValueError: 'textpage' does not belong to this page.
    """
    if textpage is None:
        # temporary text page, dropped as soon as the text is extracted
        return page.get_textpage().extractTextbox(rect)
    if textpage.parent != page:
        raise ValueError("not a textpage of this page")
    return textpage.extractTextbox(rect)
def get_textpage(self, clip: rect_like = None, flags: int = 0, matrix=None) -> "TextPage":
    """Create a TextPage for this page.

    The text page is built with page rotation 0; the original rotation
    is restored afterwards, also on failure. 'matrix' defaults to
    Matrix(1, 1).
    """
    CheckParent(self)
    ctm = Matrix(1, 1) if matrix is None else matrix
    saved_rotation = self.rotation
    rotated = saved_rotation != 0
    if rotated:
        self.set_rotation(0)
    try:
        raw_textpage = self._get_textpage(clip, flags=flags, matrix=ctm)
    finally:
        if rotated:
            self.set_rotation(saved_rotation)
    result = TextPage(raw_textpage)
    result.parent = weakref.proxy(self)
    return result
def get_texttrace(self):
    """Run the page through a text trace device and return the collected list.

    The page is processed with rotation 0; the original rotation is
    restored afterwards, also if running the page fails.
    """
    CheckParent(self)
    old_rotation = self.rotation
    if old_rotation != 0:
        self.set_rotation(0)
    try:
        page = self.this
        rc = []
        if g_use_extra:
            dev = extra.JM_new_texttrace_device(rc)
        else:
            dev = JM_new_texttrace_device(rc)
        prect = mupdf.fz_bound_page(page)
        # flip the y-axis relative to the page height
        dev.ptm = mupdf.FzMatrix(1, 0, 0, -1, 0, prect.y1)
        mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
        mupdf.fz_close_device(dev)
    finally:
        # never leave the page in the temporary unrotated state
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    return rc
def get_xobjects(self):
    """XObjects of this page, as reported by the document's get_page_xobjects()."""
    CheckParent(self)
    owner = self.parent
    return owner.get_page_xobjects(self.number)
def insert_font(self, fontname="helv", fontfile=None, fontbuffer=None,
        set_simple=False, wmode=0, encoding=0):
    """Make a font available to the page and return its xref.

    Args:
        fontname: reference name of the font; a leading "/" is removed.
        fontfile: a str, or an object with an "absolute" attribute
            (converted via str()), or an object with a "name" attribute.
        fontbuffer: font data; replaced by pymupdf_fonts data if the
            lower-cased fontname is a key of fitz_fontdescriptors.
        set_simple, wmode, encoding: handed on to _insertFont().
    Returns:
        The font's xref, or the falsy result of _insertFont().
    Raises:
        ValueError: page has no parent, fontname contains invalid
            characters, or fontfile has an unsupported type.
    """
    doc = self.parent
    if doc is None:
        raise ValueError("orphaned object: parent is None")
    idx = 0

    if fontname.startswith("/"):
        fontname = fontname[1:]
    inv_chars = INVALID_NAME_CHARS.intersection(fontname)
    if inv_chars != set():
        raise ValueError(f"bad fontname chars {inv_chars}")

    font = CheckFont(self, fontname)
    if font is not None:                    # font already in font list of page
        xref = font[0]                      # this is the xref
        if CheckFontInfo(doc, xref):        # also in our document font list?
            return xref                     # yes: we are done
        # need to build the doc FontInfo entry - done via get_char_widths
        doc.get_char_widths(xref)
        return xref

    #--------------------------------------------------------------------------
    # the font is not present for this page
    #--------------------------------------------------------------------------

    bfname = Base14_fontdict.get(fontname.lower(), None) # BaseFont if Base-14 font

    serif = 0
    CJK_number = -1
    CJK_list_n = ["china-t", "china-s", "japan", "korea"]
    CJK_list_s = ["china-ts", "china-ss", "japan-s", "korea-s"]

    # CJK_number becomes the position of fontname in one of the two
    # lists; 'serif' records which list matched
    try:
        CJK_number = CJK_list_n.index(fontname)
        serif = 0
    except Exception:
        # Verbose in PyMuPDF/tests.
        if g_exceptions_verbose > 1: exception_info()
        pass

    if CJK_number < 0:
        try:
            CJK_number = CJK_list_s.index(fontname)
            serif = 1
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose > 1: exception_info()
            pass

    if fontname.lower() in fitz_fontdescriptors.keys():
        import pymupdf_fonts
        fontbuffer = pymupdf_fonts.myfont(fontname)  # make a copy
        del pymupdf_fonts

    # install the font for the page
    if fontfile is not None:
        if type(fontfile) is str:
            fontfile_str = fontfile
        elif hasattr(fontfile, "absolute"):
            fontfile_str = str(fontfile)
        elif hasattr(fontfile, "name"):
            fontfile_str = fontfile.name
        else:
            raise ValueError("bad fontfile")
    else:
        fontfile_str = None
    val = self._insertFont(fontname, bfname, fontfile_str, fontbuffer, set_simple, idx,
                           wmode, serif, encoding, CJK_number)

    if not val:                   # did not work, error return
        return val

    xref = val[0]                 # xref of installed font
    fontdict = val[1]

    if CheckFontInfo(doc, xref):  # check again: document already has this font
        return xref               # we are done

    # need to create document font info
    doc.get_char_widths(xref, fontdict=fontdict)
    return xref
@property
def is_wrapped(self):
    """True if _count_q_balance() reports (0, 0), i.e. /Contents is in a
    balanced graphics state."""
    balance = self._count_q_balance()
    return balance == (0, 0)
@property
def language(self):
    """Page language.

    Returns:
        The (inheritable) /Lang entry as a string, or None if the page
        is not a PDF page or has no such entry.
    """
    pdf_page = mupdf.pdf_page_from_fz_page(self.this)
    if pdf_page.m_internal:
        lang_obj = mupdf.pdf_dict_get_inheritable(pdf_page.obj(), PDF_NAME('Lang'))
        if lang_obj.m_internal:
            return mupdf.pdf_to_str_buf(lang_obj)
    return None
def links(self, kinds=None):
    """ Generator over the links of a page.

    Args:
        kinds: (list) link kinds to subselect from. If None,
               all links are returned. E.g. kinds=[LINK_URI]
               will only yield URI links.
    """
    for link in self.get_links():
        if kinds is not None and link["kind"] not in kinds:
            continue
        yield link
def load_annot(self, ident: typing.Union[str, int]) -> Annot:
    """Load an annot by name (/NM key) or xref.

    Args:
        ident: identifier, either name (str) or xref (int).
    Returns:
        The annotation bound to this page, or the falsy result of
        _load_annot() unchanged.
    Raises:
        ValueError: 'ident' is neither exactly a str nor exactly an int.
    """
    CheckParent(self)
    ident_type = type(ident)
    if ident_type is str:
        name, xref = ident, 0
    elif ident_type is int:
        name, xref = None, ident
    else:
        raise ValueError("identifier must be a string or integer")
    loaded = self._load_annot(name, xref)
    if loaded:
        loaded.thisown = True
        loaded.parent = weakref.proxy(self)
        self._annot_refs[id(loaded)] = loaded
    return loaded
def load_links(self):
    """Get first Link.

    Returns:
        A Link bound to this page, or None if fz_load_links() delivers
        nothing. For PDFs with at least one link annotation, 'xref' and
        'id' are taken from the first such entry of annot_xrefs();
        otherwise they are 0 and "".
    """
    CheckParent(self)
    raw_link = mupdf.fz_load_links( self.this)
    if not raw_link.m_internal:
        return
    link = Link( raw_link)
    link.thisown = True
    link.parent = weakref.proxy(self)  # owning page object
    self._annot_refs[id(link)] = link
    # defaults, overwritten below if the PDF has link annotations
    link.xref = 0
    link.id = ""
    if self.parent.is_pdf:
        link_entries = [
            entry for entry in self.annot_xrefs()
            if entry[1] == mupdf.PDF_ANNOT_LINK
        ]
        if link_entries:
            first_entry = link_entries[0]
            link.xref = first_entry[0]
            link.id = first_entry[2]
    return link
#----------------------------------------------------------------
# page load widget by xref
#----------------------------------------------------------------
def load_widget( self, xref):
    """Load a widget by its xref.

    Returns:
        A Widget filled from the annotation found by
        JM_get_widget_by_xref(), or that function's falsy result.
    """
    CheckParent(self)
    pdf_page = mupdf.pdf_page_from_fz_page( self.this)
    ASSERT_PDF(pdf_page)
    field_annot = JM_get_widget_by_xref( pdf_page, xref)
    if not field_annot:
        return field_annot
    field_annot.thisown = True
    field_annot.parent = weakref.proxy(self)
    self._annot_refs[id(field_annot)] = field_annot
    # hand out a Widget populated from the annotation
    field = Widget()
    TOOLS._fill_widget(field_annot, field)
    return field
@property
def mediabox(self):
    """The MediaBox.

    For non-PDF pages, the rectangle from fz_bound_page() is used.
    """
    CheckParent(self)
    pdf_page = self._pdf_page()
    if pdf_page.m_internal:
        raw = JM_mediabox( pdf_page.obj())
    else:
        raw = mupdf.fz_bound_page( self.this)
    return Rect(raw)
@property
def mediabox_size(self):
    """Point built from the mediabox's x1 and y1 coordinates."""
    right = self.mediabox.x1
    bottom = self.mediabox.y1
    return Point(right, bottom)
#@property
#def parent( self):
# assert self._parent
# if self._parent:
# return self._parent
# return Document( self.this.document())
def read_contents(self):
    """Return all /Contents streams concatenated to one bytes object."""
    combined = TOOLS._get_all_contents(self)
    return combined
def refresh(self):
    """Refresh page after link/annot/widget updates.

    Replaces 'self.this' with the result of the document's reload_page().
    """
    CheckParent(self)
    # fixme this looks wrong.
    self.this = self.parent.reload_page(self)
@property
def rotation(self):
    """Page rotation; 0 for pages that are not PDF pages."""
    CheckParent(self)
    if isinstance(self.this, mupdf.PdfPage):
        pdf_page = self.this
    else:
        pdf_page = mupdf.pdf_page_from_fz_page(self.this)
    return JM_page_rotation(pdf_page) if pdf_page.m_internal else 0
@property
def rotation_matrix(self) -> Matrix:
    """Matrix reflecting the page rotation, from TOOLS._rotate_matrix()."""
    raw_matrix = TOOLS._rotate_matrix(self)
    return Matrix(raw_matrix)
def run(self, dw, m):
    """Run page through a device.

    Args:
        dw: DeviceWrapper; its 'device' attribute receives the page.
        m: matrix, converted via JM_matrix_from_py().
    """
    CheckParent(self)
    ctm = JM_matrix_from_py(m)
    mupdf.fz_run_page(self.this, dw.device, ctm, mupdf.FzCookie())
def set_artbox(self, rect):
    """Set the ArtBox to 'rect' via _set_pagebox()."""
    outcome = self._set_pagebox("ArtBox", rect)
    return outcome
def set_bleedbox(self, rect):
    """Set the BleedBox to 'rect' via _set_pagebox()."""
    outcome = self._set_pagebox("BleedBox", rect)
    return outcome
def set_contents(self, xref):
    """Set object at 'xref' as the page's /Contents.

    Raises:
        ValueError: document closed, document is no PDF, xref outside
            range(1, doc.xref_length()), or xref is not a stream.
    """
    CheckParent(self)
    owner = self.parent
    if owner.is_closed:
        raise ValueError("document closed")
    if not owner.is_pdf:
        raise ValueError("is no PDF")
    valid_xrefs = range(1, owner.xref_length())
    if xref not in valid_xrefs:
        raise ValueError("bad xref")
    if not owner.xref_is_stream(xref):
        raise ValueError("xref is no stream")
    reference = "%i 0 R" % xref
    owner.xref_set_key(self.xref, "Contents", reference)
def set_cropbox(self, rect):
    """Set the CropBox to 'rect' via _set_pagebox(). Will also change Page.rect."""
    outcome = self._set_pagebox("CropBox", rect)
    return outcome
def set_language(self, language=None):
    """Set PDF page default language.

    Args:
        language: language string; a falsy value removes the page's
            /Lang entry instead.
    """
    CheckParent(self)
    pdfpage = mupdf.pdf_page_from_fz_page(self.this)
    ASSERT_PDF(pdfpage)
    if not language:
        mupdf.pdf_dict_del(pdfpage.obj(), PDF_NAME('Lang'))
    else:
        lang = mupdf.fz_text_language_from_string(language)
        assert hasattr(mupdf, 'fz_string_from_text_language2')
        # pdfpage.obj must be *called* to get the page's PDF object;
        # passing the bound method itself is not a valid dictionary.
        mupdf.pdf_dict_put_text_string(
            pdfpage.obj(),
            PDF_NAME('Lang'),
            mupdf.fz_string_from_text_language2(lang)
        )
def set_mediabox(self, rect):
    """Set the MediaBox.

    Also removes the page's CropBox, ArtBox, BleedBox and TrimBox entries.

    Raises:
        ValueError: 'rect' is empty or infinite.
    """
    CheckParent(self)
    pdf_page = self._pdf_page()
    ASSERT_PDF(pdf_page)
    new_box = JM_rect_from_py(rect)
    if mupdf.fz_is_empty_rect(new_box) or mupdf.fz_is_infinite_rect(new_box):
        raise ValueError( MSG_BAD_RECT)
    mupdf.pdf_dict_put_rect( pdf_page.obj(), PDF_NAME('MediaBox'), new_box)
    for dependent_box in ('CropBox', 'ArtBox', 'BleedBox', 'TrimBox'):
        mupdf.pdf_dict_del( pdf_page.obj(), PDF_NAME(dependent_box))
def set_rotation(self, rotation):
    """Set page rotation.

    The value is passed through JM_norm_rotation() and stored as the
    page's /Rotate entry.
    """
    CheckParent(self)
    pdf_page = mupdf.pdf_page_from_fz_page( self.this)
    ASSERT_PDF(pdf_page)
    normalized = JM_norm_rotation(rotation)
    mupdf.pdf_dict_put_int( pdf_page.obj(), PDF_NAME('Rotate'), normalized)
def set_trimbox(self, rect):
    """Set the TrimBox to 'rect' via _set_pagebox()."""
    outcome = self._set_pagebox("TrimBox", rect)
    return outcome
@property
def transformation_matrix(self):
    """Page transformation matrix.

    For non-PDF pages, the converted default FzMatrix is returned. For
    PDF pages with a rotation that is a multiple of 360, the matrix from
    pdf_page_transform() is returned; otherwise a y-flip matrix based on
    the cropbox height.
    """
    CheckParent(self)
    ctm = mupdf.FzMatrix()
    pdf_page = self._pdf_page()
    if not pdf_page.m_internal:
        return JM_py_from_matrix(ctm)
    unit_box = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)  # fixme: original code passed mediabox=NULL.
    mupdf.pdf_page_transform(pdf_page, unit_box, ctm)
    converted = JM_py_from_matrix(ctm)

    if self.rotation % 360 != 0:
        return Matrix(1, 0, 0, -1, 0, self.cropbox.height)
    return Matrix(converted)
@property
def trimbox(self):
    """The TrimBox; falls back to the cropbox when no TrimBox is stored.

    The stored y-coordinates are flipped against the mediabox's `y1`.
    """
    raw = self._other_box("TrimBox")
    if raw is None:
        return self.cropbox
    page_top = self.mediabox.y1
    x0, x1 = raw[0], raw[2]
    return Rect(x0, page_top - raw[3], x1, page_top - raw[1])
def widgets(self, types=None):
    """ Generator over the widgets of a page.

    Args:
        types: (list) field types to subselect from. If none,
                all fields are returned. E.g. types=[PDF_WIDGET_TYPE_TEXT]
                will only yield text fields.
    """
    # Collect all widget xrefs up front, before any widget is loaded.
    widget_xrefs = []
    for entry in self.annot_xrefs():
        if entry[1] == mupdf.PDF_ANNOT_WIDGET:
            widget_xrefs.append(entry[0])
    for xref in widget_xrefs:
        widget = self.load_widget(xref)
        if types is not None and widget.field_type not in types:
            continue
        yield widget
def wrap_contents(self):
    """Ensure page is in a balanced graphics state.

    Missing "q" commands are prepended and missing "Q" commands are
    appended to the page contents, as counted by `_count_q_balance`.
    """
    missing_push, missing_pop = self._count_q_balance()
    if missing_push > 0:
        TOOLS._insert_contents(self, b"q\n" * missing_push, False)
    if missing_pop > 0:
        TOOLS._insert_contents(self, b"\nQ" * missing_pop + b"\n", True)
@property
def xref(self):
    """PDF xref number of page, looked up via the parent document."""
    CheckParent(self)
    owner = self.parent
    return owner.page_xref(self.number)
# Expose `bound` as the read-only property `rect` ("page rectangle").
rect = property(bound, doc="page rectangle")
class Pixmap:
    """Python wrapper around a MuPDF pixmap, which is kept in `self.this`."""

    def __init__(self, *args):
        """
        Pixmap(colorspace, irect, alpha) - empty pixmap.
        Pixmap(colorspace, src) - copy changing colorspace.
        Pixmap(src, width, height,[clip]) - scaled copy, float dimensions.
        Pixmap(src, alpha=1) - copy and add or drop alpha channel.
        Pixmap(filename) - from an image in a file.
        Pixmap(image) - from an image in memory (bytes).
        Pixmap(colorspace, width, height, samples, alpha) - from samples data.
        Pixmap(PDFdoc, xref) - from an image at xref in a PDF document.

        The branches below are tried in order; the first one whose
        signature matches `args` wins, so their order is significant.
        """
        if 0:
            pass

        elif args_match(args,
                (Colorspace, mupdf.FzColorspace),
                (mupdf.FzRect, mupdf.FzIrect, IRect, Rect, tuple)
                ):
            # create empty pixmap with colorspace and IRect (no alpha)
            cs, rect = args
            alpha = 0
            pm = mupdf.fz_new_pixmap_with_bbox(cs, JM_irect_from_py(rect), mupdf.FzSeparations(0), alpha)
            self.this = pm

        elif args_match(args,
                (Colorspace, mupdf.FzColorspace),
                (mupdf.FzRect, mupdf.FzIrect, IRect, Rect, tuple),
                (int, bool)
                ):
            # create empty pixmap with colorspace, IRect and explicit alpha
            cs, rect, alpha = args
            pm = mupdf.fz_new_pixmap_with_bbox(cs, JM_irect_from_py(rect), mupdf.FzSeparations(0), alpha)
            self.this = pm

        elif args_match(args, (Colorspace, mupdf.FzColorspace, type(None)), (Pixmap, mupdf.FzPixmap)):
            # copy pixmap, converting colorspace
            cs, spix = args
            if isinstance(cs, Colorspace):
                cs = cs.this
            elif cs is None:
                cs = mupdf.FzColorspace(None)
            if isinstance(spix, Pixmap):
                spix = spix.this
            if not mupdf.fz_pixmap_colorspace(spix).m_internal:
                raise ValueError( "source colorspace must not be None")

            if cs.m_internal:
                self.this = mupdf.fz_convert_pixmap(
                        spix,
                        cs,
                        mupdf.FzColorspace(),
                        mupdf.FzDefaultColorspaces(None),
                        mupdf.FzColorParams(),
                        1
                        )
            else:
                # No target colorspace: extract the alpha channel only.
                self.this = mupdf.fz_new_pixmap_from_alpha_channel( spix)
                if not self.this.m_internal:
                    raise RuntimeError( MSG_PIX_NOALPHA)

        elif args_match(args, (Pixmap, mupdf.FzPixmap), (Pixmap, mupdf.FzPixmap)):
            # add mask to a pixmap w/o alpha channel
            spix, mpix = args
            if isinstance(spix, Pixmap):
                spix = spix.this
            if isinstance(mpix, Pixmap):
                mpix = mpix.this
            spm = spix
            mpm = mpix
            if not spix.m_internal: # intercept NULL for spix: make alpha only pix
                dst = mupdf.fz_new_pixmap_from_alpha_channel(mpm)
                if not dst.m_internal:
                    raise RuntimeError( MSG_PIX_NOALPHA)
            else:
                dst = mupdf.fz_new_pixmap_from_color_and_mask(spm, mpm)
            self.this = dst

        elif (args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int), None) or
             args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int))):
            # create pixmap as scaled copy of another one
            if mupdf_version_tuple < (1, 23, 8):
                assert 0, f'Cannot handle {args=} because fz_scale_pixmap() and fz_scale_pixmap_cached() are not declared in MuPDF headers'
            # `clip` is optional. Unpack according to the actual number of
            # args; unconditionally unpacking four values would raise
            # ValueError for the documented Pixmap(src, width, height) form.
            if len(args) == 3:
                spix, w, h = args
                bbox = mupdf.FzIrect(mupdf.fz_infinite_irect)
            else:
                spix, w, h, clip = args
                bbox = JM_irect_from_py(clip)

            src_pix = spix.this if isinstance(spix, Pixmap) else spix
            if not mupdf.fz_is_infinite_irect(bbox):
                pm = mupdf.fz_scale_pixmap(src_pix, src_pix.x(), src_pix.y(), w, h, bbox)
            else:
                pm = mupdf.fz_scale_pixmap(src_pix, src_pix.x(), src_pix.y(), w, h, mupdf.FzIrect(mupdf.fz_infinite_irect))
            self.this = pm

        elif args_match(args, str, (Pixmap, mupdf.FzPixmap)) and args[0] == 'raw':
            # Special raw construction where we set .this directly.
            _, pm = args
            if isinstance(pm, Pixmap):
                pm = pm.this
            self.this = pm

        elif args_match(args, (Pixmap, mupdf.FzPixmap), (int, None)):
            # Pixmap(struct Pixmap *spix, int alpha=1)
            # copy pixmap & add / drop the alpha channel
            spix = args[0]
            alpha = args[1] if len(args) == 2 else 1
            src_pix = spix.this if isinstance(spix, Pixmap) else spix
            if not _INRANGE(alpha, 0, 1):
                raise ValueError( "bad alpha value")
            cs = mupdf.fz_pixmap_colorspace(src_pix)
            if not cs.m_internal and not alpha:
                raise ValueError( "cannot drop alpha for 'NULL' colorspace")
            seps = mupdf.FzSeparations()
            n = mupdf.fz_pixmap_colorants(src_pix)
            w = mupdf.fz_pixmap_width(src_pix)
            h = mupdf.fz_pixmap_height(src_pix)
            pm = mupdf.fz_new_pixmap(cs, w, h, seps, alpha)
            pm.m_internal.x = src_pix.m_internal.x
            pm.m_internal.y = src_pix.m_internal.y
            pm.m_internal.xres = src_pix.m_internal.xres
            pm.m_internal.yres = src_pix.m_internal.yres

            # Copy samples data with ll_fz_pixmap_copy(), which the MuPDF
            # Python bindings provide specially for best performance. The
            # slower memoryview and per-byte fallbacks were unreachable
            # (they followed an `if 1:`) and have been dropped.
            mupdf.ll_fz_pixmap_copy( pm.m_internal, src_pix.m_internal, n)
            self.this = pm

        elif args_match(args, (mupdf.FzColorspace, Colorspace), int, int, None, (int, bool)):
            # create pixmap from samples data
            cs, w, h, samples, alpha = args
            if isinstance(cs, Colorspace):
                cs = cs.this
                assert isinstance(cs, mupdf.FzColorspace)
            n = mupdf.fz_colorspace_n(cs)
            stride = (n + alpha) * w
            seps = mupdf.FzSeparations()
            pm = mupdf.fz_new_pixmap(cs, w, h, seps, alpha)

            if isinstance( samples, (bytes, bytearray)):
                samples2 = mupdf.python_buffer_data(samples)
                size = len(samples)
            else:
                res = JM_BufferFromBytes(samples)
                if not res.m_internal:
                    raise ValueError( "bad samples data")
                size, c = mupdf.fz_buffer_storage(res)
                # NOTE(review): this reads from `samples`, not from the
                # buffer `res` created above - confirm this is intended.
                samples2 = mupdf.python_buffer_data(samples) # raw swig proxy for `const unsigned char*`.
            if stride * h != size:
                raise ValueError( f"bad samples length {w=} {h=} {alpha=} {n=} {stride=} {size=}")
            mupdf.ll_fz_pixmap_copy_raw( pm.m_internal, samples2)
            self.this = pm

        elif args_match(args, None):
            # create pixmap from filename, file object, pathlib.Path or memory
            imagedata, = args
            name = 'name'
            if hasattr(imagedata, "resolve"):
                fname = imagedata.__str__()
                if fname:
                    img = mupdf.fz_new_image_from_file(fname)
            elif hasattr(imagedata, name):
                fname = imagedata.name
                if fname:
                    img = mupdf.fz_new_image_from_file(fname)
            elif isinstance(imagedata, str):
                img = mupdf.fz_new_image_from_file(imagedata)
            else:
                res = JM_BufferFromBytes(imagedata)
                if not res.m_internal or not res.m_internal.len:
                    raise ValueError( "bad image data")
                img = mupdf.fz_new_image_from_buffer(res)

            # Original code passed null for subarea and ctm, but that's not
            # possible with MuPDF's python bindings. The equivalent is an
            # infinite rect and identify matrix scaled by img.w() and img.h().
            pm, w, h = mupdf.fz_get_pixmap_from_image(
                    img,
                    mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT),
                    mupdf.FzMatrix( img.w(), 0, 0, img.h(), 0, 0),
                    )
            xres, yres = mupdf.fz_image_resolution(img)
            pm.m_internal.xres = xres
            pm.m_internal.yres = yres
            self.this = pm

        elif args_match(args, (Document, mupdf.FzDocument), int):
            # Create pixmap from PDF image identified by XREF number
            doc, xref = args
            pdf = _as_pdf_document(doc)
            ASSERT_PDF(pdf)
            xreflen = mupdf.pdf_xref_len(pdf)
            if not _INRANGE(xref, 1, xreflen-1):
                raise ValueError( MSG_BAD_XREF)
            ref = mupdf.pdf_new_indirect(pdf, xref, 0)
            type_ = mupdf.pdf_dict_get(ref, PDF_NAME('Subtype'))
            if (not mupdf.pdf_name_eq(type_, PDF_NAME('Image'))
                    and not mupdf.pdf_name_eq(type_, PDF_NAME('Alpha'))
                    and not mupdf.pdf_name_eq(type_, PDF_NAME('Luminosity'))
                    ):
                raise ValueError( MSG_IS_NO_IMAGE)
            img = mupdf.pdf_load_image(pdf, ref)
            # Original code passed null for subarea and ctm, but that's not
            # possible with MuPDF's python bindings. The equivalent is an
            # infinite rect and identify matrix scaled by img.w() and img.h().
            pix, w, h = mupdf.fz_get_pixmap_from_image(
                    img,
                    mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT),
                    mupdf.FzMatrix(img.w(), 0, 0, img.h(), 0, 0),
                    )
            self.this = pix

        else:
            text = 'Unrecognised args for constructing Pixmap:\n'
            for arg in args:
                text += f'    {type(arg)}: {arg}\n'
            raise Exception( text)

        # 2024-01-16: Experimental support for a memory-view of the underlying
        # data. Doesn't seem to make much difference to Pixmap.set_pixel() so
        # not currently used.
        self._memory_view = None

    def __len__(self):
        """Return the pixmap size (see property `size`)."""
        return self.size

    def __repr__(self):
        """Return "Pixmap(<colorspace name>, <irect>, <alpha>)".

        The former early `return` for subclasses made repr() raise
        TypeError there, because __repr__ must return a string.
        """
        if self.colorspace:
            return "Pixmap(%s, %s, %s)" % (self.colorspace.this.m_internal.name, self.irect, self.alpha)
        else:
            return "Pixmap(%s, %s, %s)" % ('None', self.irect, self.alpha)

    def _tobytes(self, format_, jpg_quality):
        '''
        Pixmap._tobytes

        Write the pixmap into a memory buffer in the format selected by the
        numeric code `format_` and return the buffer content. Unknown codes
        fall back to PNG.
        '''
        pm = self.this
        size = mupdf.fz_pixmap_stride(pm) * pm.h()
        res = mupdf.fz_new_buffer(size)
        out = mupdf.FzOutput(res)
        if   format_ == 1:  mupdf.fz_write_pixmap_as_png(out, pm)
        elif format_ == 2:  mupdf.fz_write_pixmap_as_pnm(out, pm)
        elif format_ == 3:  mupdf.fz_write_pixmap_as_pam(out, pm)
        elif format_ == 5:  mupdf.fz_write_pixmap_as_psd(out, pm)
        elif format_ == 6:  mupdf.fz_write_pixmap_as_ps(out, pm)
        elif format_ == 7:
            # From MuPDF 1.24 on the JPEG writer takes one more argument.
            if mupdf_version_tuple < (1, 24):
                mupdf.fz_write_pixmap_as_jpeg(out, pm, jpg_quality)
            else:
                mupdf.fz_write_pixmap_as_jpeg(out, pm, jpg_quality, 0)
        else:
            mupdf.fz_write_pixmap_as_png(out, pm)
        out.fz_close_output()
        barray = JM_BinFromBuffer(res)
        return barray

    def _writeIMG(self, filename, format_, jpg_quality):
        """Save the pixmap to `filename` in the format selected by the
        numeric code `format_`. Unknown codes fall back to PNG."""
        pm = self.this
        if   format_ == 1:  mupdf.fz_save_pixmap_as_png(pm, filename)
        elif format_ == 2:  mupdf.fz_save_pixmap_as_pnm(pm, filename)
        elif format_ == 3:  mupdf.fz_save_pixmap_as_pam(pm, filename)
        elif format_ == 5:  mupdf.fz_save_pixmap_as_psd(pm, filename)
        elif format_ == 6:  mupdf.fz_save_pixmap_as_ps(pm, filename)
        elif format_ == 7:  mupdf.fz_save_pixmap_as_jpeg(pm, filename, jpg_quality)
        else:               mupdf.fz_save_pixmap_as_png(pm, filename)

    @property
    def alpha(self):
        """Indicates presence of alpha channel."""
        return mupdf.fz_pixmap_alpha(self.this)

    def clear_with(self, value=None, bbox=None):
        """Fill all color components with same value.

        Without `value` the pixmap is cleared; without `bbox` the whole
        pixmap is filled, otherwise only the given rectangle.
        """
        if value is None:
            mupdf.fz_clear_pixmap(self.this)
        elif bbox is None:
            mupdf.fz_clear_pixmap_with_value(self.this, value)
        else:
            JM_clear_pixmap_rect_with_value(self.this, value, JM_irect_from_py(bbox))

    def color_count(self, colors=0, clip=None):
        '''
        Return count of each color.

        With a falsy `colors` only the number of distinct colors is
        returned, otherwise the full result of the color counter.
        '''
        pm = self.this
        rc = JM_color_count( pm, clip)
        if not rc:
            raise RuntimeError( MSG_COLOR_COUNT_FAILED)
        if not colors:
            return len( rc)
        return rc

    def color_topusage(self, clip=None):
        """Return most frequent color and its usage ratio."""
        allpixels = 0
        cnt = 0
        if clip is not None and self.irect in Rect(clip):
            clip = self.irect
        for pixel, count in self.color_count(colors=True,clip=clip).items():
            allpixels += count
            if count > cnt:
                cnt = count
                maxpixel = pixel
        if not allpixels:
            return (1, bytes([255] * self.n))
        return (cnt / allpixels, maxpixel)

    @property
    def colorspace(self):
        """Pixmap Colorspace."""
        return Colorspace(mupdf.fz_pixmap_colorspace(self.this))

    def copy(self, src, bbox):
        """Copy bbox from another Pixmap.

        Raises:
            ValueError: if the source has no colorspace or if source and
                target differ in their alpha setting.
        """
        pm = self.this
        src_pix = src.this
        if not mupdf.fz_pixmap_colorspace(src_pix):
            raise ValueError( "cannot copy pixmap with NULL colorspace")
        if pm.alpha() != src_pix.alpha():
            raise ValueError( "source and target alpha must be equal")
        mupdf.fz_copy_pixmap_rect(pm, src_pix, JM_irect_from_py(bbox), mupdf.FzDefaultColorspaces(None))

    @property
    def digest(self):
        """MD5 digest of pixmap (bytes)."""
        ret = mupdf.fz_md5_pixmap2(self.this)
        return bytes(ret)

    def gamma_with(self, gamma):
        """Apply correction with some float.
        gamma=1 is a no-op."""
        if not mupdf.fz_pixmap_colorspace( self.this):
            message_warning("colorspace invalid for function")
            return
        mupdf.fz_gamma_pixmap( self.this, gamma)

    @property
    def h(self):
        """The height."""
        return mupdf.fz_pixmap_height(self.this)

    def invert_irect(self, bbox=None):
        """Invert the colors inside a bbox.

        An infinite `bbox` is replaced by the bbox of the whole pixmap.
        """
        pm = self.this
        if not mupdf.fz_pixmap_colorspace(pm):
            message_warning("ignored for stencil pixmap")
            return False
        r = JM_irect_from_py(bbox)
        if mupdf.fz_is_infinite_irect(r):
            r = mupdf.fz_pixmap_bbox( pm)
        return bool(JM_invert_pixmap_rect( pm, r))

    @property
    def irect(self):
        """Pixmap bbox - an IRect object."""
        val = mupdf.fz_pixmap_bbox(self.this)
        return JM_py_from_irect( val)

    @property
    def is_monochrome(self):
        """Check if pixmap is monochrome."""
        return mupdf.fz_is_pixmap_monochrome( self.this)

    @property
    def is_unicolor(self):
        '''
        Check if pixmap has only one color.

        Compares every pixel (groups of `n` samples) with the first one.
        '''
        pm = self.this
        n = pm.n()
        count = pm.w() * pm.h() * n
        def _pixmap_read_samples(pm, offset, n):
            ret = list()
            for i in range(n):
                ret.append(mupdf.fz_samples_get(pm, offset+i))
            return ret
        sample0 = _pixmap_read_samples( pm, 0, n)
        for offset in range( n, count, n):
            sample = _pixmap_read_samples( pm, offset, n)
            if sample != sample0:
                return False
        return True

    @property
    def n(self):
        """The size of one pixel."""
        if g_use_extra:
            # Setting self.__class__.n gives a small reduction in overhead of
            # test_general.py:test_2093, e.g. 1.4x -> 1.3x.
            # On first access the property replaces itself on the class.
            def n2(self):
                return extra.pixmap_n(self.this)
            self.__class__.n = property(n2)
            return self.n
        return mupdf.fz_pixmap_components(self.this)

    def pdfocr_save(self, filename, compress=1, language=None, tessdata=None):
        '''
        Save pixmap as an OCR-ed PDF page.

        `filename` is either a str (file path) or an object that is handed
        to JM_new_output_fileptr() to receive the output.

        Raises:
            RuntimeError: if neither TESSDATA_PREFIX nor `tessdata` is set.
        '''
        if not TESSDATA_PREFIX and not tessdata:
            raise RuntimeError('No OCR support: TESSDATA_PREFIX not set')
        opts = mupdf.FzPdfocrOptions()
        opts.compress = compress
        if language:
            opts.language_set2( language)
        if tessdata:
            opts.datadir_set2( tessdata)
        pix = self.this
        if isinstance(filename, str):
            mupdf.fz_save_pixmap_as_pdfocr( pix, filename, 0, opts)
        else:
            out = JM_new_output_fileptr( filename)
            mupdf.fz_write_pixmap_as_pdfocr( out, pix, opts)
            out.fz_close_output()

    def pdfocr_tobytes(self, compress=True, language="eng", tessdata=None):
        """Save pixmap as an OCR-ed PDF page.

        Args:
            compress: (bool) compress, default 1 (True).
            language: (str) language(s) occurring on page, default "eng" (English),
                    multiples like "eng+ger" for English and German.
            tessdata: (str) folder name of Tesseract's language support. Must be
                    given if environment variable TESSDATA_PREFIX is not set.

        Notes:
            On failure, make sure Tesseract is installed and you have set the
            environment variable "TESSDATA_PREFIX" to the folder containing your
            Tesseract's language support data.
        """
        if not TESSDATA_PREFIX and not tessdata:
            raise RuntimeError('No OCR support: TESSDATA_PREFIX not set')
        from io import BytesIO
        bio = BytesIO()
        self.pdfocr_save(bio, compress=compress, language=language, tessdata=tessdata)
        return bio.getvalue()

    def pil_save(self, *args, **kwargs):
        """Write to image file using Pillow.

        Args are passed to Pillow's Image.save method, see their documentation.
        Use instead of save when other output formats are desired.
        """
        try:
            from PIL import Image
        except ImportError:
            message("PIL/Pillow not installed")
            raise

        cspace = self.colorspace
        if cspace is None:
            mode = "L"
        elif cspace.n == 1:
            mode = "L" if self.alpha == 0 else "LA"
        elif cspace.n == 3:
            mode = "RGB" if self.alpha == 0 else "RGBA"
        else:
            mode = "CMYK"

        img = Image.frombytes(mode, (self.width, self.height), self.samples)

        # Default the dpi to the pixmap's own resolution.
        if "dpi" not in kwargs.keys():
            kwargs["dpi"] = (self.xres, self.yres)

        img.save(*args, **kwargs)

    def pil_tobytes(self, *args, **kwargs):
        """Convert to binary image stream using pillow.

        Args are passed to Pillow's Image.save method, see their documentation.
        Use instead of 'tobytes' when other output formats are needed.
        """
        from io import BytesIO
        bytes_out = BytesIO()
        self.pil_save(bytes_out, *args, **kwargs)
        return bytes_out.getvalue()

    def pixel(self, x, y):
        """Get color tuple of pixel (x, y).
        Last item is the alpha if Pixmap.alpha is true.

        Raises:
            ValueError: if (x, y) lies outside the pixmap.
        """
        if g_use_extra:
            return extra.pixmap_pixel(self.this.m_internal, x, y)
        if (0
                or x < 0
                or x >= self.this.m_internal.w
                or y < 0
                or y >= self.this.m_internal.h
                ):
            # Raise directly, consistent with set_pixel().
            raise ValueError(MSG_PIXEL_OUTSIDE)
        n = self.this.m_internal.n
        stride = self.this.m_internal.stride
        i = stride * y + n * x
        ret = tuple( self.samples_mv[ i: i+n])
        return ret

    @property
    def samples(self)->bytes:
        """Copy of the pixmap samples as a bytes object."""
        mv = self.samples_mv
        return bytes( mv)

    @property
    def samples_mv(self):
        '''
        Pixmap samples memoryview.
        '''
        return mupdf.fz_pixmap_samples_memoryview(self.this)

    @property
    def samples_ptr(self):
        """Result of mupdf.fz_pixmap_samples_int() for this pixmap."""
        return mupdf.fz_pixmap_samples_int(self.this)

    def save(self, filename, output=None, jpg_quality=95):
        """Output as image in format determined by filename extension.

        Args:
            output: (str) only use to overrule filename extension. Default is PNG.
                    Others are JPEG, JPG, PNM, PGM, PPM, PBM, PAM, PSD, PS.

        Raises:
            ValueError: for an unknown format, or if the format supports
                neither the pixmap's alpha channel nor its colorspace.
        """
        valid_formats = {
                "png": 1,
                "pnm": 2,
                "pgm": 2,
                "ppm": 2,
                "pbm": 2,
                "pam": 3,
                "psd": 5,
                "ps": 6,
                "jpg": 7,
                "jpeg": 7,
                }

        # Accept str, path-like objects and objects having a `name`.
        if type(filename) is str:
            pass
        elif hasattr(filename, "absolute"):
            filename = str(filename)
        elif hasattr(filename, "name"):
            filename = filename.name
        if output is None:
            _, ext = os.path.splitext(filename)
            output = ext[1:]

        idx = valid_formats.get(output.lower(), None)
        if idx is None:
            raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}")
        if self.alpha and idx in (2, 6, 7):
            raise ValueError("'%s' cannot have alpha" % output)
        if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4):
            raise ValueError("unsupported colorspace for '%s'" % output)
        if idx == 7:
            self.set_dpi(self.xres, self.yres)
        return self._writeIMG(filename, idx, jpg_quality)

    def set_alpha(self, alphavalues=None, premultiply=1, opaque=None, matte=None):
        """Set alpha channel to values contained in a byte array.
        If omitted, set alphas to 255.

        Args:
            alphavalues: (bytes) with length (width * height) or 'None'.
            premultiply: (bool, True) premultiply colors with alpha values.
            opaque: (tuple, length colorspace.n) this color receives opacity 0.
            matte: (tuple, length colorspace.n)) preblending background color.

        Raises:
            ValueError: if the pixmap has no alpha channel or `alphavalues`
                holds fewer than width * height values.
        """
        pix = self.this
        if pix.alpha() == 0:
            raise ValueError( MSG_PIX_NOALPHA)
        n = mupdf.fz_pixmap_colorants(pix)
        w = mupdf.fz_pixmap_width(pix)
        h = mupdf.fz_pixmap_height(pix)
        balen = w * h * (n+1)
        colors = [0, 0, 0, 0]   # make this color opaque
        bgcolor = [0, 0, 0, 0]  # preblending background color
        zero_out = 0
        bground = 0
        if opaque and isinstance(opaque, (list, tuple)) and len(opaque) == n:
            for i in range(n):
                colors[i] = opaque[i]
            zero_out = 1
        if matte and isinstance( matte, (tuple, list)) and len(matte) == n:
            for i in range(n):
                bgcolor[i] = matte[i]
            bground = 1
        data = bytes()
        data_len = 0
        if alphavalues:
            # fixme: don't seem to need to create an fz_buffer - can
            # use <alphavalues> directly?
            if isinstance(alphavalues, (bytes, bytearray)):
                data = alphavalues
                data_len = len(alphavalues)
            else:
                assert 0, f'unexpected type for alphavalues: {type(alphavalues)}'
            if data_len < w * h:
                raise ValueError( "bad alpha values")
        # Use C implementation for speed. The pure-Python fallback that used
        # to follow was unreachable (it sat behind `if 1:`) and was dropped.
        mupdf.Pixmap_set_alpha_helper(
                balen,
                n,
                data_len,
                zero_out,
                mupdf.python_buffer_data( data),
                pix.m_internal,
                premultiply,
                bground,
                colors,
                bgcolor,
                )

    def tobytes(self, output="png", jpg_quality=95):
        '''
        Convert to binary image stream of desired type.

        Raises:
            ValueError: for an unknown format, or if the format supports
                neither the pixmap's alpha channel nor its colorspace.
        '''
        valid_formats = {
                "png": 1,
                "pnm": 2,
                "pgm": 2,
                "ppm": 2,
                "pbm": 2,
                "pam": 3,
                "tga": 4,
                "tpic": 4,
                "psd": 5,
                "ps": 6,
                'jpg': 7,
                'jpeg': 7,
                }
        idx = valid_formats.get(output.lower(), None)
        if idx is None:
            raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}")
        # These two messages need the f-prefix; without it a literal
        # "{output}" was shown instead of the format name.
        if self.alpha and idx in (2, 6, 7):
            raise ValueError(f"'{output}' cannot have alpha")
        if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4):
            raise ValueError(f"unsupported colorspace for '{output}'")
        if idx == 7:
            self.set_dpi(self.xres, self.yres)
        barray = self._tobytes(idx, jpg_quality)
        return barray

    def set_dpi(self, xres, yres):
        """Set resolution in both dimensions."""
        pm = self.this
        pm.m_internal.xres = xres
        pm.m_internal.yres = yres

    def set_origin(self, x, y):
        """Set top-left coordinates."""
        pm = self.this
        pm.m_internal.x = x
        pm.m_internal.y = y

    def set_pixel(self, x, y, color):
        """Set color of pixel (x, y).

        Raises:
            ValueError: if (x, y) lies outside the pixmap or a component of
                `color` is not in range 0..255.
        """
        if g_use_extra:
            return extra.set_pixel(self.this.m_internal, x, y, color)
        pm = self.this
        if not _INRANGE(x, 0, pm.w() - 1) or not _INRANGE(y, 0, pm.h() - 1):
            raise ValueError( MSG_PIXEL_OUTSIDE)
        n = pm.n()
        for j in range(n):
            i = color[j]
            if not _INRANGE(i, 0, 255):
                raise ValueError( MSG_BAD_COLOR_SEQ)
        stride = mupdf.fz_pixmap_stride( pm)
        i = stride * y + n * x
        # Using a cached self._memory_view doesn't actually make much
        # difference to speed, so samples are written one by one. The
        # disabled (`if 0:`) memory-view variant was removed.
        for j in range(n):
            pm.fz_samples_set(i + j, color[j])

    def set_rect(self, bbox, color):
        """Set color of all pixels in bbox.

        Raises:
            ValueError: if a component of `color` is not in range 0..255.
        """
        pm = self.this
        n = pm.n()
        c = []
        for j in range(n):
            i = color[j]
            if not _INRANGE(i, 0, 255):
                raise ValueError( MSG_BAD_COLOR_SEQ)
            c.append(i)
        bbox = JM_irect_from_py(bbox)
        i = JM_fill_pixmap_rect_with_color(pm, c, bbox)
        rc = bool(i)
        return rc

    def shrink(self, factor):
        """Divide width and height by 2**factor.
        E.g. factor=1 shrinks to 25% of original size (in place)."""
        if factor < 1:
            message_warning("ignoring shrink factor < 1")
            return
        mupdf.fz_subsample_pixmap( self.this, factor)
        # Pixmap has changed so clear our memory view.
        self._memory_view = None

    @property
    def size(self):
        """Pixmap size."""
        if mupdf_version_tuple >= (1, 23, 8):
            return mupdf.fz_pixmap_size( self.this)
        # fz_pixmap_size() is not publically visible, so we implement it
        # ourselves. fixme: we don't add on sizeof(fz_pixmap).
        pm = self.this
        return pm.n() * pm.w() * pm.h()

    @property
    def stride(self):
        """Length of one image line (width * n)."""
        return self.this.stride()

    def tint_with(self, black, white):
        """Tint colors with modifiers for black and white."""
        if not self.colorspace or self.colorspace.n > 3:
            message("warning: colorspace invalid for function")
            return
        return mupdf.fz_tint_pixmap( self.this, black, white)

    @property
    def w(self):
        """The width."""
        return mupdf.fz_pixmap_width(self.this)

    def warp(self, quad, width, height):
        """Return pixmap from a warped quad.

        Raises:
            ValueError: if `quad` is not convex.
        """
        if not quad.is_convex: raise ValueError("quad must be convex")
        q = JM_quad_from_py(quad)
        points = [ q.ul, q.ur, q.lr, q.ll]
        dst = mupdf.fz_warp_pixmap( self.this, points, width, height)
        return Pixmap( dst)

    @property
    def x(self):
        """x component of Pixmap origin."""
        return mupdf.fz_pixmap_x(self.this)

    @property
    def xres(self):
        """Resolution in x direction."""
        return self.this.xres()

    @property
    def y(self):
        """y component of Pixmap origin."""
        return mupdf.fz_pixmap_y(self.this)

    @property
    def yres(self):
        """Resolution in y direction."""
        return self.this.yres()

    # Long-name aliases of the `w` and `h` properties.
    width = w
    height = h
# Drop the existing binding of the name `Point` before the class statement
# below creates it anew (presumably an earlier placeholder - TODO confirm).
del Point
class Point:
    """Two-dimensional point / vector with coordinates `x` and `y`.

    Acts like a sequence of length 2 and supports arithmetic with numbers,
    point-like sequences and (for `*` and `/`) matrix-like sequences.
    """

    def __abs__(self):
        """Euclidean length of the vector."""
        return math.sqrt(self.x * self.x + self.y * self.y)

    def __add__(self, p):
        """Add a number (to both coordinates) or a point-like sequence."""
        if not hasattr(p, "__float__"):
            if len(p) != 2:
                raise ValueError("Point: bad seq len")
            dx, dy = p[0], p[1]
        else:
            dx = dy = p
        return Point(self.x + dx, self.y + dy)

    def __bool__(self):
        """False only if both coordinates are zero."""
        highest = max(self)
        lowest = min(self)
        return not (highest == lowest and lowest == 0)

    def __eq__(self, p):
        """Equal to any length-2 sequence with the same coordinates."""
        if not hasattr(p, "__len__") or len(p) != 2:
            return False
        return not (self - p)

    def __getitem__(self, i):
        coords = (self.x, self.y)
        return coords[i]

    def __hash__(self):
        coords = tuple(self)
        return hash(coords)

    def __init__(self, *args, x=None, y=None):
        '''
        Point() - all zeros
        Point(x, y)
        Point(Point) - new copy
        Point(sequence) - from 'sequence'

        Explicit keyword args x, y override earlier settings if not None.
        '''
        count = len(args)
        if count > 2:
            raise ValueError("Point: bad seq len")
        if count == 0:
            self.x = 0.0
            self.y = 0.0
        elif count == 2:
            self.x = float(args[0])
            self.y = float(args[1])
        elif count == 1:
            src = args[0]
            if isinstance(src, (mupdf.FzPoint, mupdf.fz_point)):
                # MuPDF points are taken over without float conversion.
                self.x = src.x
                self.y = src.y
            elif not hasattr(src, "__getitem__"):
                raise ValueError("Point: bad args")
            elif len(src) != 2:
                raise ValueError("Point: bad seq len")
            else:
                self.x = float(src[0])
                self.y = float(src[1])
        else:
            raise ValueError("Point: bad seq len")
        # Keyword overrides are stored as given.
        if x is not None:
            self.x = x
        if y is not None:
            self.y = y

    def __len__(self):
        return 2

    def __mul__(self, m):
        """Scale by a number or transform a copy by a matrix-like `m`."""
        if hasattr(m, "__float__"):
            return Point(self.x * m, self.y * m)
        return Point(self).transform(m)

    def __neg__(self):
        return Point(-self.x, -self.y)

    def __nonzero__(self):
        highest = max(self)
        lowest = min(self)
        return not (highest == lowest and lowest == 0)

    def __pos__(self):
        return Point(self)

    def __repr__(self):
        return f"Point{tuple(self)}"

    def __setitem__(self, i, v):
        """Set coordinate 0 (x) or 1 (y) to float(v)."""
        value = float(v)
        if i == 1:
            self.y = value
        elif i == 0:
            self.x = value
        else:
            raise IndexError("index out of range")
        return None

    def __sub__(self, p):
        """Subtract a number (from both coordinates) or a point-like sequence."""
        if not hasattr(p, "__float__"):
            if len(p) != 2:
                raise ValueError("Point: bad seq len")
            dx, dy = p[0], p[1]
        else:
            dx = dy = p
        return Point(self.x - dx, self.y - dy)

    def __truediv__(self, m):
        """Divide by a number, or transform a copy by the inverse of a
        matrix-like `m` (ZeroDivisionError if it cannot be inverted)."""
        if hasattr(m, "__float__"):
            return Point(self.x * 1./m, self.y * 1./m)
        inverse = util_invert_matrix(m)[1]
        if not inverse:
            raise ZeroDivisionError("matrix not invertible")
        return Point(self).transform(inverse)

    @property
    def abs_unit(self):
        """Unit vector with positive coordinates."""
        squared = self.x * self.x + self.y * self.y
        if squared < EPSILON:
            return Point(0,0)
        length = math.sqrt(squared)
        return Point(abs(self.x) / length, abs(self.y) / length)

    def distance_to(self, *args):
        """Return distance to rectangle or another point.

        The first argument is point-like (length 2) or rect-like (length
        4); an optional second argument names the unit: "px" (default),
        "in", "cm" or "mm".
        """
        if not len(args) > 0:
            raise ValueError("at least one parameter must be given")

        target = args[0]
        if len(target) == 2:
            target = Point(target)
        elif len(target) == 4:
            target = Rect(target)
        else:
            raise ValueError("arg1 must be point-like or rect-like")

        unit = args[1] if len(args) > 1 else "px"
        conversions = {"px": (1.,1.), "in": (1.,72.), "cm": (2.54, 72.),
                       "mm": (25.4, 72.)}
        numerator, denominator = conversions[unit][0], conversions[unit][1]
        factor = numerator / denominator

        if type(target) is Point:
            return abs(self - target) * factor

        # from here on, target is a rectangle
        # as a safeguard, make a finite copy of it
        box = Rect(target.top_left, target.top_left)
        box = box | target.bottom_right
        if self in box:
            return 0.0

        below = self.y >= box.y1
        if self.x > box.x1:
            # right of the rectangle
            if below:
                return self.distance_to(box.bottom_right, unit)
            if self.y <= box.y0:
                return self.distance_to(box.top_right, unit)
            return (self.x - box.x1) * factor
        if box.x0 <= self.x <= box.x1:
            # directly above or below the rectangle
            if below:
                return (self.y - box.y1) * factor
            return (box.y0 - self.y) * factor
        # left of the rectangle
        if below:
            return self.distance_to(box.bottom_left, unit)
        if self.y <= box.y0:
            return self.distance_to(box.top_left, unit)
        return (box.x0 - self.x) * factor

    def transform(self, m):
        """Replace point by its transformation with matrix-like m."""
        if len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        self.x, self.y = util_transform_point(self, m)
        return self

    @property
    def unit(self):
        """Unit vector of the point."""
        squared = self.x * self.x + self.y * self.y
        if squared < EPSILON:
            return Point(0,0)
        length = math.sqrt(squared)
        return Point(self.x / length, self.y / length)

    __div__ = __truediv__
    norm = __abs__
class Quad:
def __abs__(self):
    """Return |ul - ur| * |ul - ll|, or 0.0 for an empty quad."""
    if not self.is_empty:
        top_edge = abs(self.ul - self.ur)
        left_edge = abs(self.ul - self.ll)
        return top_edge * left_edge
    return 0.0
def __add__(self, q):
    """Add a number to every corner, or add a length-4 sequence cornerwise."""
    if not hasattr(q, "__float__"):
        if len(q) != 4:
            raise ValueError("Quad: bad seq len")
        d_ul, d_ur, d_ll, d_lr = q[0], q[1], q[2], q[3]
    else:
        d_ul = d_ur = d_ll = d_lr = q
    return Quad(self.ul + d_ul, self.ur + d_ur, self.ll + d_ll, self.lr + d_lr)
def __bool__(self):
    """A quad is truthy unless it is empty."""
    empty = self.is_empty
    return not empty
def __contains__(self, x):
    """Check whether a point-, rect- or quad-like `x` lies inside the quad.

    Objects without a usable length, or of a length other than 2 or 4,
    are never contained.
    """
    try:
        size = x.__len__()
    except Exception:
        if g_exceptions_verbose > 1:    exception_info()
        return False
    if size == 2:
        return util_point_in_quad(x, self)
    if size != 4:
        return False
    if CheckRect(x):
        # An empty rectangle counts as contained.
        if Rect(x).is_empty:
            return True
        return util_point_in_quad(x[:2], self) and util_point_in_quad(x[2:], self)
    if CheckQuad(x):
        return all(util_point_in_quad(x[i], self) for i in range(4))
    return False
def __eq__(self, quad):
    """Equal to any length-4 sequence whose items equal the four corners."""
    if not hasattr(quad, "__len__") or len(quad) != 4:
        return False
    corners = (self.ul, self.ur, self.ll, self.lr)
    return all(corner == quad[i] for i, corner in enumerate(corners))
def __getitem__(self, i):
    """Index the corners in the order ul, ur, ll, lr."""
    corners = (self.ul, self.ur, self.ll, self.lr)
    return corners[i]
def __hash__(self):
    """Hash of the tuple of corners."""
    corners = tuple(self)
    return hash(corners)
def __init__(self, *args, ul=None, ur=None, ll=None, lr=None):
    '''
    Quad() - all zero points
    Quad(ul, ur, ll, lr)
    Quad(quad) - new copy
    Quad(sequence) - from 'sequence'

    Explicit keyword args ul, ur, ll, lr override earlier settings if not
    None.

    Raises:
        ValueError: for more than four positional args, two or three
            positional args, or a single arg that is not a length-4
            sequence.
    '''
    if not args:
        # Give every corner its own Point. A chained assignment would bind
        # all four corners to one shared mutable object, so changing e.g.
        # `q.ul.x` would silently move the other three corners as well.
        self.ul = Point()
        self.ur = Point()
        self.ll = Point()
        self.lr = Point()
    elif len(args) > 4:
        raise ValueError("Quad: bad seq len")
    elif len(args) == 4:
        self.ul, self.ur, self.ll, self.lr = map(Point, args)
    elif len(args) == 1:
        l = args[0]
        if isinstance(l, mupdf.FzQuad):
            # Keep the MuPDF quad and copy its corners.
            self.this = l
            self.ul, self.ur, self.ll, self.lr = Point(l.ul), Point(l.ur), Point(l.ll), Point(l.lr)
        elif hasattr(l, "__getitem__") is False:
            raise ValueError("Quad: bad args")
        elif len(l) != 4:
            raise ValueError("Quad: bad seq len")
        else:
            self.ul, self.ur, self.ll, self.lr = map(Point, l)
    else:
        raise ValueError("Quad: bad args")
    if ul is not None:  self.ul = Point(ul)
    if ur is not None:  self.ur = Point(ur)
    if ll is not None:  self.ll = Point(ll)
    if lr is not None:  self.lr = Point(lr)
def __len__(self):
    """Number of items of a quad, which is always four."""
    corner_count = 4
    return corner_count
def __mul__(self, m):
    """Return a copy of the quad transformed with `m`; `self` stays unchanged."""
    return Quad(self).transform(m)
def __neg__(self):
    """Return a new quad built from the negated corners."""
    ul, ur, ll, lr = -self.ul, -self.ur, -self.ll, -self.lr
    return Quad(ul, ur, ll, lr)
def __nonzero__(self):
    """Python 2 spelling of the truth value: False if the quad is empty."""
    if self.is_empty:
        return False
    return True
def __pos__(self):
    """Unary plus: return a new Quad constructed from this one."""
    duplicate = Quad(self)
    return duplicate
def __repr__(self):
    """Return 'Quad' followed by the string form of the items tuple."""
    return f"Quad{tuple(self)}"
def __setitem__(self, i, v):
    """Replace corner number `i` (0=ul, 1=ur, 2=ll, 3=lr) by point-like `v`.

    Raises:
        IndexError: if `i` is none of 0, 1, 2, 3.
    """
    for index, corner in enumerate(("ul", "ur", "ll", "lr")):
        if i == index:
            setattr(self, corner, Point(v))
            return None
    raise IndexError("index out of range")
def __sub__(self, q):
    """Subtract a number from every corner, or a 4-sequence corner by corner.

    Raises:
        ValueError: if `q` is not a number and its length is not 4.
    """
    if hasattr(q, "__float__"):
        d_ul = d_ur = d_ll = d_lr = q
    elif len(q) != 4:
        raise ValueError("Quad: bad seq len")
    else:
        d_ul, d_ur, d_ll, d_lr = q[0], q[1], q[2], q[3]
    return Quad(self.ul - d_ul, self.ur - d_ur, self.ll - d_ll, self.lr - d_lr)
def __truediv__(self, m):
    """Divide by a number, or by a matrix-like (transform with its inverse).

    Returns a new quad; `self` is not changed.

    Raises:
        ZeroDivisionError: if matrix-like `m` has no inverse.
    """
    if not hasattr(m, "__float__"):
        inverse = util_invert_matrix(m)[1]
        if not inverse:
            raise ZeroDivisionError("Matrix not invertible")
    else:
        inverse = 1. / m
    return Quad(self).transform(inverse)
@property
def is_convex(self):
    """Check if quad is convex and not degenerate.

    Notes:
        For each of the two diagonals, the two corners not on that
        diagonal must not lie strictly on the same side of it.
    Returns:
        True or False.
    """
    checks = (
        # (diagonal start, diagonal end, other corner, other corner)
        (self.ul, self.lr, self.ll, self.ur),
        (self.ll, self.ur, self.lr, self.ul),
    )
    for start, end, corner_a, corner_b in checks:
        m = planish_line(start, end)  # puts this diagonal on the x-axis
        # Same sign of both y-values means: same side of the diagonal.
        if (corner_a * m).y * (corner_b * m).y > 0:
            return False
    return True
@property
def is_empty(self):
    """Check whether the quad is empty.

    This is the case if width or height is smaller than EPSILON.
    """
    if self.width < EPSILON:
        return True
    return self.height < EPSILON
@property
def is_infinite(self):
    """Check whether the rectangle enclosing this quad is infinite."""
    enclosing = self.rect
    return enclosing.is_infinite
@property
def is_rectangular(self):
    """Check if quad is rectangular.

    Notes:
        Three consecutive corner angles are tested: for each of them the
        sine must equal 1 within a tolerance of EPSILON.
    Returns:
        True or False.
    """
    corner_triples = (
        (self.ul, self.ur, self.lr),
        (self.ur, self.lr, self.ll),
        (self.lr, self.ll, self.ul),
    )
    for first, vertex, last in corner_triples:
        sine = util_sine_between(first, vertex, last)
        if abs(sine - 1) > EPSILON:
            return False
    return True
def morph(self, p, m):
    """Morph the quad with matrix-like 'm' and point-like 'p'.

    Args:
        p: point-like. Any object accepted by `Point()` may be given, e.g.
            a `Point` or a pair of numbers.
        m: matrix-like.
    Returns:
        A new quad. For the infinite quad, the infinite quad is returned.
    """
    if self.is_infinite:
        return INFINITE_QUAD()
    # Convert first, so that plain sequences work as well: the code below
    # needs the '.x' and '.y' attributes.
    p = Point(p)
    delta = Matrix(1, 1).pretranslate(p.x, p.y)
    # Apply 'm' between the inverse of 'delta' and 'delta' itself
    # (presumably so that 'm' acts relative to point 'p').
    return self * ~delta * m * delta
@property
def rect(self):
    """Return the smallest rectangle containing all four corners."""
    corners = (self.ul, self.ur, self.lr, self.ll)
    xs = [corner.x for corner in corners]
    ys = [corner.y for corner in corners]
    r = Rect()
    r.x0, r.y0 = min(xs), min(ys)
    r.x1, r.y1 = max(xs), max(ys)
    return r
def transform(self, m):
    """Replace quad by its transformation with matrix m.

    `m` may be a number or a sequence of length 6. Returns `self`.

    Raises:
        ValueError: if `m` is not a number and its length is not 6.
    """
    is_number = hasattr(m, "__float__")
    if not is_number and len(m) != 6:
        raise ValueError("Matrix: bad seq len")
    # Every corner is multiplied by the same factor / matrix.
    self.ul *= m
    self.ur *= m
    self.ll *= m
    self.lr *= m
    return self
__div__ = __truediv__  # division operator name used by Python 2

@property
def width(self):
    """Maximum of the distances ul-ur and ll-lr."""
    upper = abs(self.ul - self.ur)
    lower = abs(self.ll - self.lr)
    return max(upper, lower)

@property
def height(self):
    """Maximum of the distances ul-ll and ur-lr."""
    left = abs(self.ul - self.ll)
    right = abs(self.ur - self.lr)
    return max(left, right)
class Rect:
    """Rectangle given by the float coordinates x0, y0, x1, y1.

    Behaves like a sequence of length 4 in the order (x0, y0, x1, y1).
    """

    def __abs__(self):
        """Return the area; 0.0 for an empty or the infinite rectangle."""
        if self.is_empty or self.is_infinite:
            return 0.0
        delta_x = self.x1 - self.x0
        delta_y = self.y1 - self.y0
        return delta_x * delta_y

    def __add__(self, p):
        """Add a number to all coordinates, or a 4-sequence item by item."""
        if hasattr(p, "__float__"):
            d0 = d1 = d2 = d3 = p
        elif len(p) != 4:
            raise ValueError("Rect: bad seq len")
        else:
            d0, d1, d2, d3 = p[0], p[1], p[2], p[3]
        return Rect(self.x0 + d0, self.y0 + d1, self.x1 + d2, self.y1 + d3)

    def __and__(self, x):
        """Return the intersection with rect-like `x` as a new rectangle."""
        if not hasattr(x, "__len__"):
            raise ValueError("bad operand 2")
        other = Rect(x)
        return Rect(self).intersect(other)

    def __bool__(self):
        """False only if all four coordinates are zero."""
        largest = max(self)
        smallest = min(self)
        return not (largest == smallest == 0)

    def __contains__(self, x):
        """Check if number, point-like, rect-like or quad-like `x` is contained.

        A number is contained if it equals one of the coordinates. For a
        sequence of length 4 that cannot be made a Rect, the rectangle of
        `Quad(x)` is tested instead.
        """
        if hasattr(x, "__float__"):
            return x in tuple(self)
        size = len(x)
        if size == 2:
            return util_is_point_in_rect(x, self)
        if size != 4:
            return False
        r = INFINITE_RECT()
        try:
            r = Rect(x)
        except Exception:
            if g_exceptions_verbose > 1:
                exception_info()
            r = Quad(x).rect
        return (
            self.x0 <= r.x0 <= r.x1 <= self.x1
            and self.y0 <= r.y0 <= r.y1 <= self.y1
        )

    def __eq__(self, rect):
        """Equal if `rect` has length 4 and the difference is all zeros."""
        if not hasattr(rect, "__len__"):
            return False
        if len(rect) != 4:
            return False
        return not bool(self - rect)

    def __getitem__(self, i):
        """Return the item(s) selected by `i` from (x0, y0, x1, y1)."""
        coords = (self.x0, self.y0, self.x1, self.y1)
        return coords[i]

    def __hash__(self):
        """Hash value of the coordinate tuple."""
        coords = tuple(self)
        return hash(coords)

    def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
        """
        Rect() - all zeros
        Rect(x0, y0, x1, y1)
        Rect(top-left, x1, y1)
        Rect(x0, y0, bottom-right)
        Rect(top-left, bottom-right)
        Rect(Rect or IRect) - new copy
        Rect(sequence) - from 'sequence'

        Explicit keyword args p0, p1, x0, y0, x1, y1 override earlier settings
        if not None.
        """
        x0, y0, x1, y1 = util_make_rect(
            *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1
        )
        # Coordinates are always stored as floats.
        self.x0, self.y0 = float(x0), float(y0)
        self.x1, self.y1 = float(x1), float(y1)

    def __len__(self):
        """A rectangle always has four items."""
        item_count = 4
        return item_count

    def __mul__(self, m):
        """Multiply all coordinates by a number, or transform with matrix-like `m`.

        Returns a new rectangle.
        """
        if hasattr(m, "__float__"):
            return Rect(self.x0 * m, self.y0 * m, self.x1 * m, self.y1 * m)
        return Rect(self).transform(m)

    def __neg__(self):
        """Return a new rectangle with all coordinates negated."""
        x0, y0, x1, y1 = -self.x0, -self.y0, -self.x1, -self.y1
        return Rect(x0, y0, x1, y1)

    def __nonzero__(self):
        """Python 2 spelling of the truth value, see `__bool__`."""
        largest = max(self)
        smallest = min(self)
        return not (largest == smallest == 0)

    def __or__(self, x):
        """Return a new rectangle that also includes point-like or rect-like `x`."""
        if not hasattr(x, "__len__"):
            raise ValueError("bad operand 2")
        result = Rect(self)
        size = len(x)
        if size == 2:
            return result.include_point(x)
        if size == 4:
            return result.include_rect(x)
        raise ValueError("bad operand 2")

    def __pos__(self):
        """Unary plus: return a new Rect constructed from this one."""
        duplicate = Rect(self)
        return duplicate

    def __repr__(self):
        """Return 'Rect' followed by the string form of the coordinate tuple."""
        return f"Rect{tuple(self)}"

    def __setitem__(self, i, v):
        """Set coordinate number `i` (0=x0, 1=y0, 2=x1, 3=y1) to float(v)."""
        value = float(v)
        for index, name in enumerate(("x0", "y0", "x1", "y1")):
            if i == index:
                setattr(self, name, value)
                return None
        raise IndexError("index out of range")

    def __sub__(self, p):
        """Subtract a number from all coordinates, or a 4-sequence item by item."""
        if hasattr(p, "__float__"):
            d0 = d1 = d2 = d3 = p
        elif len(p) != 4:
            raise ValueError("Rect: bad seq len")
        else:
            d0, d1, d2, d3 = p[0], p[1], p[2], p[3]
        return Rect(self.x0 - d0, self.y0 - d1, self.x1 - d2, self.y1 - d3)

    def __truediv__(self, m):
        """Divide by a number, or by a matrix-like (transform with its inverse).

        Returns a new rectangle.

        Raises:
            ZeroDivisionError: if matrix-like `m` has no inverse.
        """
        if hasattr(m, "__float__"):
            return Rect(self.x0 * 1./m, self.y0 * 1./m, self.x1 * 1./m, self.y1 * 1./m)
        inverse = util_invert_matrix(m)[1]
        if not inverse:
            raise ZeroDivisionError(f"Matrix not invertible: {m}")
        return Rect(self).transform(inverse)

    @property
    def bottom_left(self):
        """Bottom-left corner."""
        x, y = self.x0, self.y1
        return Point(x, y)

    @property
    def bottom_right(self):
        """Bottom-right corner."""
        x, y = self.x1, self.y1
        return Point(x, y)

    def contains(self, x):
        """Check if containing point-like or rect-like x."""
        result = self.__contains__(x)
        return result

    @property
    def height(self):
        """y1 - y0, but never less than zero."""
        delta = self.y1 - self.y0
        return max(0, delta)

    def include_point(self, p):
        """Extend to include point-like p."""
        if len(p) != 2:
            raise ValueError("Point: bad seq len")
        extended = util_include_point_in_rect(self, p)
        self.x0, self.y0, self.x1, self.y1 = extended
        return self

    def include_rect(self, r):
        """Extend to include rect-like r."""
        if len(r) != 4:
            raise ValueError("Rect: bad seq len")
        other = Rect(r)
        if other.is_infinite or self.is_infinite:
            # Anything joined with the infinite rectangle is infinite.
            coords = (FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
        elif other.is_empty:
            # Nothing to include.
            return self
        elif self.is_empty:
            coords = (other.x0, other.y0, other.x1, other.y1)
        else:
            coords = util_union_rect(self, other)
        self.x0, self.y0, self.x1, self.y1 = coords
        return self

    def intersect(self, r):
        """Restrict to common rect with rect-like r."""
        if len(r) != 4:
            raise ValueError("Rect: bad seq len")
        other = Rect(r)
        if other.is_infinite:
            # The infinite rectangle does not restrict anything.
            return self
        if self.is_infinite or other.is_empty:
            # In both cases the result is simply the other rectangle.
            coords = (other.x0, other.y0, other.x1, other.y1)
        elif self.is_empty:
            return self
        else:
            coords = util_intersect_rect(self, other)
        self.x0, self.y0, self.x1, self.y1 = coords
        return self

    def intersects(self, x):
        """Check if intersection with rectangle x is not empty."""
        other = Rect(x)
        if self.is_empty or self.is_infinite or other.is_empty or other.is_infinite:
            return False
        common = Rect(self).intersect(other)
        return not common.is_empty

    @property
    def is_empty(self):
        """True if rectangle area is empty."""
        if self.x0 >= self.x1:
            return True
        return self.y0 >= self.y1

    @property
    def is_infinite(self):
        """True if this is the infinite rectangle."""
        starts_at_min = self.x0 == self.y0 == FZ_MIN_INF_RECT
        return starts_at_min and self.x1 == self.y1 == FZ_MAX_INF_RECT

    @property
    def is_valid(self):
        """True if rectangle is valid."""
        if not self.x0 <= self.x1:
            return False
        return self.y0 <= self.y1

    def morph(self, p, m):
        """Morph with matrix-like m and point-like p.

        Returns a new quad."""
        if self.is_infinite:
            return INFINITE_QUAD()
        as_quad = self.quad
        return as_quad.morph(p, m)

    def norm(self):
        """Return the square root of the sum of the squared coordinates."""
        squares = sum(c * c for c in self)
        return math.sqrt(squares)

    def normalize(self):
        """Swap coordinates where needed, so that x0 <= x1 and y0 <= y1.

        Returns `self`.
        """
        if self.x1 < self.x0:
            self.x0, self.x1 = self.x1, self.x0
        if self.y1 < self.y0:
            self.y0, self.y1 = self.y1, self.y0
        return self

    @property
    def quad(self):
        """Return Quad version of rectangle."""
        corners = (self.tl, self.tr, self.bl, self.br)
        return Quad(*corners)

    def round(self):
        """Return the IRect."""
        rounded = util_round_rect(self)
        return IRect(rounded)

    @property
    def top_left(self):
        """Top-left corner."""
        x, y = self.x0, self.y0
        return Point(x, y)

    @property
    def top_right(self):
        """Top-right corner."""
        x, y = self.x1, self.y0
        return Point(x, y)

    def torect(self, r):
        """Return matrix that converts to target rect.

        Raises:
            ValueError: if this rectangle or `r` is infinite or empty.
        """
        target = Rect(r)
        if self.is_infinite or self.is_empty or target.is_infinite or target.is_empty:
            raise ValueError("rectangles must be finite and not empty")
        # Product of three matrices built from: the negated top-left of
        # this rectangle, the width / height ratios, the target's top-left.
        shift_source = Matrix(1, 0, 0, 1, -self.x0, -self.y0)
        scale = Matrix(target.width / self.width, target.height / self.height)
        shift_target = Matrix(1, 0, 0, 1, target.x0, target.y0)
        return shift_source * scale * shift_target

    def transform(self, m):
        """Replace with the transformation by matrix-like m."""
        if len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        transformed = util_transform_rect(self, m)
        self.x0, self.y0, self.x1, self.y1 = transformed
        return self

    @property
    def width(self):
        """x1 - x0, but never less than zero."""
        delta = self.x1 - self.x0
        return max(0, delta)

    # Aliases
    __div__ = __truediv__  # division operator name used by Python 2
    bl = bottom_left
    br = bottom_right
    irect = property(round)  # 'round' is the method defined above
    tl = top_left
    tr = top_right
class Shape:
    """Create a new shape.

    Drawing methods append PDF operators to the string `draw_cont`.
    `finish()` wraps that string up and moves it to `totalcont`,
    `insert_text()` collects its output in `text_cont`, and `commit()`
    finally writes everything to the page.
    """

    def __init__(self, page: Page):
        """Set up a shape for `page`.

        Raises:
            ValueError: if the page's document is not a PDF.
        """
        CheckParent(page)
        self.page = page
        self.doc = page.parent
        if not self.doc.is_pdf:
            raise ValueError("not a PDF")
        self.height = page.mediabox_size.y
        self.width = page.mediabox_size.x
        self.x = page.cropbox_position.x
        self.y = page.cropbox_position.y
        self.pctm = page.transformation_matrix  # page transf. matrix
        self.ipctm = ~self.pctm  # inverted transf. matrix
        self.draw_cont = ""
        self.text_cont = ""
        self.totalcont = ""
        self.last_point = None
        self.rect = None

    def commit(self, overlay: bool = True) -> None:
        """
        Update the page's /Contents object with Shape data. The argument
        controls whether data appear in foreground (default) or background.
        """
        CheckParent(self.page)  # doc may have died meanwhile
        # Build the stream in a local variable: if updating the stream fails,
        # the shape's string buffers remain strings and remain unchanged.
        content = (self.totalcont + self.text_cont).encode()
        if content != b"":
            # make /Contents object with dummy stream
            xref = TOOLS._insert_contents(self.page, b" ", overlay)
            # update it with potential compression
            mupdf.pdf_update_stream(self.doc, xref, content)
        # clean up for potential re-use
        self.last_point = None
        self.rect = None
        self.draw_cont = ""
        self.text_cont = ""
        self.totalcont = ""
        return

    def draw_bezier(
            self,
            p1: point_like,
            p2: point_like,
            p3: point_like,
            p4: point_like,
            ):# -> Point:
        """Draw a standard cubic Bezier curve.

        p1 is the start point, p4 the end point, p2 and p3 are the control
        points. Returns the end point.
        """
        p1 = Point(p1)
        p2 = Point(p2)
        p3 = Point(p3)
        p4 = Point(p4)
        if not (self.last_point == p1):
            # start point differs from the current point: move there first
            args = JM_TUPLE(p1 * self.ipctm)
            self.draw_cont += f"{_format_g(args)} m\n"
        args = JM_TUPLE(list(p2 * self.ipctm) + list(p3 * self.ipctm) + list(p4 * self.ipctm))
        self.draw_cont += f"{_format_g(args)} c\n"
        for pt in (p1, p2, p3, p4):
            self.update_rect(pt)
        self.last_point = p4
        return self.last_point

    def draw_circle(self, center: point_like, radius: float):# -> Point:
        """Draw a circle given its center and radius.

        Raises:
            ValueError: if radius is not greater than EPSILON.
        """
        if not radius > EPSILON:
            raise ValueError("radius must be positive")
        center = Point(center)
        p1 = center - (radius, 0)
        # a circle is a 360 degree sector without the lines to the center
        return self.draw_sector(center, p1, 360, fullSector=False)

    def draw_curve(
            self,
            p1: point_like,
            p2: point_like,
            p3: point_like,
            ):# -> Point:
        """Draw a curve between points using one control point.

        The curve runs from p1 to p3. It is drawn as a cubic Bezier curve
        whose two control points lie on the lines p1 -> p2 and p3 -> p2.
        """
        kappa = 0.55228474983
        p1 = Point(p1)
        p2 = Point(p2)
        p3 = Point(p3)
        k1 = p1 + (p2 - p1) * kappa
        k2 = p3 + (p2 - p3) * kappa
        return self.draw_bezier(p1, k1, k2, p3)

    def draw_line(self, p1: point_like, p2: point_like):# -> Point:
        """Draw a line between two points. Returns the end point."""
        p1 = Point(p1)
        p2 = Point(p2)
        if not (self.last_point == p1):
            self.draw_cont += _format_g(JM_TUPLE(p1 * self.ipctm)) + " m\n"
            self.last_point = p1
            self.update_rect(p1)
        self.draw_cont += _format_g(JM_TUPLE(p2 * self.ipctm)) + " l\n"
        self.update_rect(p2)
        self.last_point = p2
        return self.last_point

    def draw_oval(self, tetra: typing.Union[quad_like, rect_like]):# -> Point:
        """Draw an ellipse inside a tetrapod.

        Raises:
            ValueError: if the length of `tetra` is not 4.
        """
        if len(tetra) != 4:
            raise ValueError("invalid arg length")
        if hasattr(tetra[0], "__float__"):
            # four numbers: a rect-like
            q = Rect(tetra).quad
        else:
            q = Quad(tetra)
        # middle points of the four edges
        mt = q.ul + (q.ur - q.ul) * 0.5
        mr = q.ur + (q.lr - q.ur) * 0.5
        mb = q.ll + (q.lr - q.ll) * 0.5
        ml = q.ul + (q.ll - q.ul) * 0.5
        if not (self.last_point == ml):
            self.draw_cont += _format_g(JM_TUPLE(ml * self.ipctm)) + " m\n"
            self.last_point = ml
        # four curves, each from one edge middle to the next one, with the
        # corner in between as the control point
        self.draw_curve(ml, q.ll, mb)
        self.draw_curve(mb, q.lr, mr)
        self.draw_curve(mr, q.ur, mt)
        self.draw_curve(mt, q.ul, ml)
        self.update_rect(q.rect)
        self.last_point = ml
        return self.last_point

    def draw_polyline(self, points: list):# -> Point:
        """Draw several connected line segments. Returns the last point."""
        for i, p in enumerate(points):
            pt = Point(p)
            if i == 0:
                if not (self.last_point == pt):
                    self.draw_cont += _format_g(JM_TUPLE(pt * self.ipctm)) + " m\n"
                    self.last_point = pt
            else:
                self.draw_cont += _format_g(JM_TUPLE(pt * self.ipctm)) + " l\n"
            self.update_rect(p)
        self.last_point = Point(points[-1])
        return self.last_point

    def draw_quad(self, quad: quad_like):# -> Point:
        """Draw a Quad."""
        q = Quad(quad)
        # closed polyline through the four corners
        return self.draw_polyline([q.ul, q.ll, q.lr, q.ur, q.ul])

    def draw_rect(self, rect: rect_like):# -> Point:
        """Draw a rectangle. Returns its top-left corner."""
        r = Rect(rect)
        args = JM_TUPLE(list(r.bl * self.ipctm) + [r.width, r.height])
        self.draw_cont += _format_g(args) + " re\n"
        self.update_rect(r)
        self.last_point = r.tl
        return self.last_point

    def draw_sector(
            self,
            center: point_like,
            point: point_like,
            beta: float,
            fullSector: bool = True,
            ):# -> Point:
        """Draw a circle sector.

        Args:
            center: center of the circle.
            point: point on the circle where the arc starts.
            beta: angle of the arc in degrees.
            fullSector: whether to also draw the lines connecting the arc's
                start and end with the center.
        Returns:
            The end point of the arc.
        Raises:
            ValueError: if the distance of center and point is not greater
                than EPSILON.
        """
        center = Point(center)
        point = Point(point)

        # Helpers generating the operators "m", "c" and "l". They take the
        # single coordinates, so tuples must be unpacked when calling them.
        def l3(a, b):
            return _format_g((a, b)) + " m\n"

        def l4(a, b, c, d, e, f):
            return _format_g((a, b, c, d, e, f)) + " c\n"

        def l5(a, b):
            return _format_g((a, b)) + " l\n"

        betar = math.radians(-beta)
        w360 = math.radians(math.copysign(360, betar)) * (-1)
        w90 = math.radians(math.copysign(90, betar))
        w45 = w90 / 2
        while abs(betar) > 2 * math.pi:
            betar += w360  # bring angle below 360 degrees
        if not (self.last_point == point):
            self.draw_cont += l3(*JM_TUPLE(point * self.ipctm))
            self.last_point = point
        Q = Point(0, 0)  # just make sure it exists
        C = center
        P = point
        S = P - C  # vector 'center' -> 'point'
        rad = abs(S)  # circle radius
        if not rad > EPSILON:
            raise ValueError("radius must be positive")
        alfa = self.horizontal_angle(center, point)
        while abs(betar) > abs(w90):  # draw 90 degree arcs
            q1 = C.x + math.cos(alfa + w90) * rad
            q2 = C.y + math.sin(alfa + w90) * rad
            Q = Point(q1, q2)  # the arc's end point
            r1 = C.x + math.cos(alfa + w45) * rad / math.cos(w45)
            r2 = C.y + math.sin(alfa + w45) * rad / math.cos(w45)
            R = Point(r1, r2)  # crossing point of tangents
            kappah = (1 - math.cos(w45)) * 4 / 3 / abs(R - Q)
            kappa = kappah * abs(P - Q)
            cp1 = P + (R - P) * kappa  # control point 1
            cp2 = Q + (R - Q) * kappa  # control point 2
            self.draw_cont += l4(*JM_TUPLE(
                list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
            ))
            betar -= w90  # reduce parm angle by 90 deg
            alfa += w90  # advance start angle by 90 deg
            P = Q  # advance to arc end point
        # draw (remaining) arc
        if abs(betar) > 1e-3:  # significant degrees left?
            beta2 = betar / 2
            q1 = C.x + math.cos(alfa + betar) * rad
            q2 = C.y + math.sin(alfa + betar) * rad
            Q = Point(q1, q2)  # the arc's end point
            r1 = C.x + math.cos(alfa + beta2) * rad / math.cos(beta2)
            r2 = C.y + math.sin(alfa + beta2) * rad / math.cos(beta2)
            R = Point(r1, r2)  # crossing point of tangents
            # kappa height is 4/3 of segment height
            kappah = (1 - math.cos(beta2)) * 4 / 3 / abs(R - Q)  # kappa height
            kappa = kappah * abs(P - Q) / (1 - math.cos(betar))
            cp1 = P + (R - P) * kappa  # control point 1
            cp2 = Q + (R - Q) * kappa  # control point 2
            self.draw_cont += l4(*JM_TUPLE(
                list(cp1 * self.ipctm) + list(cp2 * self.ipctm) + list(Q * self.ipctm)
            ))
        if fullSector:
            # connect start point, center and end point
            self.draw_cont += l3(*JM_TUPLE(point * self.ipctm))
            self.draw_cont += l5(*JM_TUPLE(center * self.ipctm))
            self.draw_cont += l5(*JM_TUPLE(Q * self.ipctm))
        self.last_point = Q
        return self.last_point

    def draw_squiggle(
            self,
            p1: point_like,
            p2: point_like,
            breadth=2,
            ):# -> Point:
        """Draw a squiggly line from p1 to p2.

        Raises:
            ValueError: if the points are too close for one full phase.
        """
        p1 = Point(p1)
        p2 = Point(p2)
        S = p2 - p1  # vector start - end
        rad = abs(S)  # distance of points
        cnt = 4 * int(round(rad / (4 * breadth), 0))  # always take full phases
        if cnt < 4:
            raise ValueError("points too close")
        mb = rad / cnt  # revised breadth
        matrix = Matrix(TOOLS._hor_matrix(p1, p2))  # normalize line to x-axis
        i_mat = ~matrix  # get original position
        k = 2.4142135623765633  # y of draw_curve helper point
        points = []  # stores edges
        for i in range(1, cnt):
            if i % 4 == 1:  # point "above" connection
                p = Point(i, -k) * mb
            elif i % 4 == 3:  # point "below" connection
                p = Point(i, k) * mb
            else:  # else on connection line
                p = Point(i, 0) * mb
            points.append(p * i_mat)
        points = [p1] + points + [p2]
        # one curve per three consecutive points; the middle one controls
        for i in range(0, len(points) - 2, 2):
            self.draw_curve(points[i], points[i + 1], points[i + 2])
        return p2

    def draw_zigzag(
            self,
            p1: point_like,
            p2: point_like,
            breadth: float = 2,
            ):# -> Point:
        """Draw a zig-zagged line from p1 to p2.

        Raises:
            ValueError: if the points are too close for one full phase.
        """
        p1 = Point(p1)
        p2 = Point(p2)
        S = p2 - p1  # vector start - end
        rad = abs(S)  # distance of points
        cnt = 4 * int(round(rad / (4 * breadth), 0))  # always take full phases
        if cnt < 4:
            raise ValueError("points too close")
        mb = rad / cnt  # revised breadth
        matrix = Matrix(TOOLS._hor_matrix(p1, p2))  # normalize line to x-axis
        i_mat = ~matrix  # get original position
        points = []  # stores edges
        for i in range(1, cnt):
            if i % 4 == 1:  # point "above" connection
                p = Point(i, -1) * mb
            elif i % 4 == 3:  # point "below" connection
                p = Point(i, 1) * mb
            else:  # ignore others
                continue
            points.append(p * i_mat)
        self.draw_polyline([p1] + points + [p2])  # add start and end points
        return p2

    def finish(
            self,
            width: float = 1,
            color: OptSeq = (0,),
            fill: OptSeq = None,
            lineCap: int = 0,
            lineJoin: int = 0,
            dashes: OptStr = None,
            even_odd: bool = False,
            morph: OptSeq = None,
            closePath: bool = True,
            fill_opacity: float = 1,
            stroke_opacity: float = 1,
            oc: int = 0,
            ) -> None:
        """Finish the current drawing segment.

        Notes:
            Apply colors, opacity, dashes, line style and width, or
            morphing. Also whether to close the path
            by connecting last to first point.
            Does nothing if no drawing has been made since the last call.
        """
        if self.draw_cont == "":  # treat empty contents as no-op
            return
        if width == 0:  # border color makes no sense then
            color = None
        elif color is None:  # vice versa
            width = 0
        color_str = ColorCode(color, "c")  # ensure proper color string
        fill_str = ColorCode(fill, "f")  # ensure proper fill string
        optcont = self.page._get_optional_content(oc)
        if optcont is not None:
            self.draw_cont = "/OC /%s BDC\n" % optcont + self.draw_cont
            emc = "EMC\n"
        else:
            emc = ""
        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
        if alpha is not None:
            self.draw_cont = "/%s gs\n" % alpha + self.draw_cont
        if width != 1 and width != 0:
            self.draw_cont += _format_g(width) + " w\n"
        if lineCap != 0:
            self.draw_cont = "%i J\n" % lineCap + self.draw_cont
        if lineJoin != 0:
            self.draw_cont = "%i j\n" % lineJoin + self.draw_cont
        if dashes not in (None, "", "[] 0"):
            self.draw_cont = "%s d\n" % dashes + self.draw_cont
        if closePath:
            self.draw_cont += "h\n"
            self.last_point = None
        if color is not None:
            self.draw_cont += color_str
        # choose the painting operator
        if fill is not None:
            self.draw_cont += fill_str
            if color is not None:
                # fill and stroke
                self.draw_cont += "B*\n" if even_odd else "B\n"
            else:
                # fill only
                self.draw_cont += "f*\n" if even_odd else "f\n"
        else:
            # stroke only
            self.draw_cont += "S\n"
        self.draw_cont += emc
        if CheckMorph(morph):
            m1 = Matrix(
                1, 0, 0, 1, morph[0].x + self.x, self.height - morph[0].y - self.y
            )
            mat = ~m1 * morph[1] * m1
            # The "cm" operator must precede everything collected so far.
            self.draw_cont = _format_g(JM_TUPLE(mat)) + " cm\n" + self.draw_cont
        self.totalcont += "\nq\n" + self.draw_cont + "Q\n"
        self.draw_cont = ""
        self.last_point = None
        return

    @staticmethod
    def horizontal_angle(C, P):
        """Return the angle to the horizontal for the connection from C to P.

        This uses the arcus sine function and resolves its inherent ambiguity by
        looking up in which quadrant vector S = P - C is located.
        """
        S = Point(P - C).unit  # unit vector 'C' -> 'P'
        alfa = math.asin(abs(S.y))  # absolute angle from horizontal
        if S.x < 0:  # make arcsin result unique
            if S.y <= 0:  # bottom-left
                alfa = -(math.pi - alfa)
            else:  # top-left
                alfa = math.pi - alfa
        elif S.y < 0:  # bottom-right
            alfa = -alfa
        # top-right: nothing to do
        return alfa

    def insert_text(
            self,
            point: point_like,
            buffer_: typing.Union[str, list],
            fontsize: float = 11,
            lineheight: OptFloat = None,
            fontname: str = "helv",
            fontfile: OptStr = None,
            set_simple: bool = 0,
            encoding: int = 0,
            color: OptSeq = None,
            fill: OptSeq = None,
            render_mode: int = 0,
            border_width: float = 1,
            rotate: int = 0,
            morph: OptSeq = None,
            stroke_opacity: float = 1,
            fill_opacity: float = 1,
            oc: int = 0,
            ) -> int:
        """Insert text lines starting at `point`.

        `buffer_` is either a string, which is split into its lines, or a
        list / tuple of strings. The generated operators are appended to
        `text_cont`.

        Returns:
            The number of lines put out. Lines for which no space is left
            are omitted. 0 if there is no text.
        Raises:
            ValueError: if `rotate` is not a multiple of 90.
        """
        # ensure 'text' is a list of strings, worth dealing with
        if not bool(buffer_):
            return 0
        if type(buffer_) not in (list, tuple):
            text = buffer_.splitlines()
        else:
            text = buffer_
        if not len(text) > 0:
            return 0
        point = Point(point)
        try:
            maxcode = max([ord(c) for c in " ".join(text)])
        except Exception:
            exception_info()
            return 0
        # ensure valid 'fontname'
        fname = fontname
        if fname.startswith("/"):
            fname = fname[1:]
        xref = self.page.insert_font(
            fontname=fname,
            fontfile=fontfile,
            encoding=encoding,
            set_simple=set_simple,
        )
        fontinfo = CheckFontInfo(self.doc, xref)
        fontdict = fontinfo[1]
        ordering = fontdict["ordering"]
        simple = fontdict["simple"]
        bfname = fontdict["name"]
        ascender = fontdict["ascender"]
        descender = fontdict["descender"]
        if lineheight:
            lheight = fontsize * lineheight
        elif ascender - descender <= 1:
            lheight = fontsize * 1.2
        else:
            lheight = fontsize * (ascender - descender)
        if maxcode > 255:
            glyphs = self.doc.get_char_widths(xref, maxcode + 1)
        else:
            glyphs = fontdict["glyphs"]
        # The glyph table is only passed on for non-simple fonts and for
        # the fonts Symbol and ZapfDingbats (loop-invariant decision).
        if simple and bfname not in ("Symbol", "ZapfDingbats"):
            g = None
        else:
            g = glyphs
        text = [getTJstr(t, g, simple, ordering) for t in text]
        color_str = ColorCode(color, "c")
        fill_str = ColorCode(fill, "f")
        if not fill and render_mode == 0:  # ensure fill color when 0 Tr
            fill = color
            fill_str = ColorCode(color, "f")
        morphing = CheckMorph(morph)
        rot = rotate
        if rot % 90 != 0:
            raise ValueError("bad rotate value")
        while rot < 0:
            rot += 360
        rot = rot % 360  # text rotate = 0, 90, 270, 180
        cmp90 = "0 1 -1 0 0 0 cm\n"  # rotates 90 deg counter-clockwise
        cmm90 = "0 -1 1 0 0 0 cm\n"  # rotates 90 deg clockwise
        cm180 = "-1 0 0 -1 0 0 cm\n"  # rotates by 180 deg.
        height = self.height
        width = self.width
        # setting up for standard rotation directions
        # case rotate = 0
        if morphing:
            m1 = Matrix(1, 0, 0, 1, morph[0].x + self.x, height - morph[0].y - self.y)
            mat = ~m1 * morph[1] * m1
            cm = _format_g(JM_TUPLE(mat)) + " cm\n"
        else:
            cm = ""
        top = height - point.y - self.y  # start of 1st char
        left = point.x + self.x  # start of 1. char
        space = top  # space available
        if rot == 90:
            left = height - point.y - self.y
            top = -point.x - self.x
            cm += cmp90
            space = width - abs(top)
        elif rot == 270:
            left = -height + point.y + self.y
            top = point.x + self.x
            cm += cmm90
            space = abs(top)
        elif rot == 180:
            left = -point.x - self.x
            top = -height + point.y + self.y
            cm += cm180
            space = abs(point.y + self.y)
        optcont = self.page._get_optional_content(oc)
        if optcont is not None:
            bdc = "/OC /%s BDC\n" % optcont
            emc = "EMC\n"
        else:
            bdc = emc = ""
        alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity)
        if alpha is None:
            alpha = ""
        else:
            alpha = "/%s gs\n" % alpha
        # Opening part. No '%' must precede the 'cm' / 'Tm' operators: in
        # a PDF content stream it would start a comment and thus disable
        # everything up to the end of that line.
        nres = (
            f"\nq\n{bdc}{alpha}BT\n{cm}1 0 0 1 {_format_g((left, top))} Tm\n"
            f"/{fname} {fontsize} Tf "
        )
        if render_mode > 0:
            nres += "%i Tr " % render_mode
        if border_width != 1:
            nres += _format_g(border_width) + " w "
        if color is not None:
            nres += color_str
        if fill is not None:
            nres += fill_str
        # =========================================================================
        #   start text insertion
        # =========================================================================
        nres += text[0]
        nlines = 1  # set output line counter
        if len(text) > 1:
            nres += f"TJ\n0 -{_format_g(lheight)} TD\n"  # line 1
        else:
            nres += 'TJ'
        for i in range(1, len(text)):
            if space < lheight:
                break  # no space left on page
            if i > 1:
                nres += "\nT* "
            nres += text[i] + 'TJ'
            space -= lheight
            nlines += 1
        nres += "\nET\n%sQ\n" % emc
        # =========================================================================
        #   end of text insertion
        # =========================================================================
        # update the /Contents object
        self.text_cont += nres
        return nlines

    def update_rect(self, x):
        """Extend `self.rect` such that it includes point-like or rect-like `x`.

        Any `x` of a length other than 2 is treated as rect-like.
        """
        is_point = len(x) == 2
        if self.rect is None:
            # first object: it defines the rectangle
            self.rect = Rect(x, x) if is_point else Rect(x)
            return
        if is_point:
            pt = Point(x)
            x0, y0, x1, y1 = pt.x, pt.y, pt.x, pt.y
        else:
            r = Rect(x)
            x0, y0, x1, y1 = r.x0, r.y0, r.x1, r.y1
        self.rect.x0 = min(self.rect.x0, x0)
        self.rect.y0 = min(self.rect.y0, y0)
        self.rect.x1 = max(self.rect.x1, x1)
        self.rect.y1 = max(self.rect.y1, y1)
class Story:
def __init__( self, html='', user_css=None, em=12, archive=None):
    """Create a story from HTML source.

    Args:
        html: HTML source string; it is passed on UTF-8 encoded.
        user_css: passed on to MuPDF unchanged.
        em: passed on to MuPDF unchanged.
        archive: an `Archive`, or something `Archive()` accepts. If
            omitted, `mupdf.FzArchive(None)` is used.
    """
    html_bytes = html.encode('utf-8')
    buffer_ = mupdf.fz_new_buffer_from_copied_data(html_bytes)
    if archive and not isinstance(archive, Archive):
        archive = Archive(archive)
    if archive:
        arch = archive.this
    else:
        arch = mupdf.FzArchive(None)
    # Use class FzStoryS if the MuPDF bindings provide it.
    story_class = mupdf.FzStoryS if hasattr(mupdf, 'FzStoryS') else mupdf.FzStory
    self.this = story_class(buffer_, user_css, em, arch)
def add_header_ids(self):
    '''
    Walk through the items of the story's body and give every `<h1..6>`
    item without an `id` attribute a generated one: `h_id_0`, `h_id_1`, ...
    '''
    counter = 0
    node = self.body.find(None, None, None)
    while node:
        tag = node.tagname
        is_header = len(tag) == 2 and tag[0] == "h" and tag[1] in "123456"
        if is_header and not node.get_attribute_value("id"):
            node.set_attribute("id", f"h_id_{counter}")
            counter += 1
        node = node.find_next(None, None, None)
@staticmethod
def add_pdf_links(document_or_stream, positions):
    """
    Adds links to PDF document.

    Args:
        document_or_stream:
            A PDF `Document` or raw PDF content, for example an
            `io.BytesIO` instance.
        positions:
            List of `ElementPosition`'s for `document_or_stream`,
            typically from Story.element_positions(). If two or more
            positions have the same id, the first one is used and the
            others are ignored.
    Returns:
        `document_or_stream` if a `Document` instance, otherwise a
        new `Document` instance.

    We raise an exception if an `href` in `positions` refers to an
    internal position `#<name>` but no item in `positions` has `id =
    name`.
    """
    if isinstance(document_or_stream, Document):
        document = document_or_stream
    else:
        document = Document("pdf", document_or_stream)

    # Create dict from id to position, which we will use to find
    # link destinations. Only the first position of an id is stored.
    #
    id_to_position = {}
    for position in positions:
        if (position.open_close & 1) and position.id:
            id_to_position.setdefault(position.id, position)

    # Insert links for all positions that have an `href`.
    #
    for position_from in positions:
        if not ((position_from.open_close & 1) and position_from.href):
            continue
        href = position_from.href
        link = {'from': Rect(position_from.rect)}
        if href.startswith("#"):
            # `<a href="#...">...</a>` internal link.
            target_id = href[1:]
            try:
                position_to = id_to_position[target_id]
            except Exception as e:
                if g_exceptions_verbose > 1:
                    exception_info()
                raise RuntimeError(f"No destination with id={target_id}, required by position_from: {position_from}") from e
            # Make link from `position_from`'s rect to top-left of
            # `position_to`'s rect.
            if 0:
                log(f"add_pdf_links(): making link from:")
                log(f"add_pdf_links():    {position_from}")
                log(f"add_pdf_links(): to:")
                log(f"add_pdf_links():    {position_to}")
            link["kind"] = LINK_GOTO
            x0, y0, x1, y1 = position_to.rect
            # This appears to work well with viewers which scroll
            # to make destination point top-left of window.
            link["to"] = Point(x0, y0)
            link["page"] = position_to.page_num - 1
        elif href.startswith('name:'):
            # `<a href="name:...">...</a>` named link.
            link['kind'] = LINK_NAMED
            link['name'] = href[5:]
        else:
            # `<a href="...">...</a>` external link.
            link['kind'] = LINK_URI
            link['uri'] = href
        # `page_num` counts from 1, document pages are indexed from 0.
        document[position_from.page_num - 1].insert_link(link)
    return document
@property
def body(self):
    """Return the result of `bodytag()` of the story's DOM (see `document()`)."""
    return self.document().bodytag()
def document( self):
    """Return the story's document, as delivered by MuPDF, wrapped in `Xml`."""
    return Xml(mupdf.fz_story_document(self.this))
def draw( self, device, matrix=None):
    """Draw the story to `device`, using `matrix`.

    If `device` is None (or otherwise false), `mupdf.FzDevice(None)` is
    used in its place.
    """
    ctm2 = JM_matrix_from_py(matrix)
    if device:
        dev = device.this
    else:
        dev = mupdf.FzDevice(None)
    mupdf.fz_draw_story(self.this, dev, ctm2)
def element_positions( self, function, args=None):
    '''
    Trigger a callback function to record where items have been placed.

    Args:
        function:
            Callable that can be called with exactly one positional
            argument. It receives an object with attributes `depth`,
            `heading`, `id`, `rect`, `text`, `open_close`, `rect_num` and
            `href`, plus one attribute per item of `args`.
        args:
            Optional dict; its keys must be strings that are valid
            identifiers. Anything that is not a dict is ignored.
    Raises:
        ValueError: on an invalid key in `args`, or if `function` cannot
            be called with exactly one positional argument.
    '''
    if isinstance(args, dict):
        for k in args:
            if not (isinstance(k, str) and k.isidentifier()):
                raise ValueError(f"invalid key '{k}'")
    else:
        args = {}

    bad_callback = "callback 'function' must be a callable with exactly one argument"
    if not callable(function):
        raise ValueError(bad_callback)
    # Ask the signature whether a call with one positional argument is
    # possible. Unlike counting `__code__.co_argcount`, this is correct
    # for bound methods (where `self` must not be counted) and it works
    # for callables that have no `__code__`, e.g. `functools.partial`.
    try:
        signature = inspect.signature(function)
    except (TypeError, ValueError):
        signature = None  # cannot be introspected: trust the caller
    if signature is not None:
        try:
            signature.bind(None)
        except TypeError:
            raise ValueError(bad_callback) from None

    class Position2:
        pass

    def function2( position):
        # Copy the values to a plain Python object, convert the rect and
        # use the name `rect_num` for `rectangle_num`.
        position2 = Position2()
        position2.depth = position.depth
        position2.heading = position.heading
        position2.id = position.id
        position2.rect = JM_py_from_rect(position.rect)
        position2.text = position.text
        position2.open_close = position.open_close
        position2.rect_num = position.rectangle_num
        position2.href = position.href
        for k, v in args.items():
            setattr(position2, k, v)
        function(position2)

    mupdf.fz_story_positions(self.this, function2)
def place( self, where):
    """Place story content into rect-like `where`.

    Returns:
        Tuple `(more, filled)`: `more` is the value returned by
        `mupdf.fz_place_story()`, `filled` is the rectangle which MuPDF
        has filled in, converted to Python.
    """
    target = JM_rect_from_py(where)
    filled = mupdf.FzRect()
    more = mupdf.fz_place_story(self.this, target, filled)
    return more, JM_py_from_rect(filled)
def reset( self):
    """Reset the story by calling `mupdf.fz_reset_story()`."""
    story = self.this
    mupdf.fz_reset_story(story)
def write(self, writer, rectfn, positionfn=None, pagefn=None):
    """Place and draw the story rectangle by rectangle until all is placed.

    Args:
        writer:
            Object with methods `begin_page()` and `end_page()`. If false,
            the story is drawn with device None.
        rectfn:
            Called as `rectfn(rect_num, filled)`; must return
            `(mediabox, rect, ctm)`. A true `mediabox` starts a new page.
        positionfn:
            Optional; called with every position reported by
            `element_positions()`, after the page number has been added
            to it as `.page_num`.
        pagefn:
            Optional; only used with a writer. Called as
            `pagefn(page_num, mediabox, dev, after)`, with `after=0` when
            a page has been started and `after=1` before it is ended.
    """
    device = None
    page_num = 0
    rect_num = 0
    filled = Rect(0, 0, 0, 0)
    while True:
        mediabox, rect, ctm = rectfn(rect_num, filled)
        rect_num += 1
        if mediabox:
            # new page.
            page_num += 1
        more, filled = self.place(rect)
        if positionfn:
            def positionfn2(position):
                # We add a `.page_num` member to the
                # `ElementPosition` instance.
                position.page_num = page_num
                positionfn(position)
            self.element_positions(positionfn2)
        if not writer:
            self.draw(None, ctm)
        else:
            if mediabox:
                # new page: end the previous one, if any, then begin.
                if device:
                    if pagefn:
                        pagefn(page_num, mediabox, device, 1)
                    writer.end_page()
                device = writer.begin_page(mediabox)
                if pagefn:
                    pagefn(page_num, mediabox, device, 0)
            self.draw(device, ctm)
            if not more:
                # all is placed: end the last page.
                if pagefn:
                    pagefn(page_num, mediabox, device, 1)
                writer.end_page()
        if not more:
            break
@staticmethod
def write_stabilized(writer, contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True):
    """Create and write stories repeatedly until the content is stable.

    In every iteration `contentfn(positions)` is called with the
    positions of the previous iteration (initially an empty list), and a
    new story is made from the returned content. Content is stable when
    it equals the content of the previous iteration. Only in that final
    iteration `writer` is used and `positionfn` (if given) is called.
    """
    positions = []
    content = None
    # Iterate until stable.
    while True:
        previous = content
        content = contentfn(positions)
        stable = bool(content == previous)
        story = Story(content, user_css, em, archive)
        if add_header_ids:
            story.add_header_ids()
        positions = []

        def positionfn2(position):
            positions.append(position)
            if stable and positionfn:
                positionfn(position)

        story.write(writer if stable else None, rectfn, positionfn2, pagefn)
        if stable:
            break
@staticmethod
def write_stabilized_with_links(contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True):
    '''
    Runs `Story.write_stabilized()` into an in-memory `DocumentWriter`,
    then returns the result of `Story.add_pdf_links()` for the written
    stream and the element positions collected on the way.
    '''
    buffer = io.BytesIO()
    doc_writer = DocumentWriter(buffer)
    collected = []

    def record(position):
        collected.append(position)
        if positionfn:
            positionfn(position)

    Story.write_stabilized(
        doc_writer,
        contentfn,
        rectfn,
        user_css,
        em,
        record,
        pagefn,
        archive,
        add_header_ids,
    )
    doc_writer.close()
    buffer.seek(0)
    return Story.add_pdf_links(buffer, collected)
def write_with_links(self, rectfn, positionfn=None, pagefn=None):
    '''
    Writes this story into an in-memory `DocumentWriter` using
    `self.write()`, then returns the result of `Story.add_pdf_links()` for
    the written stream and the element positions collected on the way.
    '''
    buffer = io.BytesIO()
    doc_writer = DocumentWriter(buffer)
    collected = []

    def record(position):
        collected.append(position)
        if positionfn:
            positionfn(position)

    self.write(doc_writer, rectfn, positionfn=record, pagefn=pagefn)
    doc_writer.close()
    buffer.seek(0)
    return Story.add_pdf_links(buffer, collected)
class FitResult:
    '''
    The result from a `Story.fit*()` method.

    Members:

    `big_enough`:
        `True` if the fit succeeded.
    `filled`:
        From the last call to `Story.place()`.
    `more`:
        `False` if the fit succeeded.
    `numcalls`:
        Number of calls made to `self.place()`.
    `parameter`:
        The successful parameter value, or the largest failing value.
    `rect`:
        The rect created from `parameter`.
    '''

    def __init__(self, big_enough=None, filled=None, more=None, numcalls=None, parameter=None, rect=None):
        self.big_enough, self.filled, self.more = big_enough, filled, more
        self.numcalls, self.parameter, self.rect = numcalls, parameter, rect

    def __repr__(self):
        # One ` name=value` item per member, in declaration order.
        members = ('big_enough', 'filled', 'more', 'numcalls', 'parameter', 'rect')
        return ''.join(f' {name}={getattr(self, name)}' for name in members)
def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False):
'''
Finds optimal rect that contains the story `self`.
Returns a `Story.FitResult` instance.
On success, the last call to `self.place()` will have been with the
returned rectangle, so `self.draw()` can be used directly.
Args:
:arg fn:
A callable taking a floating point `parameter` and returning a
`pymupdf.Rect()`. If the rect is empty, we assume the story will
not fit and do not call `self.place()`.
Must guarantee that `self.place()` behaves monotonically when
given rect `fn(parameter`) as `parameter` increases. This
usually means that both width and height increase or stay
unchanged as `parameter` increases.
:arg pmin:
Minimum parameter to consider; `None` for -infinity.
:arg pmax:
Maximum parameter to consider; `None` for +infinity.
:arg delta:
Maximum error in returned `parameter`.
:arg verbose:
If true we output diagnostics.
'''
# Diagnostics helper: asserts `verbose` and forwards to `message()`.
def log(text):
assert verbose
message(f'fit(): {text}')
# Only numbers or `None` are accepted as search bounds.
assert isinstance(pmin, (int, float)) or pmin is None
assert isinstance(pmax, (int, float)) or pmax is None
# Mutable search state shared by the nested helpers `ret()` and `update()`.
# `pmin` is the largest parameter known to fail and `pmax` the smallest
# known to succeed; `*_result` hold the matching `FitResult` instances.
# NOTE: `last_p` is not initialised here; it is first set by `update()`.
class State:
def __init__(self):
self.pmin = pmin
self.pmax = pmax
self.pmin_result = None
self.pmax_result = None
self.result = None
self.numcalls = 0
# The original bounds are only kept for the verbose diagnostics.
if verbose:
self.pmin0 = pmin
self.pmax0 = pmax
state = State()
if verbose:
log(f'starting. {state.pmin=} {state.pmax=}.')
self.reset()
# Builds the value returned by `fit()`. If a succeeding `pmax` is known
# but was not the most recently placed parameter, it is placed again via
# `update()`. Without a succeeding `pmax` the last failing result is used,
# or an otherwise empty `FitResult` carrying only `numcalls`.
def ret():
if state.pmax is not None:
if state.last_p != state.pmax:
if verbose:
log(f'Calling update() with pmax, because was overwritten by later calls.')
big_enough = update(state.pmax)
assert big_enough
result = state.pmax_result
else:
result = state.pmin_result if state.pmin_result else Story.FitResult(numcalls=state.numcalls)
if verbose:
log(f'finished. {state.pmin0=} {state.pmax0=} {state.pmax=}: returning {result=}')
return result
def update(parameter):
'''
Evaluates `more, _ = self.place(fn(parameter))`. If `more` is
false, then `rect` is big enought to contain `self` and we
set `state.pmax=parameter` and return True. Otherwise we set
`state.pmin=parameter` and return False.
'''
rect = fn(parameter)
assert isinstance(rect, Rect), f'{type(rect)=} {rect=}'
# An empty rect counts as "does not fit" without calling `self.place()`,
# so `numcalls` is not incremented for it.
if rect.is_empty:
big_enough = False
result = Story.FitResult(parameter=parameter, numcalls=state.numcalls)
if verbose:
log(f'update(): not calling self.place() because rect is empty.')
else:
more, filled = self.place(rect)
state.numcalls += 1
big_enough = not more
result = Story.FitResult(
filled=filled,
more=more,
numcalls=state.numcalls,
parameter=parameter,
rect=rect,
big_enough=big_enough,
)
if verbose:
log(f'update(): called self.place(): {state.numcalls:>2d}: {more=} {parameter=} {rect=}.')
# Record the outcome as the new succeeding or failing bound.
if big_enough:
state.pmax = parameter
state.pmax_result = result
else:
state.pmin = parameter
state.pmin_result = result
# Remember which parameter was evaluated last; read by `ret()`.
state.last_p = parameter
return big_enough
def opposite(p, direction):
'''
Returns same sign as `direction`, larger or smaller than `p` if
direction is positive or negative respectively.
'''
if p is None or p==0:
return direction
if direction * p > 0:
return 2 * p
return -p
# Phase 1: establish a failing lower bound. Without a given `pmin`, start
# from `opposite(state.pmax, -1)` and keep doubling until `update()` fails.
if state.pmin is None:
# Find an initial finite pmin value.
if verbose: log(f'finding pmin.')
parameter = opposite(state.pmax, -1)
while 1:
if not update(parameter):
break
parameter *= 2
else:
if update(state.pmin):
if verbose: log(f'{state.pmin=} is big enough.')
return ret()
# Phase 2: establish a succeeding upper bound. Without a given `pmax`,
# start from `opposite(state.pmin, +1)` and keep doubling until `update()`
# succeeds.
if state.pmax is None:
# Find an initial finite pmax value.
if verbose: log(f'finding pmax.')
parameter = opposite(state.pmin, +1)
while 1:
if update(parameter):
break
parameter *= 2
else:
if not update(state.pmax):
# No solution possible.
state.pmax = None
if verbose: log(f'No solution possible {state.pmax=}.')
return ret()
# Phase 3: bisect until the bounds are closer than `delta`.
# Do binary search in pmin..pmax.
if verbose: log(f'doing binary search with {state.pmin=} {state.pmax=}.')
while 1:
if state.pmax - state.pmin < delta:
return ret()
parameter = (state.pmin + state.pmax) / 2
update(parameter)
def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False):
    '''
    Finds smallest value `scale` in range `scale_min..scale_max` where
    `rect`, scaled by `scale` about its top-left corner, is large enough
    to contain the story `self`.

    Returns a `Story.FitResult` instance.

    :arg rect:
        Sequence `(x0, y0, x1, y1)`; its top-left corner stays fixed while
        its width and height are multiplied by `scale`.
    :arg scale_min:
        Minimum scale to consider; must be >= 0.
    :arg scale_max:
        Maximum scale to consider, must be >= scale_min or `None` for
        infinite.
    :arg delta:
        Maximum error in returned scale.
    :arg verbose:
        If true we output diagnostics.
    '''
    left, top, right, bottom = rect
    base_width = right - left
    base_height = bottom - top

    def scaled_rect(scale):
        return Rect(left, top, left + scale*base_width, top + scale*base_height)

    return self.fit(scaled_rect, scale_min, scale_max, delta, verbose)
def fit_height(self, width, height_min=0, height_max=None, origin=(0, 0), delta=0.001, verbose=False):
    '''
    Finds smallest height in range `height_min..height_max` where a rect
    with size `(width, height)` is large enough to contain the story
    `self`.

    Returns a `Story.FitResult` instance.

    :arg width:
        width of rect.
    :arg height_min:
        Minimum height to consider; must be >= 0.
    :arg height_max:
        Maximum height to consider, must be >= height_min or `None` for
        infinite.
    :arg origin:
        `(x0, y0)` of rect.
    :arg delta:
        Maximum error in returned height.
    :arg verbose:
        If true we output diagnostics.
    '''
    left, top = origin
    right = left + width

    def rect_for(height):
        # Width is fixed; only the bottom edge moves with `height`.
        return Rect(left, top, right, top+height)

    return self.fit(rect_for, height_min, height_max, delta, verbose)
def fit_width(self, height, width_min=0, width_max=None, origin=(0, 0), delta=0.001, verbose=False):
    '''
    Finds smallest width in range `width_min..width_max` where a rect with size
    `(width, height)` is large enough to contain the story `self`.

    Returns a `Story.FitResult` instance.

    :arg height:
        height of rect.
    :arg width_min:
        Minimum width to consider; must be >= 0.
    :arg width_max:
        Maximum width to consider, must be >= width_min or `None` for
        infinite.
    :arg origin:
        `(x0, y0)` of rect.
    :arg delta:
        Maximum error in returned width.
    :arg verbose:
        If true we output diagnostics.
    '''
    x0, y0 = origin
    # The bottom edge is measured from the rect's own top edge `y0`. The
    # previous code used `x0` here, which gave a rect of the wrong height
    # whenever the origin's x and y coordinates differed.
    y1 = y0 + height

    def fn(width):
        return Rect(x0, y0, x0+width, y1)

    return self.fit(fn, width_min, width_max, delta, verbose)
class TextPage:
    def __init__(self, *args):
        """Wrap a MuPDF structured-text page.

        Accepts either a single `mupdf.FzRect` (a new `mupdf.FzStextPage`
        is created from it) or a single existing `mupdf.FzStextPage`.
        Any other arguments raise `Exception`.
        """
        if args_match(args, mupdf.FzRect):
            stext = mupdf.FzStextPage(args[0])
        elif args_match(args, mupdf.FzStextPage):
            stext = args[0]
        else:
            raise Exception(f'Unrecognised args: {args}')
        self.this = stext
        self.thisown = True
        self.parent = None
def _extractText(self, format_):
    """Return the page content as a string in the requested format.

    `format_` 1 selects HTML, 3 XML and 4 XHTML output; any other value
    produces text via `JM_print_stext_page_as_text()`.
    """
    stext = self.this
    buf = mupdf.fz_new_buffer(1024)
    out = mupdf.FzOutput(buf)
    # fixme: mupdfwrap.py thinks fz_output is not copyable, possibly
    # because there is no .refs member visible and no fz_keep_output() fn,
    # although there is an fz_drop_output(). So mupdf.fz_new_output_with_buffer()
    # doesn't convert the returned fz_output* into a mupdf.FzOutput.
    if format_ == 1:
        printer = mupdf.fz_print_stext_page_as_html
    elif format_ == 3:
        printer = mupdf.fz_print_stext_page_as_xml
    elif format_ == 4:
        printer = mupdf.fz_print_stext_page_as_xhtml
    else:
        printer = None

    if printer is None:
        # The text variant writes to the buffer directly.
        JM_print_stext_page_as_text(buf, stext)
    else:
        printer(out, stext, 0)
    out.fz_close_output()
    return JM_EscapeStrFromBuffer(buf)
def _getNewBlockList(self, page_dict, raw):
    """Pass `page_dict` and `raw` to `JM_make_textpage_dict()` for this page."""
    stext = self.this
    JM_make_textpage_dict(stext, page_dict, raw)
def _textpage_dict(self, raw=False):
    """Return a dict with the page's "width" and "height", after passing
    it to `self._getNewBlockList()` together with `raw`."""
    result = dict(width=self.rect.width, height=self.rect.height)
    self._getNewBlockList(result, raw)
    return result
def extractBLOCKS(self):
"""Return a list with text block information."""
# Delegate to the `extra` module when `g_use_extra` is set.
if g_use_extra:
return extra.extractBLOCKS(self.this)
# Each appended item is the tuple
# (x0, y0, x1, y1, text, block_n, block type); see `litem` below.
block_n = -1
this_tpage = self.this
tp_rect = mupdf.FzRect(this_tpage.m_internal.mediabox)
# A single buffer is reused for the text of every text block.
res = mupdf.fz_new_buffer(1024)
lines = []
for block in this_tpage:
block_n += 1
blockrect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
mupdf.fz_clear_buffer(res) # set text buffer to empty
line_n = -1
last_char = 0
for line in block:
line_n += 1
linerect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
for ch in line:
cbbox = JM_char_bbox(line, ch)
# Skip characters that do not overlap the textpage rect, unless
# that rect is infinite.
if (not JM_rects_overlap(tp_rect, cbbox)
and not mupdf.fz_is_infinite_rect(tp_rect)
):
continue
JM_append_rune(res, ch.m_internal.c)
last_char = ch.m_internal.c
linerect = mupdf.fz_union_rect(linerect, cbbox)
# Append a line feed (byte 10) unless the last character already was one
# or the line rectangle is empty.
if last_char != 10 and not mupdf.fz_is_empty_rect(linerect):
mupdf.fz_append_byte(res, 10)
blockrect = mupdf.fz_union_rect(blockrect, linerect)
text = JM_EscapeStrFromBuffer(res)
# Non-text blocks are reported with a descriptive placeholder string,
# subject to the same overlap / infinite-rect condition.
elif (JM_rects_overlap(tp_rect, block.m_internal.bbox)
or mupdf.fz_is_infinite_rect(tp_rect)
):
img = block.i_image()
cs = img.colorspace()
text = "<image: %s, width: %d, height: %d, bpc: %d>" % (
mupdf.fz_colorspace_name(cs),
img.w(), img.h(), img.bpc()
)
blockrect = mupdf.fz_union_rect(blockrect, mupdf.FzRect(block.m_internal.bbox))
# Only blocks with a non-empty rectangle are reported.
if not mupdf.fz_is_empty_rect(blockrect):
litem = (
blockrect.x0,
blockrect.y0,
blockrect.x1,
blockrect.y1,
text,
block_n,
block.m_internal.type,
)
lines.append(litem)
return lines
def extractDICT(self, cb=None, sort=False) -> dict:
    """Return page content as a Python dict of images and text spans.

    If `cb` is given, its `width` and `height` replace the dict's
    "width" and "height". If `sort` is `True`, the "blocks" list is
    sorted in place by `(bbox[3], bbox[0])`.
    """
    page = self._textpage_dict(raw=False)
    if cb is not None:
        page["width"], page["height"] = cb.width, cb.height
    if sort is True:
        def reading_order(block):
            bbox = block["bbox"]
            return bbox[3], bbox[0]
        page["blocks"].sort(key=reading_order)
    return page
def extractHTML(self) -> str:
    """Return page content as a HTML string."""
    html = self._extractText(1)
    return html
def extractIMGINFO(self, hashes=0):
    """Return a list with image meta information.

    One dict is produced per non-text block. If `hashes` is true, each
    dict also gets a "digest" entry computed with
    `mupdf.fz_md5_pixmap2()` from the image's pixmap.
    """
    infos = []
    for block_n, block in enumerate(self.this):
        if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        img = block.i_image()

        # Size of the compressed image buffer, if there is one.
        img_size = 0
        if mupdf_version_tuple >= (1, 24):
            compr_buff = mupdf.fz_compressed_image_buffer(img)
            if compr_buff:
                img_size = compr_buff.fz_compressed_buffer_size()
                compr_buff = None
        else:
            compr_buff = mupdf.ll_fz_compressed_image_buffer(img.m_internal)
            if compr_buff:
                img_size = mupdf.ll_fz_compressed_buffer_size(compr_buff)

        if hashes:
            bounds = mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
            assert mupdf.fz_is_infinite_irect(bounds)
            scale = mupdf.FzMatrix(img.w(), 0, 0, img.h(), 0, 0)
            pix, _w, _h = mupdf.fz_get_pixmap_from_image(img, bounds, scale)
            digest = bytes(mupdf.fz_md5_pixmap2(pix))

        if img_size == 0:
            # No compressed size found: use width * height * n instead.
            img_size = img.w() * img.h() * img.n()

        cs = mupdf.FzColorspace(mupdf.ll_fz_keep_colorspace(img.m_internal.colorspace))
        info = {
            dictkey_number: block_n,
            dictkey_bbox: JM_py_from_rect(block.m_internal.bbox),
            dictkey_matrix: JM_py_from_matrix(block.i_transform()),
            dictkey_width: img.w(),
            dictkey_height: img.h(),
            dictkey_colorspace: mupdf.fz_colorspace_n(cs),
            dictkey_cs_name: mupdf.fz_colorspace_name(cs),
            dictkey_xres: img.xres(),
            dictkey_yres: img.yres(),
            dictkey_bpc: img.bpc(),
            dictkey_size: img_size,
        }
        if hashes:
            info["digest"] = digest
        infos.append(info)
    return infos
def extractJSON(self, cb=None, sort=False) -> str:
    """Return 'extractDICT' converted to JSON format.

    `bytes` and `bytearray` values are written as base64 text. `cb` and
    `sort` act as in `extractDICT`.
    """
    import base64
    import json

    class BytesAsBase64(json.JSONEncoder):
        def default(self, s):
            if type(s) is bytes or type(s) is bytearray:
                return base64.b64encode(s).decode()
            return None

    page = self._textpage_dict(raw=False)
    if cb is not None:
        page["width"], page["height"] = cb.width, cb.height
    if sort is True:
        def reading_order(block):
            bbox = block["bbox"]
            return bbox[3], bbox[0]
        page["blocks"].sort(key=reading_order)
    return json.dumps(page, separators=(",", ":"), cls=BytesAsBase64, indent=1)
def extractRAWDICT(self, cb=None, sort=False) -> dict:
    """Return page content as a Python dict of images and text characters.

    If `cb` is given, its `width` and `height` replace the dict's
    "width" and "height". If `sort` is `True`, the "blocks" list is
    sorted in place by `(bbox[3], bbox[0])`.
    """
    page = self._textpage_dict(raw=True)
    if cb is not None:
        page["width"], page["height"] = cb.width, cb.height
    if sort is True:
        def reading_order(block):
            bbox = block["bbox"]
            return bbox[3], bbox[0]
        page["blocks"].sort(key=reading_order)
    return page
def extractRAWJSON(self, cb=None, sort=False) -> str:
    """Return 'extractRAWDICT' converted to JSON format.

    `bytes` and `bytearray` values are written as base64 text. `cb` and
    `sort` act as in `extractRAWDICT`.
    """
    import base64
    import json

    class BytesAsBase64(json.JSONEncoder):
        def default(self, s):
            if type(s) is bytes or type(s) is bytearray:
                return base64.b64encode(s).decode()
            return None

    page = self._textpage_dict(raw=True)
    if cb is not None:
        page["width"], page["height"] = cb.width, cb.height
    if sort is True:
        def reading_order(block):
            bbox = block["bbox"]
            return bbox[3], bbox[0]
        page["blocks"].sort(key=reading_order)
    return json.dumps(page, separators=(",", ":"), cls=BytesAsBase64, indent=1)
def extractSelection(self, pointa, pointb):
    """Return the result of `mupdf.fz_copy_selection()` for the selection
    between point-likes `pointa` and `pointb`."""
    start = JM_point_from_py(pointa)
    end = JM_point_from_py(pointb)
    return mupdf.fz_copy_selection(self.this, start, end, 0)
def extractText(self, sort=False) -> str:
    """Return simple, bare text on the page.

    With `sort` left at `False` the text comes from `_extractText(0)`.
    Otherwise the texts of `extractBLOCKS()` are concatenated after
    sorting the blocks by `(y1, x0)`.
    """
    if sort is False:
        return self._extractText(0)

    def reading_order(block):
        return block[3], block[0]

    ordered = sorted(self.extractBLOCKS(), key=reading_order)
    return "".join(block[4] for block in ordered)
def extractTextbox(self, rect):
    """Return the text that `JM_copy_rectangle()` yields for rect-like
    `rect`, decoded with `PyUnicode_DecodeRawUnicodeEscape()`."""
    stext = self.this
    assert isinstance(stext, mupdf.FzStextPage)
    raw = JM_copy_rectangle(stext, JM_rect_from_py(rect))
    return PyUnicode_DecodeRawUnicodeEscape(raw)
def extractWORDS(self, delimiters=None):
"""Return a list with text word information."""
# Delegate to the `extra` module when `g_use_extra` is set.
if g_use_extra:
return extra.extractWORDS(self.this, delimiters)
# `buff` collects the characters of the current word, `buflen` counts
# them, and `wbbox` is the union of their character bboxes.
buflen = 0
block_n = -1
wbbox = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY) # word bbox
this_tpage = self.this
tp_rect = mupdf.FzRect(this_tpage.m_internal.mediabox)
lines = None
buff = mupdf.fz_new_buffer(64)
lines = []
for block in this_tpage:
block_n += 1
# Only text blocks contribute words.
if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
continue
line_n = -1
for line in block:
line_n += 1
word_n = 0 # word counter per line
mupdf.fz_clear_buffer(buff) # reset word buffer
buflen = 0 # reset char counter
for ch in line:
cbbox = JM_char_bbox(line, ch)
# Skip characters that do not overlap the textpage rect, unless
# that rect is infinite.
if (not JM_rects_overlap(tp_rect, cbbox)
and not mupdf.fz_is_infinite_rect(tp_rect)
):
continue
word_delimiter = JM_is_word_delimiter(ch.m_internal.c, delimiters)
# A delimiter ends the current word; `JM_append_word()` stores it in
# `lines` and returns the new word counter and word bbox.
if word_delimiter:
if buflen == 0:
continue # skip delimiters at line start
if not mupdf.fz_is_empty_rect(wbbox):
word_n, wbbox = JM_append_word(lines, buff, wbbox, block_n, line_n, word_n)
mupdf.fz_clear_buffer(buff)
buflen = 0 # reset char counter
continue
# append one unicode character to the word
JM_append_rune(buff, ch.m_internal.c)
buflen += 1
# enlarge word bbox
wbbox = mupdf.fz_union_rect(wbbox, JM_char_bbox(line, ch))
# Store a word that is still pending in the buffer.
if buflen and not mupdf.fz_is_empty_rect(wbbox):
word_n, wbbox = JM_append_word(lines, buff, wbbox, block_n, line_n, word_n)
buflen = 0
return lines
def extractXHTML(self) -> str:
    """Return page content as a XHTML string."""
    xhtml = self._extractText(4)
    return xhtml
def extractXML(self) -> str:
    """Return page content as a XML string."""
    xml = self._extractText(3)
    return xml
def poolsize(self):
    """TextPage current poolsize."""
    pool = mupdf.Pool(self.this.m_internal.pool)
    result = mupdf.fz_pool_size(pool)
    # Ensure that pool's destructor does not free the pool.
    pool.m_internal = None
    return result
@property
def rect(self):
    """Page rectangle."""
    mediabox = self.this.m_internal.mediabox
    return Rect(JM_py_from_rect(mediabox))
def search(self, needle, hit_max=0, quads=1):
    """Locate 'needle' returning rects or quads.

    With `quads` true a list of `Quad` objects is returned. Otherwise the
    hits are converted to rects, and neighbouring rects that share the
    same `y1` and intersect are joined. `hit_max` is not used here.
    """
    hits = JM_search_stext_page(self.this, needle)
    if not hits:
        return hits

    # Change entries to quads or rects.
    for idx, raw in enumerate(hits):
        quad = Quad(raw)
        hits[idx] = quad if quads else quad.rect
    if quads:
        return hits

    # Join overlapping rects on the same line.
    idx = 0
    while idx < len(hits) - 1:
        current, following = hits[idx], hits[idx + 1]
        if current.y1 == following.y1 and not (current & following).is_empty:
            hits[idx] = current | following
            del hits[idx + 1]
        else:
            idx += 1
    return hits
# Alias: `extractTEXT` is the same function object as `extractText`.
extractTEXT = extractText
class TextWriter:
    def __init__(self, page_rect, opacity=1, color=None):
        """Stores text spans for later output on compatible PDF pages."""
        self.this = mupdf.fz_new_text()
        self.opacity = opacity
        self.color = color
        self.rect = Rect(page_rect)

        # `ctm` flips the y-axis over the page height; `ictm` is its inverse.
        flip = Matrix(1, 0, 0, -1, 0, self.rect.height)
        self.ctm = flip
        self.ictm = ~flip

        last_point = Point()
        last_point.__doc__ = "Position following last text insertion."
        self.last_point = last_point

        text_rect = Rect()
        text_rect.__doc__ = "Accumulated area of text spans."
        self.text_rect = text_rect

        self.used_fonts = set()
        self.thisown = True
@property
def _bbox(self):
    """Bounds of the stored text as a `Rect`, from `mupdf.fz_bound_text()`."""
    bounds = mupdf.fz_bound_text(self.this, mupdf.FzStrokeState(None), mupdf.FzMatrix())
    return Rect(JM_py_from_rect(bounds))
def append(self, pos, text, font=None, fontsize=11, language=None, right_to_left=0, small_caps=0):
"""Store 'text' at point 'pos' using 'font' and 'fontsize'."""
# Transform the insertion point with the inverse matrix `self.ictm`.
pos = Point(pos) * self.ictm
#log( '{font=}')
# Default font is `Font("helv")`.
if font is None:
font = Font("helv")
# Fonts that are not writable are rejected with ValueError (raised below).
if not font.is_writable:
# NOTE(review): `if 0:` guards debugging output that is never executed;
# most of the strings below also lack an `f` prefix.
if 0:
log( '{font.this.m_internal.name=}')
log( '{font.this.m_internal.t3matrix=}')
log( '{font.this.m_internal.bbox=}')
log( '{font.this.m_internal.glyph_count=}')
log( '{font.this.m_internal.use_glyph_bbox=}')
log( '{font.this.m_internal.width_count=}')
log( '{font.this.m_internal.width_default=}')
log( '{font.this.m_internal.has_digest=}')
log( 'Unsupported font {font.name=}')
if mupdf_cppyy:
import cppyy
log( f'Unsupported font {cppyy.gbl.mupdf_font_name(font.this.m_internal)=}')
raise ValueError("Unsupported font '%s'." % font.name)
# Right-to-left text is pre-processed with `clean_rtl()` and then reversed
# character by character; the flag is cleared before calling MuPDF.
if right_to_left:
text = self.clean_rtl(text)
text = "".join(reversed(text))
right_to_left = 0
lang = mupdf.fz_text_language_from_string(language)
p = JM_point_from_py(pos)
# Text matrix: scaling by `fontsize`, translation to the insertion point.
trm = mupdf.fz_make_matrix(fontsize, 0, 0, fontsize, p.x, p.y)
markup_dir = 0
wmode = 0
# `small_caps` selects `JM_show_string_cs()` instead of
# `mupdf.fz_show_string()`; both return the updated matrix.
if small_caps == 0:
trm = mupdf.fz_show_string( self.this, font.this, trm, text, wmode, right_to_left, markup_dir, lang)
else:
trm = JM_show_string_cs( self.this, font.this, trm, text, wmode, right_to_left, markup_dir, lang)
val = JM_py_from_matrix(trm)
# The last two matrix entries give the position following the text.
self.last_point = Point(val[-2:]) * self.ctm
self.text_rect = self._bbox * self.ctm
val = self.text_rect, self.last_point
# Fonts flagged "mono" are remembered; `write_text()` passes them to
# `repair_mono_font()`.
if font.flags["mono"] == 1:
self.used_fonts.add(font)
return val
def appendv(self, pos, text, font=None, fontsize=11, language=None, small_caps=False):
    """Store 'text' one character per line, starting at point 'pos'.

    Each character is passed to `append()`; after each one the position
    moves along y by `fontsize * 1.2`.

    Args:
        pos: point-like position of the first character.
        text: the characters to store.
        font, fontsize, language, small_caps: forwarded to `append()`.

    Returns:
        `(self.text_rect, self.last_point)` after the last character.
    """
    # Work on a private Point. This accepts any point-like (tuples and
    # lists have no `.y` to update) and leaves the caller's object
    # unmodified.
    pos = Point(pos)
    lheight = fontsize * 1.2
    for c in text:
        self.append(pos, c, font=font, fontsize=fontsize,
                language=language, small_caps=small_caps)
        pos.y += lheight
    return self.text_rect, self.last_point
def clean_rtl(self, text):
    """Revert the sequence of Latin text parts.

    Text with right-to-left writing direction (Arabic, Hebrew) often
    contains Latin parts, which are written in left-to-right: numbers, names,
    etc. For output as PDF text we need *everything* in right-to-left.
    E.g. an input like "<arabic> ABCDE FG HIJ <arabic> KL <arabic>" will be
    converted to "<arabic> JIH GF EDCBA <arabic> LK <arabic>". The Arabic
    parts remain untouched.

    A word counts as Latin if it has at least two characters and all of
    its code points are <= 255.

    Args:
        text: str

    Returns:
        Massaged string.
    """
    if not text:
        return text

    # Split into words at space boundaries.
    words = text.split(" ")

    # Revert the character sequence of Latin-only words and remember
    # where they are.
    latin = []
    for i, w in enumerate(words):
        if len(w) >= 2 and max(map(ord, w)) <= 255:
            words[i] = w[::-1]
            latin.append(i)

    # Adjacent Latin words must revert their sequence, too. Every run of
    # consecutive indices is handled, including a run that ends with the
    # last Latin word. The previous code only flushed a run when a later
    # Latin word followed it, so a trailing run kept its word order.
    pos = 0
    while pos < len(latin):
        end = pos
        while end + 1 < len(latin) and latin[end + 1] == latin[end] + 1:
            end += 1
        if end > pos:  # at least two consecutive Latin words
            first, last = latin[pos], latin[end]
            words[first:last + 1] = words[first:last + 1][::-1]
        pos = end + 1

    return " ".join(words)
def write_text(self, page, color=None, opacity=-1, overlay=1, morph=None, matrix=None, render_mode=0, oc=0):
"""Write the text to a PDF page having the TextWriter's page size.
Args:
page: a PDF page having same size.
color: override text color.
opacity: override transparency.
overlay: put in foreground or background.
morph: tuple(Point, Matrix), apply a matrix with a fixpoint.
matrix: Matrix to be used instead of 'morph' argument.
render_mode: (int) PDF render mode operator 'Tr'.
"""
# `oc` is passed to `page._get_optional_content()` below; when that returns
# a name, the output is wrapped in a "/OC ... BDC" / "EMC" pair.
CheckParent(page)
# The page rectangle must match the one given to the TextWriter.
if abs(self.rect - page.rect) > 1e-3:
raise ValueError("incompatible page rect")
# Validate `morph`; it is mutually exclusive with `matrix`.
if morph is not None:
if (type(morph) not in (tuple, list)
or type(morph[0]) is not Point
or type(morph[1]) is not Matrix
):
raise ValueError("morph must be (Point, Matrix) or None")
if matrix is not None and morph is not None:
raise ValueError("only one of matrix, morph is allowed")
# Fall back to the writer's own opacity / color when none is given.
if getattr(opacity, "__float__", None) is None or opacity == -1:
opacity = self.opacity
if color is None:
color = self.color
if 1:
pdfpage = page._pdf_page()
# Alpha stays 1 unless `opacity` lies in [0, 1).
alpha = 1
if opacity >= 0 and opacity < 1:
alpha = opacity
ncol = 1
dev_color = [0, 0, 0, 0]
if color:
ncol, dev_color = JM_color_FromSequence(color)
# Number of color components selects the colorspace: 3 = RGB, 4 = CMYK,
# anything else = gray.
if ncol == 3:
colorspace = mupdf.fz_device_rgb()
elif ncol == 4:
colorspace = mupdf.fz_device_cmyk()
else:
colorspace = mupdf.fz_device_gray()
ASSERT_PDF(pdfpage)
# Render the stored text through a PDF device into a fresh resources dict
# and contents buffer.
resources = mupdf.pdf_new_dict(pdfpage.doc(), 5)
contents = mupdf.fz_new_buffer(1024)
dev = mupdf.pdf_new_pdf_device( pdfpage.doc(), mupdf.FzMatrix(), resources, contents)
#log( '=== {dev_color!r=}')
mupdf.fz_fill_text(
dev,
self.this,
mupdf.FzMatrix(),
colorspace,
dev_color,
alpha,
mupdf.FzColorParams(mupdf.fz_default_color_params),
)
mupdf.fz_close_device( dev)
# copy generated resources into the one of the page
max_nums = JM_merge_resources( pdfpage, resources)
cont_string = JM_EscapeStrFromBuffer( contents)
result = (max_nums, cont_string)
val = result
# `max_nums` unpacks to two offsets, added below to the numbers in the
# generated "/Alp<n>" and "/F<n>" resource names.
max_nums = val[0]
content = val[1]
max_alp, max_font = max_nums
old_cont_lines = content.splitlines()
optcont = page._get_optional_content(oc)
if optcont is not None:
bdc = "/OC /%s BDC" % optcont
emc = "EMC"
else:
bdc = emc = ""
# Build the new content stream, bracketed by "q" ... "Q".
new_cont_lines = ["q"]
if bdc:
new_cont_lines.append(bdc)
cb = page.cropbox_position
if page.rotation in (90, 270):
delta = page.rect.height - page.rect.width
else:
delta = 0
mb = page.mediabox
# Emit a translation when the cropbox position is non-zero, the mediabox
# does not start at y = 0, or `delta` is non-zero.
if bool(cb) or mb.y0 != 0 or delta != 0:
new_cont_lines.append(f"1 0 0 1 {_format_g((cb.x, cb.y + mb.y0 - delta))} cm")
# A `morph` is converted to a matrix around its fixpoint.
# NOTE: `delta` is reused here as a Matrix.
if morph:
p = morph[0] * self.ictm
delta = Matrix(1, 1).pretranslate(p.x, p.y)
matrix = ~delta * morph[1] * delta
if morph or matrix:
new_cont_lines.append(_format_g(JM_TUPLE(matrix)) + " cm")
# Rewrite the generated content line by line.
for line in old_cont_lines:
# Lines ending in " cm" are dropped.
if line.endswith(" cm"):
continue
# The render mode operator is inserted right after each "BT".
if line == "BT":
new_cont_lines.append(line)
new_cont_lines.append("%i Tr" % render_mode)
continue
# Renumber the "/Alp<n> gs" resource name by `max_alp`.
if line.endswith(" gs"):
alp = int(line.split()[0][4:]) + max_alp
line = "/Alp%i gs" % alp
# Font selection: emit a line width first (5% of the font size when
# `render_mode` is non-zero, else 1), then renumber "/F<n>" by `max_font`.
elif line.endswith(" Tf"):
temp = line.split()
fsize = float(temp[1])
if render_mode != 0:
w = fsize * 0.05
else:
w = 1
new_cont_lines.append(_format_g(w) + " w")
font = int(temp[0][2:]) + max_font
line = " ".join(["/F%i" % font] + temp[1:])
# For color lines, an upper-case copy of the operator (RG, G, K) is
# emitted in addition to the original line.
elif line.endswith(" rg"):
new_cont_lines.append(line.replace("rg", "RG"))
elif line.endswith(" g"):
new_cont_lines.append(line.replace(" g", " G"))
elif line.endswith(" k"):
new_cont_lines.append(line.replace(" k", " K"))
new_cont_lines.append(line)
if emc:
new_cont_lines.append(emc)
new_cont_lines.append("Q\n")
content = "\n".join(new_cont_lines).encode("utf-8")
TOOLS._insert_contents(page, content, overlay=overlay)
val = None
# Fonts collected by `append()` are passed to `repair_mono_font()`.
for font in self.used_fonts:
repair_mono_font(page, font)
return val
class IRect:
    """
    IRect() - all zeros
    IRect(x0, y0, x1, y1) - 4 coordinates
    IRect(top-left, x1, y1) - point and 2 coordinates
    IRect(x0, y0, bottom-right) - 2 coordinates and point
    IRect(top-left, bottom-right) - 2 points
    IRect(sequ) - new from sequence or rect-like
    """

    def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
        coords = util_make_irect(*args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1)
        self.x0, self.y0, self.x1, self.y1 = coords

    # ------------------------------------------------------------------
    # Sequence protocol
    # ------------------------------------------------------------------
    def __len__(self):
        return 4

    def __getitem__(self, i):
        coords = (self.x0, self.y0, self.x1, self.y1)
        return coords[i]

    def __setitem__(self, i, v):
        v = int(v)
        # Only the indices 0..3 are accepted (no negative indices).
        for pos, attr in enumerate(("x0", "y0", "x1", "y1")):
            if i == pos:
                setattr(self, attr, v)
                return None
        raise IndexError("index out of range")

    def __contains__(self, x):
        return Rect.__contains__(self, x)

    def __eq__(self, r):
        if not hasattr(r, "__len__"):
            return False
        if len(r) != 4:
            return False
        return self.x0 == r[0] and self.y0 == r[1] and self.x1 == r[2] and self.y1 == r[3]

    def __repr__(self):
        return "IRect" + str(tuple(self))

    # ------------------------------------------------------------------
    # Arithmetic: delegate to Rect and round the result
    # ------------------------------------------------------------------
    def __add__(self, p):
        total = Rect.__add__(self, p)
        return total.round()

    def __sub__(self, p):
        difference = Rect.__sub__(self, p)
        return difference.round()

    def __mul__(self, m):
        product = Rect.__mul__(self, m)
        return product.round()

    def __truediv__(self, m):
        quotient = Rect.__truediv__(self, m)
        return quotient.round()

    def __and__(self, x):
        common = Rect.__and__(self, x)
        return common.round()

    def __or__(self, x):
        joined = Rect.__or__(self, x)
        return joined.round()

    def __neg__(self):
        return IRect(-self.x0, -self.y0, -self.x1, -self.y1)

    def __pos__(self):
        return IRect(self)

    # ------------------------------------------------------------------
    # Corners and dimensions
    # ------------------------------------------------------------------
    @property
    def bottom_left(self):
        """Bottom-left corner."""
        return Point(self.x0, self.y1)

    @property
    def bottom_right(self):
        """Bottom-right corner."""
        return Point(self.x1, self.y1)

    @property
    def top_left(self):
        """Top-left corner."""
        return Point(self.x0, self.y0)

    @property
    def top_right(self):
        """Top-right corner."""
        return Point(self.x1, self.y0)

    @property
    def height(self):
        """Height, never negative."""
        extent = self.y1 - self.y0
        return max(0, extent)

    @property
    def width(self):
        """Width, never negative."""
        extent = self.x1 - self.x0
        return max(0, extent)

    # ------------------------------------------------------------------
    # Predicates
    # ------------------------------------------------------------------
    @property
    def is_empty(self):
        """True if rectangle area is empty."""
        return self.x0 >= self.x1 or self.y0 >= self.y1

    @property
    def is_infinite(self):
        """True if rectangle is infinite."""
        low_ok = self.x0 == self.y0 and self.y0 == FZ_MIN_INF_RECT
        return low_ok and self.x1 == self.y1 == FZ_MAX_INF_RECT

    @property
    def is_valid(self):
        """True if rectangle is valid."""
        return self.x0 <= self.x1 and self.y0 <= self.y1

    def intersects(self, x):
        return Rect.intersects(self, x)

    # ------------------------------------------------------------------
    # Conversions and derived objects
    # ------------------------------------------------------------------
    @property
    def rect(self):
        """Return Rect version of rectangle."""
        return Rect(self)

    @property
    def quad(self):
        """Return Quad version of rectangle."""
        return Quad(self.tl, self.tr, self.bl, self.br)

    def include_point(self, p):
        """Extend rectangle to include point p."""
        grown = self.rect.include_point(p)
        return grown.irect

    def include_rect(self, r):
        """Extend rectangle to include rectangle r."""
        grown = self.rect.include_rect(r)
        return grown.irect

    def intersect(self, r):
        """Restrict rectangle to intersection with rectangle r."""
        common = Rect.intersect(self, r)
        return common.round()

    def morph(self, p, m):
        """Morph with matrix-like m and point-like p.

        Returns a new quad."""
        if self.is_infinite:
            return INFINITE_QUAD()
        return self.quad.morph(p, m)

    def norm(self):
        """Euclidean norm of the four coordinates."""
        return math.sqrt(sum(c*c for c in self))

    def normalize(self):
        """Replace rectangle with its valid version."""
        if self.x1 < self.x0:
            self.x0, self.x1 = self.x1, self.x0
        if self.y1 < self.y0:
            self.y0, self.y1 = self.y1, self.y0
        return self

    def torect(self, r):
        """Return matrix that converts to target rect."""
        target = Rect(r)
        if self.is_infinite or self.is_empty or target.is_infinite or target.is_empty:
            raise ValueError("rectangles must be finite and not empty")
        # Move to the origin, scale to the target size, move to the target.
        to_origin = Matrix(1, 0, 0, 1, -self.x0, -self.y0)
        scale = Matrix(target.width / self.width, target.height / self.height)
        to_target = Matrix(1, 0, 0, 1, target.x0, target.y0)
        return to_origin * scale * to_target

    def transform(self, m):
        moved = Rect.transform(self, m)
        return moved.round()

    # Short aliases for the corner properties.
    br = bottom_right
    bl = bottom_left
    tl = top_left
    tr = top_right
# Data
#
# Import-time setup: copy the `PDF_*` and `UCDN_SCRIPT_*` names found in
# the `mupdf` module into this module's namespace, then define a constant
# that the bindings do not provide.
if 1:
_self = sys.modules[__name__]
if 1:
# Active variant: read `mupdf.__dict__` directly.
for _name, _value in mupdf.__dict__.items():
if _name.startswith(('PDF_', 'UCDN_SCRIPT_')):
if _name.startswith('PDF_ENUM_NAME_'):
# Not a simple enum.
pass
else:
#assert not inspect.isroutine(value)
#log(f'importing {name}')
setattr(_self, _name, _value)
#log(f'{getattr( self, name, None)=}')
else:
# Disabled variant, kept for reference.
# This is slow due to importing inspect, e.g. 0.019 instead of 0.004.
for _name, _value in inspect.getmembers(mupdf):
if _name.startswith(('PDF_', 'UCDN_SCRIPT_')):
if _name.startswith('PDF_ENUM_NAME_'):
# Not a simple enum.
pass
else:
#assert not inspect.isroutine(value)
#log(f'importing {name}')
setattr(_self, _name, _value)
#log(f'{getattr( self, name, None)=}')
# This is a macro so not preserved in mupdf C++/Python bindings.
#
# Bitwise OR of all six PDF_SIGNATURE_SHOW_* flags listed below.
PDF_SIGNATURE_DEFAULT_APPEARANCE = (0
| mupdf.PDF_SIGNATURE_SHOW_LABELS
| mupdf.PDF_SIGNATURE_SHOW_DN
| mupdf.PDF_SIGNATURE_SHOW_DATE
| mupdf.PDF_SIGNATURE_SHOW_TEXT_NAME
| mupdf.PDF_SIGNATURE_SHOW_GRAPHIC_NAME
| mupdf.PDF_SIGNATURE_SHOW_LOGO
)
#UCDN_SCRIPT_ADLAM = mupdf.UCDN_SCRIPT_ADLAM
#setattr(self, 'UCDN_SCRIPT_ADLAM', mupdf.UCDN_SCRIPT_ADLAM)
# Sanity checks that the dynamic copy above worked.
assert mupdf.UCDN_EAST_ASIAN_H == 1
# Flake8 incorrectly fails next two lines because we've dynamically added
# items to self.
assert PDF_TX_FIELD_IS_MULTILINE == mupdf.PDF_TX_FIELD_IS_MULTILINE # noqa: F821
assert UCDN_SCRIPT_ADLAM == mupdf.UCDN_SCRIPT_ADLAM # noqa: F821
# Remove the temporary helper names from the module namespace.
del _self, _name, _value
# Dictionaries related to Adobe glyph names; created empty here.
_adobe_glyphs = {}
_adobe_unicodes = {}

AnyType = typing.Any

# Names of the Base-14 fonts.
Base14_fontnames = (
    "Courier",
    "Courier-Oblique",
    "Courier-Bold",
    "Courier-BoldOblique",
    "Helvetica",
    "Helvetica-Oblique",
    "Helvetica-Bold",
    "Helvetica-BoldOblique",
    "Times-Roman",
    "Times-Italic",
    "Times-Bold",
    "Times-BoldItalic",
    "Symbol",
    "ZapfDingbats",
)

# Maps accepted spellings to the font name: first the lower-cased full name
# of each Base-14 font, then the four-letter abbreviations.  A comprehension
# is used so that no loop variable is left behind in the module namespace.
Base14_fontdict = {
    _fontname.lower(): _fontname for _fontname in Base14_fontnames
}
Base14_fontdict.update(
    {
        "helv": "Helvetica",
        "heit": "Helvetica-Oblique",
        "hebo": "Helvetica-Bold",
        "hebi": "Helvetica-BoldOblique",
        "cour": "Courier",
        "coit": "Courier-Oblique",
        "cobo": "Courier-Bold",
        "cobi": "Courier-BoldOblique",
        "tiro": "Times-Roman",
        "tibo": "Times-Bold",
        "tiit": "Times-Italic",
        "tibi": "Times-BoldItalic",
        "symb": "Symbol",
        "zadb": "ZapfDingbats",
    }
)
# Small floating point tolerances.  FLT_EPSILON is used by JM_char_quad()
# further below; the users of EPSILON are outside this section.
EPSILON = 1e-5
FLT_EPSILON = 1e-5
# largest 32bit integers surviving C float conversion roundtrips
# used by MuPDF to define infinite rectangles
# (EMPTY_*() and INFINITE_*() below build their rectangles from these)
FZ_MIN_INF_RECT = -0x80000000
FZ_MAX_INF_RECT = 0x7fffff80
# Leading part of the /NM names generated by JM_add_annot_id().
JM_annot_id_stem = "fitz"
# Module-level state; presumably a store for collected MuPDF warnings plus
# 0/1 switches for showing errors and warnings -- confirm against the code
# that reads these names.
JM_mupdf_warnings_store = []
JM_mupdf_show_errors = 1
JM_mupdf_show_warnings = 0
# ------------------------------------------------------------------------------
# Various PDF Optional Content Flags
# ------------------------------------------------------------------------------
PDF_OC_ON = 0
PDF_OC_TOGGLE = 1
PDF_OC_OFF = 2
# ------------------------------------------------------------------------------
# link kinds and link flags
# ------------------------------------------------------------------------------
# Link kinds: consecutive integers starting at 0.
LINK_NONE = 0
LINK_GOTO = 1
LINK_URI = 2
LINK_LAUNCH = 3
LINK_NAMED = 4
LINK_GOTOR = 5
# Link flags: each value is a distinct single bit (1, 2, 4, ... 64).
LINK_FLAG_L_VALID = 1
LINK_FLAG_T_VALID = 2
LINK_FLAG_R_VALID = 4
LINK_FLAG_B_VALID = 8
LINK_FLAG_FIT_H = 16
LINK_FLAG_FIT_V = 32
LINK_FLAG_R_IS_ZOOM = 64
# Signature flags: two distinct single-bit values.
SigFlag_SignaturesExist = 1
SigFlag_AppendOnly = 2
# Stamp identifiers, numbered consecutively from 0 to 13 (presumably the
# selectable icons of stamp annotations -- confirm against the callers).
STAMP_Approved = 0
STAMP_AsIs = 1
STAMP_Confidential = 2
STAMP_Departmental = 3
STAMP_Experimental = 4
STAMP_Expired = 5
STAMP_Final = 6
STAMP_ForComment = 7
STAMP_ForPublicRelease = 8
STAMP_NotApproved = 9
STAMP_NotForPublicRelease = 10
STAMP_Sold = 11
STAMP_TopSecret = 12
STAMP_Draft = 13
# Text alignment: consecutive integers 0..3.
(
    TEXT_ALIGN_LEFT,
    TEXT_ALIGN_CENTER,
    TEXT_ALIGN_RIGHT,
    TEXT_ALIGN_JUSTIFY,
) = range(4)

# Font property bits; JM_char_font_flags() adds these up per character.
TEXT_FONT_SUPERSCRIPT = 1 << 0
TEXT_FONT_ITALIC = 1 << 1
TEXT_FONT_SERIFED = 1 << 2
TEXT_FONT_MONOSPACED = 1 << 3
TEXT_FONT_BOLD = 1 << 4

# Text output formats: consecutive integers 0..4.
(
    TEXT_OUTPUT_TEXT,
    TEXT_OUTPUT_HTML,
    TEXT_OUTPUT_JSON,
    TEXT_OUTPUT_XML,
    TEXT_OUTPUT_XHTML,
) = range(5)

# Single-bit text extraction flags.
TEXT_PRESERVE_LIGATURES = 1 << 0
TEXT_PRESERVE_WHITESPACE = 1 << 1
TEXT_PRESERVE_IMAGES = 1 << 2
TEXT_INHIBIT_SPACES = 1 << 3
TEXT_DEHYPHENATE = 1 << 4
TEXT_PRESERVE_SPANS = 1 << 5
TEXT_MEDIABOX_CLIP = 1 << 6
TEXT_CID_FOR_UNKNOWN_UNICODE = 1 << 7

# Flag combinations, one per TEXTFLAGS_* name.
TEXTFLAGS_WORDS = (
    TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP | TEXT_CID_FOR_UNKNOWN_UNICODE
)

TEXTFLAGS_BLOCKS = (
    TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP | TEXT_CID_FOR_UNKNOWN_UNICODE
)

TEXTFLAGS_DICT = (
    TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP | TEXT_PRESERVE_IMAGES
    | TEXT_CID_FOR_UNKNOWN_UNICODE
)

TEXTFLAGS_RAWDICT = TEXTFLAGS_DICT

TEXTFLAGS_SEARCH = (
    TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP | TEXT_DEHYPHENATE
    | TEXT_CID_FOR_UNKNOWN_UNICODE
)

TEXTFLAGS_HTML = (
    TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP | TEXT_PRESERVE_IMAGES
    | TEXT_CID_FOR_UNKNOWN_UNICODE
)

TEXTFLAGS_XHTML = (
    TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP | TEXT_PRESERVE_IMAGES
    | TEXT_CID_FOR_UNKNOWN_UNICODE
)

TEXTFLAGS_XML = (
    TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP | TEXT_CID_FOR_UNKNOWN_UNICODE
)

TEXTFLAGS_TEXT = (
    TEXT_PRESERVE_LIGATURES | TEXT_PRESERVE_WHITESPACE
    | TEXT_MEDIABOX_CLIP | TEXT_CID_FOR_UNKNOWN_UNICODE
)
# Simple text encoding options
TEXT_ENCODING_LATIN = 0
TEXT_ENCODING_GREEK = 1
TEXT_ENCODING_CYRILLIC = 2
TOOLS_JM_UNIQUE_ID = 0
# colorspace identifiers
# (passed to Colorspace() when the module-level csRGB / csGRAY / csCMYK
# objects are created)
CS_RGB = 1
CS_GRAY = 2
CS_CMYK = 3
# PDF Blend Modes
#
# Each value is the blend mode name as it is spelled in a PDF file.  PDF
# names are case-sensitive, so the spelling must match the PDF specification
# exactly (note the capital "L" in "SoftLight").
PDF_BM_Color = "Color"
PDF_BM_ColorBurn = "ColorBurn"
PDF_BM_ColorDodge = "ColorDodge"
PDF_BM_Darken = "Darken"
PDF_BM_Difference = "Difference"
PDF_BM_Exclusion = "Exclusion"
PDF_BM_HardLight = "HardLight"
PDF_BM_Hue = "Hue"
PDF_BM_Lighten = "Lighten"
PDF_BM_Luminosity = "Luminosity"
PDF_BM_Multiply = "Multiply"
PDF_BM_Normal = "Normal"
PDF_BM_Overlay = "Overlay"
PDF_BM_Saturation = "Saturation"
PDF_BM_Screen = "Screen"
PDF_BM_SoftLight = "SoftLight"
# General text flags
# NOTE: these five assignments repeat, with identical values, the
# TEXT_FONT_* assignments made earlier in this module.
TEXT_FONT_SUPERSCRIPT = 1
TEXT_FONT_ITALIC = 2
TEXT_FONT_SERIFED = 4
TEXT_FONT_MONOSPACED = 8
TEXT_FONT_BOLD = 16
# Builders for the PDF source text of link annotations.  Each entry is a
# lambda that inserts its arguments into an f-string template; all templates
# end in "/Rect[...]/BS<</W 0>>/Subtype/Link>>".  The last argument of every
# lambda is placed into /Rect.  `_format_g` is defined elsewhere in this
# module.
annot_skel = {
    "goto1": lambda a, b, c, d, e: f"<</A<</S/GoTo/D[{a} 0 R/XYZ {_format_g((b, c, d))}]>>/Rect[{e}]/BS<</W 0>>/Subtype/Link>>",
    "goto2": lambda a, b: f"<</A<</S/GoTo/D{a}>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>",
    "gotor1": lambda a, b, c, d, e, f, g: f"<</A<</S/GoToR/D[{a} /XYZ {_format_g((b, c, d))}]/F<</F({e})/UF({f})/Type/Filespec>>>>/Rect[{g}]/BS<</W 0>>/Subtype/Link>>",
    "gotor2": lambda a, b, c: f"<</A<</S/GoToR/D{a}/F({b})>>/Rect[{c}]/BS<</W 0>>/Subtype/Link>>",
    "launch": lambda a, b, c: f"<</A<</S/Launch/F<</F({a})/UF({b})/Type/Filespec>>>>/Rect[{c}]/BS<</W 0>>/Subtype/Link>>",
    "uri": lambda a, b: f"<</A<</S/URI/URI({a})>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>",
    "named": lambda a, b: f"<</A<</S/GoTo/D({a})/Type/Action>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>",
}
class FileDataError(RuntimeError):
    """Error raised for documents that have file structure issues."""
class FileNotFoundError(RuntimeError):
    """Error raised if a file does not exist.

    Note that this class has the same name as Python's built-in
    FileNotFoundError but derives from RuntimeError.
    """
class EmptyFileError(FileDataError):
    """FileDataError raised when creating documents from zero-length data."""
# propagate exception class to C-level code
#_set_FileDataError(FileDataError)
# Module-level Colorspace objects, one for each CS_* identifier.
csRGB = Colorspace(CS_RGB)
csGRAY = Colorspace(CS_GRAY)
csCMYK = Colorspace(CS_CMYK)
# These don't appear to be visible in classic, but are used
# internally.
#
# String constants used as dictionary keys (e.g. JM_annot_border() fills its
# result with dictkey_width, dictkey_dashes and dictkey_style).
dictkey_align = "align"
dictkey_asc = "ascender"
dictkey_bbox = "bbox"
dictkey_blocks = "blocks"
dictkey_bpc = "bpc"
dictkey_c = "c"
dictkey_chars = "chars"
dictkey_color = "color"
dictkey_colorspace = "colorspace"
dictkey_content = "content"
dictkey_creationDate = "creationDate"
dictkey_cs_name = "cs-name"
dictkey_da = "da"
dictkey_dashes = "dashes"
# NOTE(review): this name used to be assigned twice in a row, first "desc"
# and then "descender".  Only the second value was ever observable, so the
# shadowed first assignment has been dropped.  If a separate key "desc" was
# intended it needs a name of its own -- confirm against the users.
dictkey_desc = "descender"
dictkey_dir = "dir"
dictkey_effect = "effect"
dictkey_ext = "ext"
dictkey_filename = "filename"
dictkey_fill = "fill"
dictkey_flags = "flags"
dictkey_font = "font"
dictkey_glyph = "glyph"
dictkey_height = "height"
dictkey_id = "id"
dictkey_image = "image"
dictkey_items = "items"
dictkey_length = "length"
dictkey_lines = "lines"
dictkey_matrix = "transform"
dictkey_modDate = "modDate"
dictkey_name = "name"
dictkey_number = "number"
dictkey_origin = "origin"
dictkey_rect = "rect"
dictkey_size = "size"
dictkey_smask = "smask"
dictkey_spans = "spans"
dictkey_stroke = "stroke"
dictkey_style = "style"
dictkey_subject = "subject"
dictkey_text = "text"
dictkey_title = "title"
dictkey_type = "type"
dictkey_ufilename = "ufilename"
dictkey_width = "width"
dictkey_wmode = "wmode"
dictkey_xref = "xref"
dictkey_xres = "xres"
dictkey_yres = "yres"
# Font descriptors from the optional `pymupdf_fonts` package.  If that
# package cannot be imported, the dictionary is simply empty.
#
# Only the import itself is guarded: an ImportError raised while the entries
# are processed must not be mistaken for "package not installed".
try:
    from pymupdf_fonts import fontdescriptors, fontbuffers
except ImportError:
    fitz_fontdescriptors = {}
else:
    # Shallow copy: the per-font entries are shared with `fontdescriptors`.
    fitz_fontdescriptors = fontdescriptors.copy()
    for k in fitz_fontdescriptors:
        # Store the matching `fontbuffers` item under the key "loader".
        fitz_fontdescriptors[k]["loader"] = fontbuffers[k]
    del fontdescriptors, fontbuffers
symbol_glyphs = ( # Glyph list for the built-in font 'Symbol'
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(32, 0.25),
(33, 0.333),
(34, 0.713),
(35, 0.5),
(36, 0.549),
(37, 0.833),
(38, 0.778),
(39, 0.439),
(40, 0.333),
(41, 0.333),
(42, 0.5),
(43, 0.549),
(44, 0.25),
(45, 0.549),
(46, 0.25),
(47, 0.278),
(48, 0.5),
(49, 0.5),
(50, 0.5),
(51, 0.5),
(52, 0.5),
(53, 0.5),
(54, 0.5),
(55, 0.5),
(56, 0.5),
(57, 0.5),
(58, 0.278),
(59, 0.278),
(60, 0.549),
(61, 0.549),
(62, 0.549),
(63, 0.444),
(64, 0.549),
(65, 0.722),
(66, 0.667),
(67, 0.722),
(68, 0.612),
(69, 0.611),
(70, 0.763),
(71, 0.603),
(72, 0.722),
(73, 0.333),
(74, 0.631),
(75, 0.722),
(76, 0.686),
(77, 0.889),
(78, 0.722),
(79, 0.722),
(80, 0.768),
(81, 0.741),
(82, 0.556),
(83, 0.592),
(84, 0.611),
(85, 0.69),
(86, 0.439),
(87, 0.768),
(88, 0.645),
(89, 0.795),
(90, 0.611),
(91, 0.333),
(92, 0.863),
(93, 0.333),
(94, 0.658),
(95, 0.5),
(96, 0.5),
(97, 0.631),
(98, 0.549),
(99, 0.549),
(100, 0.494),
(101, 0.439),
(102, 0.521),
(103, 0.411),
(104, 0.603),
(105, 0.329),
(106, 0.603),
(107, 0.549),
(108, 0.549),
(109, 0.576),
(110, 0.521),
(111, 0.549),
(112, 0.549),
(113, 0.521),
(114, 0.549),
(115, 0.603),
(116, 0.439),
(117, 0.576),
(118, 0.713),
(119, 0.686),
(120, 0.493),
(121, 0.686),
(122, 0.494),
(123, 0.48),
(124, 0.2),
(125, 0.48),
(126, 0.549),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(183, 0.46),
(160, 0.25),
(161, 0.62),
(162, 0.247),
(163, 0.549),
(164, 0.167),
(165, 0.713),
(166, 0.5),
(167, 0.753),
(168, 0.753),
(169, 0.753),
(170, 0.753),
(171, 1.042),
(172, 0.713),
(173, 0.603),
(174, 0.987),
(175, 0.603),
(176, 0.4),
(177, 0.549),
(178, 0.411),
(179, 0.549),
(180, 0.549),
(181, 0.576),
(182, 0.494),
(183, 0.46),
(184, 0.549),
(185, 0.549),
(186, 0.549),
(187, 0.549),
(188, 1),
(189, 0.603),
(190, 1),
(191, 0.658),
(192, 0.823),
(193, 0.686),
(194, 0.795),
(195, 0.987),
(196, 0.768),
(197, 0.768),
(198, 0.823),
(199, 0.768),
(200, 0.768),
(201, 0.713),
(202, 0.713),
(203, 0.713),
(204, 0.713),
(205, 0.713),
(206, 0.713),
(207, 0.713),
(208, 0.768),
(209, 0.713),
(210, 0.79),
(211, 0.79),
(212, 0.89),
(213, 0.823),
(214, 0.549),
(215, 0.549),
(216, 0.713),
(217, 0.603),
(218, 0.603),
(219, 1.042),
(220, 0.987),
(221, 0.603),
(222, 0.987),
(223, 0.603),
(224, 0.494),
(225, 0.329),
(226, 0.79),
(227, 0.79),
(228, 0.786),
(229, 0.713),
(230, 0.384),
(231, 0.384),
(232, 0.384),
(233, 0.384),
(234, 0.384),
(235, 0.384),
(236, 0.494),
(237, 0.494),
(238, 0.494),
(239, 0.494),
(183, 0.46),
(241, 0.329),
(242, 0.274),
(243, 0.686),
(244, 0.686),
(245, 0.686),
(246, 0.384),
(247, 0.549),
(248, 0.384),
(249, 0.384),
(250, 0.384),
(251, 0.384),
(252, 0.494),
(253, 0.494),
(254, 0.494),
(183, 0.46),
)
zapf_glyphs = ( # Glyph list for the built-in font 'ZapfDingbats'
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(32, 0.278),
(33, 0.974),
(34, 0.961),
(35, 0.974),
(36, 0.98),
(37, 0.719),
(38, 0.789),
(39, 0.79),
(40, 0.791),
(41, 0.69),
(42, 0.96),
(43, 0.939),
(44, 0.549),
(45, 0.855),
(46, 0.911),
(47, 0.933),
(48, 0.911),
(49, 0.945),
(50, 0.974),
(51, 0.755),
(52, 0.846),
(53, 0.762),
(54, 0.761),
(55, 0.571),
(56, 0.677),
(57, 0.763),
(58, 0.76),
(59, 0.759),
(60, 0.754),
(61, 0.494),
(62, 0.552),
(63, 0.537),
(64, 0.577),
(65, 0.692),
(66, 0.786),
(67, 0.788),
(68, 0.788),
(69, 0.79),
(70, 0.793),
(71, 0.794),
(72, 0.816),
(73, 0.823),
(74, 0.789),
(75, 0.841),
(76, 0.823),
(77, 0.833),
(78, 0.816),
(79, 0.831),
(80, 0.923),
(81, 0.744),
(82, 0.723),
(83, 0.749),
(84, 0.79),
(85, 0.792),
(86, 0.695),
(87, 0.776),
(88, 0.768),
(89, 0.792),
(90, 0.759),
(91, 0.707),
(92, 0.708),
(93, 0.682),
(94, 0.701),
(95, 0.826),
(96, 0.815),
(97, 0.789),
(98, 0.789),
(99, 0.707),
(100, 0.687),
(101, 0.696),
(102, 0.689),
(103, 0.786),
(104, 0.787),
(105, 0.713),
(106, 0.791),
(107, 0.785),
(108, 0.791),
(109, 0.873),
(110, 0.761),
(111, 0.762),
(112, 0.762),
(113, 0.759),
(114, 0.759),
(115, 0.892),
(116, 0.892),
(117, 0.788),
(118, 0.784),
(119, 0.438),
(120, 0.138),
(121, 0.277),
(122, 0.415),
(123, 0.392),
(124, 0.392),
(125, 0.668),
(126, 0.668),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(183, 0.788),
(161, 0.732),
(162, 0.544),
(163, 0.544),
(164, 0.91),
(165, 0.667),
(166, 0.76),
(167, 0.76),
(168, 0.776),
(169, 0.595),
(170, 0.694),
(171, 0.626),
(172, 0.788),
(173, 0.788),
(174, 0.788),
(175, 0.788),
(176, 0.788),
(177, 0.788),
(178, 0.788),
(179, 0.788),
(180, 0.788),
(181, 0.788),
(182, 0.788),
(183, 0.788),
(184, 0.788),
(185, 0.788),
(186, 0.788),
(187, 0.788),
(188, 0.788),
(189, 0.788),
(190, 0.788),
(191, 0.788),
(192, 0.788),
(193, 0.788),
(194, 0.788),
(195, 0.788),
(196, 0.788),
(197, 0.788),
(198, 0.788),
(199, 0.788),
(200, 0.788),
(201, 0.788),
(202, 0.788),
(203, 0.788),
(204, 0.788),
(205, 0.788),
(206, 0.788),
(207, 0.788),
(208, 0.788),
(209, 0.788),
(210, 0.788),
(211, 0.788),
(212, 0.894),
(213, 0.838),
(214, 1.016),
(215, 0.458),
(216, 0.748),
(217, 0.924),
(218, 0.748),
(219, 0.918),
(220, 0.927),
(221, 0.928),
(222, 0.928),
(223, 0.834),
(224, 0.873),
(225, 0.828),
(226, 0.924),
(227, 0.924),
(228, 0.917),
(229, 0.93),
(230, 0.931),
(231, 0.463),
(232, 0.883),
(233, 0.836),
(234, 0.836),
(235, 0.867),
(236, 0.867),
(237, 0.696),
(238, 0.696),
(239, 0.874),
(183, 0.788),
(241, 0.874),
(242, 0.76),
(243, 0.946),
(244, 0.771),
(245, 0.865),
(246, 0.771),
(247, 0.888),
(248, 0.967),
(249, 0.888),
(250, 0.831),
(251, 0.873),
(252, 0.927),
(253, 0.97),
(183, 0.788),
(183, 0.788),
)
# Functions
#
def _read_samples( pixmap, offset, n):
    '''
    Return `n` consecutive samples of `pixmap`, starting at index `offset`,
    as a bytes object.
    '''
    # fixme: need to be able to get a sample in one call, as a Python
    # bytes or similar.
    return bytes(
        mupdf.fz_samples_get(pixmap, offset + step) for step in range(n)
    )
def _INRANGE(v, low, high):
    '''
    Tell whether `v` lies in the closed interval [low, high].
    '''
    return low <= v <= high
def _remove_dest_range(pdf, numbers):
    '''
    On every page whose number is not in `numbers`, delete the Link
    annotations whose destination page number is in `numbers`.

    Links are examined via /Dest or, if an action /A exists, via the /D
    entry of a GoTo action (other action types are left alone).  Links whose
    target page cannot be determined are kept.
    '''
    for page_no in range(mupdf.pdf_count_pages(pdf)):
        if page_no in numbers:
            continue

        page_obj = mupdf.pdf_lookup_page_obj(pdf, page_no)
        annots = mupdf.pdf_dict_get(page_obj, PDF_NAME('Annots'))
        if not annots.m_internal:
            continue

        # Walk backwards so that deletions do not shift pending indices.
        for idx in reversed(range(mupdf.pdf_array_len(annots))):
            annot = mupdf.pdf_array_get(annots, idx)
            subtype = mupdf.pdf_dict_get(annot, PDF_NAME('Subtype'))
            if not mupdf.pdf_name_eq(subtype, PDF_NAME('Link')):
                continue

            action = mupdf.pdf_dict_get(annot, PDF_NAME('A'))
            dest = mupdf.pdf_dict_get(annot, PDF_NAME('Dest'))
            if action.m_internal:
                action_type = mupdf.pdf_dict_get(action, PDF_NAME('S'))
                if not mupdf.pdf_name_eq(action_type, PDF_NAME('GoTo')):
                    continue
                dest = mupdf.pdf_dict_get(action, PDF_NAME('D'))

            target_no = -1
            if mupdf.pdf_is_array(dest):
                target_no = mupdf.pdf_lookup_page_number(
                    pdf, mupdf.pdf_array_get(dest, 0)
                )
            elif mupdf.pdf_is_string(dest):
                location, _, _ = mupdf.fz_resolve_link(
                    pdf.super(), mupdf.pdf_to_text_string(dest)
                )
                target_no = location.page

            if target_no < 0:   # page number lookup did not work
                continue
            if target_no in numbers:
                mupdf.pdf_array_delete(annots, idx)
def ASSERT_PDF(cond):
    '''
    Raise an Exception with message MSG_IS_NO_PDF if `cond.m_internal` is
    false.  `cond` must be a mupdf.PdfPage or mupdf.PdfDocument.
    '''
    accepted = (mupdf.PdfPage, mupdf.PdfDocument)
    assert isinstance(cond, accepted), f'{type(cond)=} {cond=}'
    if cond.m_internal:
        return
    raise Exception(MSG_IS_NO_PDF)
def EMPTY_IRECT():
    '''
    IRect whose first corner is (max, max) and second corner (min, min) of
    the FZ_*_INF_RECT limits.
    '''
    hi, lo = FZ_MAX_INF_RECT, FZ_MIN_INF_RECT
    return IRect(hi, hi, lo, lo)
def EMPTY_QUAD():
    '''
    The `.quad` of the rectangle returned by EMPTY_RECT().
    '''
    empty_rect = EMPTY_RECT()
    return empty_rect.quad
def EMPTY_RECT():
    '''
    Rect whose first corner is (max, max) and second corner (min, min) of
    the FZ_*_INF_RECT limits.
    '''
    hi, lo = FZ_MAX_INF_RECT, FZ_MIN_INF_RECT
    return Rect(hi, hi, lo, lo)
def ENSURE_OPERATION(pdf):
    '''
    Raise an Exception unless JM_have_operation(pdf) is true.
    '''
    if JM_have_operation(pdf):
        return
    raise Exception("No journalling operation started")
def INFINITE_IRECT():
    '''
    IRect spanning from (min, min) to (max, max) of the FZ_*_INF_RECT
    limits.
    '''
    lo, hi = FZ_MIN_INF_RECT, FZ_MAX_INF_RECT
    return IRect(lo, lo, hi, hi)
def INFINITE_QUAD():
    '''
    The `.quad` of the rectangle returned by INFINITE_RECT().
    '''
    infinite_rect = INFINITE_RECT()
    return infinite_rect.quad
def INFINITE_RECT():
    '''
    Rect spanning from (min, min) to (max, max) of the FZ_*_INF_RECT limits.
    '''
    lo, hi = FZ_MIN_INF_RECT, FZ_MAX_INF_RECT
    return Rect(lo, lo, hi, hi)
def JM_BinFromBuffer(buffer_):
    '''
    Return the content of an fz_buffer as a Python bytes object.

    `buffer_` must be a mupdf.FzBuffer; the data is obtained with
    mupdf.fz_buffer_extract_copy().
    '''
    assert isinstance(buffer_, mupdf.FzBuffer)
    return mupdf.fz_buffer_extract_copy(buffer_)
def JM_EscapeStrFromStr(c):
    '''
    Return a `str` holding one character per UTF-8 byte of `c`.

    Args:
        c: a `str`, or None.
    Returns:
        '' if `c` is None; otherwise a string in which every byte of the
        UTF-8 encoding of `c` appears as the character with that code.
    '''
    # `c` is typically from SWIG which will have converted a `const char*` from
    # C into a Python `str` using `PyUnicode_DecodeUTF8(carray, static_cast<
    # Py_ssize_t >(size), "surrogateescape")`.  This gives us a Python `str`
    # with some characters encoded as a \0xdcXY sequence, where `XY` are hex
    # digits for an invalid byte in the original `const char*`.
    #
    # This is actually a reasonable way of representing arbitrary
    # strings from C, but we want to mimic what PyMuPDF does. It uses
    # `PyUnicode_DecodeRawUnicodeEscape(c, (Py_ssize_t) strlen(c), "replace")`
    # which gives a string containing actual unicode characters for any invalid
    # bytes.
    #
    # We mimic this by converting the `str` to a `bytes` with 'surrogateescape'
    # to recognise \0xdcXY sequences, then map every byte to the character
    # with the same code.  Decoding as latin-1 does exactly that (byte N
    # becomes chr(N)) in a single call instead of a per-byte Python loop.
    #
    if c is None:
        return ''
    assert isinstance(c, str), f'{type(c)=}'
    return c.encode('utf8', 'surrogateescape').decode('latin-1')
def JM_BufferFromBytes(stream):
    '''
    Make an fz_buffer from a bytes or bytearray object, or from an object
    offering `.getvalue()` (such as io.BytesIO).  Text returned by
    `.getvalue()` is converted to binary by encoding it as utf8.

    Any other kind of `stream` yields a default-constructed mupdf.FzBuffer.
    An Exception is raised if `.getvalue()` returns neither text nor
    bytes / bytearray.
    '''
    if isinstance(stream, (bytes, bytearray)):
        return mupdf.fz_new_buffer_from_copied_data(stream)
    if not hasattr(stream, 'getvalue'):
        return mupdf.FzBuffer()

    payload = stream.getvalue()
    if isinstance(payload, str):
        payload = payload.encode('utf-8')
    if not isinstance(payload, (bytes, bytearray)):
        raise Exception(f'.getvalue() returned unexpected type: {type(payload)}')
    return mupdf.fz_new_buffer_from_copied_data(payload)
def JM_FLOAT_ITEM(obj, idx):
    '''
    Return `obj[idx]` converted to float, or None if `obj` does not pass
    PySequence_Check().
    '''
    if PySequence_Check(obj):
        return float(obj[idx])
    return None
def JM_INT_ITEM(obj, idx):
    '''
    Fetch the numeric item `obj[idx]`.

    Returns the pair `(0, item)` if `idx` is below `len(obj)` and the item
    is an int or float; otherwise `(1, None)`.
    '''
    if idx >= len(obj):
        return 1, None
    candidate = obj[idx]
    if not isinstance(candidate, (int, float)):
        return 1, None
    return 0, candidate
def JM_pixmap_from_page(doc, page, ctm, cs, alpha, annots, clip):
    '''
    Render `page` into a newly created pixmap using a draw device, with
    support for separations.

    The pixmap covers the page bounds intersected with `clip` and
    transformed by `ctm`.  If the document has an output intent with the
    same number of components as `cs`, it is used instead of `cs`.  With
    `annots` true the whole page is run, otherwise only the page contents.
    '''
    SPOTS_NONE = 0
    SPOTS_OVERPRINT_SIM = 1
    SPOTS_FULL = 2

    FZ_ENABLE_SPOT_RENDERING = True     # fixme: this is a build-time setting in MuPDF's config.h.
    spots = SPOTS_OVERPRINT_SIM if FZ_ENABLE_SPOT_RENDERING else SPOTS_NONE

    seps = None
    colorspace = cs

    matrix = JM_matrix_from_py(ctm)
    page_rect = mupdf.fz_bound_page(page)
    clip_rect = JM_rect_from_py(clip)
    # The intersection is a no-op if clip is not given.
    visible = mupdf.fz_intersect_rect(page_rect, clip_rect)
    bbox = mupdf.fz_round_rect(mupdf.fz_transform_rect(visible, matrix))

    # Pixmap of the document's /OutputIntents ("output intents")
    oi = mupdf.fz_document_output_intent(doc)
    # if present and compatible, use it instead of the parameter
    if oi.m_internal and mupdf.fz_colorspace_n(oi) == mupdf.fz_colorspace_n(cs):
        colorspace = mupdf.fz_keep_colorspace(oi)

    # check if spots rendering is available and if so use separations
    if spots != SPOTS_NONE:
        seps = mupdf.fz_page_separations(page)
        if seps.m_internal:
            use_spot = spots == SPOTS_FULL
            for sep_no in range(mupdf.fz_count_separations(seps)):
                mupdf.fz_set_separation_behavior(
                    seps,
                    sep_no,
                    mupdf.FZ_SEPARATION_SPOT if use_spot else mupdf.FZ_SEPARATION_COMPOSITE,
                )
        elif mupdf.fz_page_uses_overprint(page):
            # This page uses overprint, so we need an empty
            # sep object to force the overprint simulation on.
            seps = mupdf.fz_new_separations(0)
        elif oi.m_internal and mupdf.fz_colorspace_n(oi) != mupdf.fz_colorspace_n(colorspace):
            # We have an output intent, and it's incompatible
            # with the colorspace our device needs. Force the
            # overprint simulation on, because this ensures that
            # we 'simulate' the output intent too.
            seps = mupdf.fz_new_separations(0)

    pix = mupdf.fz_new_pixmap_with_bbox(colorspace, bbox, seps, alpha)
    if alpha:
        mupdf.fz_clear_pixmap(pix)
    else:
        mupdf.fz_clear_pixmap_with_value(pix, 0xFF)

    dev = mupdf.fz_new_draw_device(matrix, pix)
    run_page = mupdf.fz_run_page if annots else mupdf.fz_run_page_contents
    run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
    mupdf.fz_close_device(dev)
    return pix
def JM_StrAsChar(x):
    '''
    Return `x` unchanged.

    fixme: should encode (`x.encode('utf8')`), but swig doesn't pass bytes
    to C as const char*.
    '''
    return x
def JM_TUPLE(o: typing.Sequence) -> tuple:
    '''
    Return the items of `o` as a tuple, each rounded to 5 decimals; items
    with an absolute value below 1e-4 become 0.
    '''
    return tuple(round(v, 5) if abs(v) >= 1e-4 else 0 for v in o)
def JM_TUPLE3(o: typing.Sequence) -> tuple:
    '''
    Return the items of `o` as a tuple, each rounded to 3 decimals; items
    with an absolute value below 1e-3 become 0.
    '''
    return tuple(round(v, 3) if abs(v) >= 1e-3 else 0 for v in o)
def JM_UnicodeFromStr(s):
    '''
    Return `s` as a `str`: '' for None, the utf8-decoded text for bytes,
    and `s` itself if it already is a `str`.
    '''
    if s is None:
        return ''
    text = s.decode('utf8') if isinstance(s, bytes) else s
    assert isinstance(text, str), f'{type(s)=} {s=}'
    return text
def JM_add_annot_id(annot, stem):
    '''
    Give an annotation or widget a unique /NM entry.

    The name has the form f'{JM_annot_id_stem}-{stem}{i}', where `i` is the
    smallest non-negative number for which the name is not yet contained in
    JM_get_annot_id_list(page).  Afterwards `resynth_required` of the
    page's document is set to 0.
    '''
    assert isinstance(annot, mupdf.PdfAnnot)
    page = mupdf.pdf_annot_page(annot)
    annot_obj = mupdf.pdf_annot_obj(annot)
    taken = JM_get_annot_id_list(page)

    counter = 0
    stem_id = f'{JM_annot_id_stem}-{stem}{counter}'
    while stem_id in taken:
        counter += 1
        stem_id = f'{JM_annot_id_stem}-{stem}{counter}'

    response = JM_StrAsChar(stem_id)
    mupdf.pdf_dict_puts(
        annot_obj,
        "NM",
        mupdf.pdf_new_string(response, len(response)),
    )
    page.doc().m_internal.resynth_required = 0
def JM_add_oc_object(pdf, ref, xref):
    '''
    Store a reference to object `xref` under key /OC of dictionary `ref`.

    RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError) is invoked if the object is
    not a dictionary or if its /Type is neither /OCG nor /OCMD.
    '''
    indobj = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not mupdf.pdf_is_dict(indobj):
        RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError)

    type_ = mupdf.pdf_dict_get(indobj, PDF_NAME('Type'))
    is_oc_type = any(
        mupdf.pdf_objcmp(type_, PDF_NAME(type_name)) == 0
        for type_name in ('OCG', 'OCMD')
    )
    if is_oc_type:
        mupdf.pdf_dict_put(ref, PDF_NAME('OC'), indobj)
        return
    RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError)
def JM_annot_border(annot_obj):
    '''
    Return the border properties of an annotation object as a dictionary
    with the keys dictkey_width, dictkey_dashes, dictkey_style and 'clouds'.

    Width comes from /Border and is overridden by /BS /W if /BS exists.
    Dashes are collected from /Border (4th item) and from /BS /D.  Style is
    /BS /S, or None if empty or absent.  Clouds is /BE /I.  Width and
    clouds are -1 if no source for them exists.
    '''
    def int_items(array):
        # All items of a PDF array, converted to int.
        return [
            mupdf.pdf_to_int(mupdf.pdf_array_get(array, k))
            for k in range(mupdf.pdf_array_len(array))
        ]

    dashes = []
    style = None
    width = -1
    clouds = -1

    border = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Border'))
    if mupdf.pdf_is_array(border):
        width = mupdf.pdf_to_real(mupdf.pdf_array_get(border, 2))
        if mupdf.pdf_array_len(border) == 4:
            dashes.extend(int_items(mupdf.pdf_array_get(border, 3)))

    bs_o = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BS'))
    if bs_o.m_internal:
        width = mupdf.pdf_to_real(mupdf.pdf_dict_get(bs_o, PDF_NAME('W')))
        style = mupdf.pdf_to_name(mupdf.pdf_dict_get(bs_o, PDF_NAME('S')))
        if style == '':
            style = None
        bs_dashes = mupdf.pdf_dict_get(bs_o, PDF_NAME('D'))
        if bs_dashes.m_internal:
            dashes.extend(int_items(bs_dashes))

    effect = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BE'))
    if effect.m_internal:
        clouds = mupdf.pdf_to_int(mupdf.pdf_dict_get(effect, PDF_NAME('I')))

    return {
        dictkey_width: width,
        dictkey_dashes: tuple(dashes),
        dictkey_style: style,
        'clouds': clouds,
    }
def JM_annot_colors(annot_obj):
    '''
    Return the colors of an annotation object as a dictionary.

    dictkey_stroke holds the numbers of the /C array, dictkey_fill those of
    the /IC array.  A list is empty if the respective entry is not an
    array.
    '''
    def real_items(array):
        # All items of a PDF array as numbers; [] if it is no array.
        if not mupdf.pdf_is_array(array):
            return []
        return [
            mupdf.pdf_to_real(mupdf.pdf_array_get(array, k))
            for k in range(mupdf.pdf_array_len(array))
        ]

    return {
        dictkey_stroke: real_items(
            mupdf.pdf_dict_get(annot_obj, mupdf.PDF_ENUM_NAME_C)
        ),
        dictkey_fill: real_items(mupdf.pdf_dict_gets(annot_obj, "IC")),
    }
def JM_annot_set_border( border, doc, annot_obj):
    '''
    Replace the border properties of an annotation object.

    Args:
        border: dict that may contain the keys dictkey_width,
            dictkey_dashes, dictkey_style and 'clouds'.
        doc: passed to mupdf.pdf_new_array() when a dash array is created.
        annot_obj: the annotation object to modify.

    A missing (None) item keeps the annotation's current value, as does a
    negative width or clouds value.  The entries /BS, /BE and /Border are
    deleted and /BS (plus /BE if clouds > 0) is written anew.
    '''
    assert isinstance(border, dict)
    dashlen = 0
    nwidth = border.get( dictkey_width)     # new width
    ndashes = border.get( dictkey_dashes)   # new dashes
    nstyle = border.get( dictkey_style)     # new style
    nclouds = border.get( 'clouds')         # new clouds value

    # get old border properties
    oborder = JM_annot_border( annot_obj)

    # delete border-related entries
    mupdf.pdf_dict_del( annot_obj, PDF_NAME('BS'))
    mupdf.pdf_dict_del( annot_obj, PDF_NAME('BE'))
    mupdf.pdf_dict_del( annot_obj, PDF_NAME('Border'))

    # populate border items: keep old values for any omitted new ones.
    # `border.get()` returns None for an absent key, and `None < 0` would
    # raise TypeError, hence the explicit None checks.
    if nwidth is None or nwidth < 0:
        nwidth = oborder.get( dictkey_width)    # no new width: keep current
    if ndashes is None:
        ndashes = oborder.get( dictkey_dashes)  # no new dashes: keep old
    if nstyle is None:
        nstyle = oborder.get( dictkey_style)    # no new style: keep old
    if nclouds is None or nclouds < 0:
        nclouds = oborder.get( "clouds")        # no new clouds: keep old

    if isinstance( ndashes, tuple) and len( ndashes) > 0:
        dashlen = len( ndashes)
        darr = mupdf.pdf_new_array( doc, dashlen)
        for d in ndashes:
            mupdf.pdf_array_push_int( darr, d)
        mupdf.pdf_dict_putl( annot_obj, darr, PDF_NAME('BS'), PDF_NAME('D'))

    mupdf.pdf_dict_putl(
            annot_obj,
            mupdf.pdf_new_real( nwidth),
            PDF_NAME('BS'),
            PDF_NAME('W'),
            )

    # With dashes present the style is always /D; otherwise it is derived
    # from the style value.
    if dashlen == 0:
        obj = JM_get_border_style( nstyle)
    else:
        obj = PDF_NAME('D')
    mupdf.pdf_dict_putl( annot_obj, obj, PDF_NAME('BS'), PDF_NAME('S'))

    if nclouds > 0:
        mupdf.pdf_dict_put_dict( annot_obj, PDF_NAME('BE'), 2)
        obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('BE'))
        mupdf.pdf_dict_put( obj, PDF_NAME('S'), PDF_NAME('C'))
        mupdf.pdf_dict_put_int( obj, PDF_NAME('I'), nclouds)
def make_escape(ch):
    '''
    Return the text to emit for character code `ch`.

    The backslash (92) becomes "\\u005c".  Codes 32..127 and the line feed
    (10) are returned as the character itself.  Codes in the surrogate
    range 0xd800..0xdfff become "\\ufffd".  Everything else is written as a
    "\\uXXXX" escape, or "\\UXXXXXXXX" for codes above 0xffff.
    '''
    if ch == 92:
        return "\\u005c"
    if 32 <= ch <= 127 or ch == 10:
        return chr(ch)
    if 0xd800 <= ch <= 0xdfff:  # orphaned surrogate
        return "\\ufffd"
    template = "\\u%04x" if ch <= 0xffff else "\\U%08x"
    return template % ch
def JM_append_rune(buff, ch):
    """
    Append the make_escape() text of character code `ch` to fz_buffer
    `buff`, so non-ascii runes arrive in unicode escape format.
    """
    escaped = make_escape(ch)
    mupdf.fz_append_string(buff, escaped)
def JM_append_word(lines, buff, wbbox, block_n, line_n, word_n):
    '''
    Wordlist output: append one word tuple to `lines`.

    The tuple is (x0, y0, x1, y1, text, block_n, line_n, word_n), with the
    coordinates taken from `wbbox` and the text from
    JM_EscapeStrFromBuffer(buff).

    Returns the incremented word counter and a new
    mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY).
    '''
    text = JM_EscapeStrFromBuffer(buff)
    lines.append(
        (wbbox.x0, wbbox.y0, wbbox.x1, wbbox.y1, text, block_n, line_n, word_n)
    )
    next_word_n = word_n + 1    # word counter
    return next_word_n, mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
def JM_add_layer_config( pdf, name, creator, ON):
    '''
    Add an OC configuration to the PDF catalog.

    A new dictionary with /Name, optionally /Creator, /BaseState /OFF and
    an /ON array is appended to the /Configs array of the OC properties
    (the array is created if missing).  For each numeric item of `ON`, a
    reference to that xref is put into /ON if /OCGs contains it.
    '''
    ocp = JM_ensure_ocproperties(pdf)
    configs = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
    if not mupdf.pdf_is_array(configs):
        configs = mupdf.pdf_dict_put_array(ocp, PDF_NAME('Configs'), 1)

    D = mupdf.pdf_new_dict(pdf, 5)
    mupdf.pdf_dict_put_text_string(D, PDF_NAME('Name'), name)
    if creator is not None:
        mupdf.pdf_dict_put_text_string(D, PDF_NAME('Creator'), creator)
    mupdf.pdf_dict_put(D, PDF_NAME('BaseState'), PDF_NAME('OFF'))
    onarray = mupdf.pdf_dict_put_array(D, PDF_NAME('ON'), 5)

    if ON:
        ocgs = mupdf.pdf_dict_get(ocp, PDF_NAME('OCGs'))
        for pos in range(len(ON)):
            failed, xref = JM_INT_ITEM(ON, pos)
            if failed == 1:
                continue    # not a numeric item
            ind = mupdf.pdf_new_indirect(pdf, xref, 0)
            if mupdf.pdf_array_contains(ocgs, ind):
                mupdf.pdf_array_push(onarray, ind)

    mupdf.pdf_array_push(configs, D)
def JM_char_bbox(line, ch):
    '''
    Return the rectangle of the character quad of `ch`.

    If the line's wmode is set and the rectangle is less high than the
    character size, y0 is moved so that the height equals that size.
    '''
    rect = mupdf.fz_rect_from_quad(JM_char_quad(line, ch))
    if line.m_internal.wmode:
        if rect.y1 < rect.y0 + ch.m_internal.size:
            rect.y0 = rect.y1 - ch.m_internal.size
    return rect
def JM_char_font_flags(font, line, ch):
    '''
    Return the font flags of a character: detect_super_script(line, ch)
    plus the TEXT_FONT_* value of each property (italic, serif, monospaced,
    bold) that MuPDF reports for `font`.
    '''
    property_bits = (
        (mupdf.fz_font_is_italic, TEXT_FONT_ITALIC),
        (mupdf.fz_font_is_serif, TEXT_FONT_SERIFED),
        (mupdf.fz_font_is_monospaced, TEXT_FONT_MONOSPACED),
        (mupdf.fz_font_is_bold, TEXT_FONT_BOLD),
    )
    flags = detect_super_script(line, ch)
    for has_property, bit in property_bits:
        flags += has_property(font) * bit
    return flags
def JM_char_quad(line, ch):
    '''
    Return the quad of character `ch`, re-computed if the font's ascender /
    descender values make no sense.

    The stored quad is returned as is when quad corrections are switched
    off, for vertical write mode, and when ascender minus descender is at
    least 1 while `small_glyph_heights` is off.
    '''
    if g_use_extra:
        # This reduces time taken to extract text from PyMuPDF.pdf from 20s to
        # 15s.
        return mupdf.FzQuad(extra.JM_char_quad(line.m_internal, ch.m_internal))

    assert isinstance(line, mupdf.FzStextLine)
    assert isinstance(ch, mupdf.FzStextChar)
    # No special handling requested, or vertical write mode (never touched).
    if _globals.skip_quad_corrections or line.m_internal.wmode:
        return ch.quad

    font = mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font))
    ascender = JM_font_ascender(font)
    descender = JM_font_descender(font)
    fsize = ch.m_internal.size
    span = ascender - descender + FLT_EPSILON
    if span >= 1 and _globals.small_glyph_heights == 0:     # no problem
        return mupdf.FzQuad(ch.m_internal.quad)

    bbox = mupdf.fz_font_bbox(font)
    fwidth = bbox.x1 - bbox.x0

    if ascender < 1e-3:     # probably Tesseract glyphless font
        descender = -0.1
        ascender = 0.9
        span = 1.0

    if _globals.small_glyph_heights or span < 1:
        descender = descender / span
        ascender = ascender / span
    span = ascender - descender
    ascender = ascender * fsize / span
    descender = descender * fsize / span

    # Re-compute quad with the adjusted ascender / descender values:
    # Move ch->origin to (0,0) and de-rotate quad, then adjust the corners,
    # re-rotate and move back to ch->origin location.
    cos_ = line.m_internal.dir.x    # cosine
    sin_ = line.m_internal.dir.y    # sine
    derotate = mupdf.fz_make_matrix(cos_, -sin_, sin_, cos_, 0, 0)
    rotate = mupdf.fz_make_matrix(cos_, sin_, -sin_, cos_, 0, 0)
    if cos_ == -1:  # left-right flip
        derotate.d = 1
        rotate.d = 1
    origin = ch.m_internal.origin
    to_zero = mupdf.fz_make_matrix(1, 0, 0, 1, -origin.x, -origin.y)
    to_origin = mupdf.fz_make_matrix(1, 0, 0, 1, origin.x, origin.y)

    quad = mupdf.fz_transform_quad(mupdf.FzQuad(ch.m_internal.quad), to_zero)   # move origin to (0,0)
    quad = mupdf.fz_transform_quad(quad, derotate)  # de-rotate corners

    # adjust vertical coordinates
    if cos_ == 1 and quad.ul.y > 0:     # up-down flip
        upper, lower = ascender, descender
    else:
        upper, lower = -ascender, -descender
    quad.ul.y = upper
    quad.ur.y = upper
    quad.ll.y = lower
    quad.lr.y = lower

    # adjust horizontal coordinates that are too crazy:
    # (1) left x must be >= 0
    # (2) if bbox width is 0, lookup char advance in font.
    if quad.ll.x < 0:
        quad.ll.x = 0
        quad.ul.x = 0
    if quad.lr.x - quad.ll.x < FLT_EPSILON:
        glyph = mupdf.fz_encode_character(font, ch.m_internal.c)
        if glyph:
            fwidth = mupdf.fz_advance_glyph(font, glyph, line.m_internal.wmode)
            quad.lr.x = quad.ll.x + fwidth * fsize
            quad.ur.x = quad.lr.x

    quad = mupdf.fz_transform_quad(quad, rotate)    # rotate back
    quad = mupdf.fz_transform_quad(quad, to_origin)     # translate back
    return quad
def JM_choice_options(annot):
    '''
    Return the list of choices for list or combo boxes.

    Each item is either the option text or, for options stored as
    two-element arrays, a tuple of both texts.  None is returned if there
    are no options (wrong widget type).
    '''
    annot_obj = mupdf.pdf_annot_obj(annot.this)

    def collect(optarr, count, convert_single=None):
        # Read the first `count` entries of the /Opt array `optarr`.
        items = []
        for idx in range(count):
            entry = mupdf.pdf_array_get(optarr, idx)
            if mupdf.pdf_array_len(entry) == 2:
                items.append(
                    (
                        mupdf.pdf_to_text_string(mupdf.pdf_array_get(entry, 0)),
                        mupdf.pdf_to_text_string(mupdf.pdf_array_get(entry, 1)),
                    )
                )
            else:
                text = mupdf.pdf_to_text_string(entry)
                items.append(convert_single(text) if convert_single else text)
        return items

    if mupdf_version_tuple >= (1, 24):
        opts = mupdf.pdf_choice_widget_options2(annot, 0)
    else:
        # pdf_choice_widget_options() is not usable from python, so we
        # implement it ourselves here.  The number of options is taken from
        # the inheritable /Opt entry, the values from the annotation's own
        # /Opt.
        inherited = mupdf.pdf_dict_get_inheritable(
            mupdf.pdf_annot_obj(annot.this), PDF_NAME('Opt')
        )
        count = mupdf.pdf_array_len(inherited)
        if count:
            opts = collect(
                mupdf.pdf_dict_get(annot_obj, PDF_NAME('Opt')),
                count,
                JM_UnicodeFromStr,
            )
        else:
            opts = []

    total = len(opts)
    if total == 0:
        return  # wrong widget type

    return collect(mupdf.pdf_dict_get(annot_obj, PDF_NAME('Opt')), total)
def JM_clear_pixmap_rect_with_value(dest, value, b):
    '''
    Clear a pixmap rectangle - this version also supports non-alpha pixmaps.

    Args:
        dest: the pixmap to modify.
        value: sample value written to the color components.
        b: integer rectangle; it is intersected with the pixmap's bbox.

    Returns:
        0 if the intersection is empty (nothing written), else 1.
    '''
    b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox(dest))
    w = b.x1 - b.x0
    h = b.y1 - b.y0
    if w <= 0 or h <= 0:
        return 0

    destspan = dest.stride()
    n = dest.n()
    has_alpha = dest.alpha()
    destp = destspan * (b.y0 - dest.y()) + n * (b.x0 - dest.x())

    # CMYK needs special handling (and potentially any other subtractive colorspaces)
    if mupdf.fz_colorspace_n(dest.colorspace()) == 4:
        value = 255 - value
        # Exactly h rows: the previous "test y, then decrement" loop ran
        # h + 1 times and wrote one row beyond the rectangle.
        for _ in range(h):
            s = destp
            for _x in range(w):
                for sample in (0, 0, 0, value):
                    mupdf.fz_samples_set(dest, s, sample)
                    s += 1
                if has_alpha:
                    mupdf.fz_samples_set(dest, s, 255)
                    s += 1
            destp += destspan
        return 1

    for _ in range(h):
        s = destp
        for _x in range(w):
            for _k in range(n - 1):
                mupdf.fz_samples_set(dest, s, value)
                s += 1
            # last component: opaque alpha if present, else one more color sample
            mupdf.fz_samples_set(dest, s, 255 if has_alpha else value)
            s += 1
        destp += destspan
    return 1
def JM_color_FromSequence(color):
    '''
    Normalize a color specification.

    Args:
        color: a single number, or a list / tuple with 0, 1, 3 or 4
            components.

    Returns:
        `(n, components)` where `components` is a new list and `n` its
        length. Components outside the interval [0, 1] are replaced by 1.
        `(-1, [])` is returned for any other type or length.
    '''
    if isinstance(color, (int, float)):    # maybe just a single float
        # wrap the number (indexing it, as was done before, raises TypeError)
        color = [color]

    if not isinstance(color, (list, tuple)):
        return -1, []

    if len(color) not in (0, 1, 3, 4):
        return -1, []

    # Always build a list: tuples are immutable, and the caller's sequence
    # must not be modified.
    ret = [1 if (c < 0 or c > 1) else c for c in color]
    return len(ret), ret
def JM_color_count(pm, clip):
    '''
    Count how often each pixel value occurs inside a pixmap area.

    Args:
        pm: the pixmap to inspect.
        clip: rectangle-like value (converted via `JM_rect_from_py()`); it
            is rounded and intersected with the pixmap's bbox.

    Returns:
        A dict mapping each pixel value (as returned by `_read_samples()`
        for `pm.n()` samples) to its number of occurrences. Empty if the
        area does not intersect the pixmap.
    '''
    rc = dict()
    irect = mupdf.fz_pixmap_bbox(pm)
    irect = mupdf.fz_intersect_irect(irect, mupdf.fz_round_rect(JM_rect_from_py(clip)))
    # Bail out before touching any samples: with an empty intersection the
    # start offset computed below is meaningless.
    if mupdf.fz_is_empty_irect(irect):
        return rc

    stride = pm.stride()
    width = irect.x1 - irect.x0
    height = irect.y1 - irect.y0
    n = pm.n()
    substride = width * n
    s = stride * (irect.y0 - pm.y()) + (irect.x0 - pm.x()) * n
    oldpix = _read_samples(pm, s, n)
    cnt = 0
    for _ in range(height):
        for j in range(0, substride, n):
            newpix = _read_samples(pm, s + j, n)
            if newpix != oldpix:
                # a run of equal pixels ended: add its length to the total
                rc[oldpix] = rc.get(oldpix, 0) + cnt
                cnt = 1
                oldpix = newpix
            else:
                cnt += 1
        s += stride
    # account for the final run
    rc[oldpix] = rc.get(oldpix, 0) + cnt
    return rc
def JM_compress_buffer(inbuffer):
    '''
    Deflate the content of a MuPDF buffer into a new buffer.

    Returns None if deflating produced no data, else a `mupdf.FzBuffer`
    wrapping the compressed data, resized to the compressed length.
    '''
    deflated, deflated_size = mupdf.fz_new_deflated_data_from_buffer(
            inbuffer,
            mupdf.FZ_DEFLATE_BEST,
            )
    #log( '{=data compressed_length}')
    if not deflated or deflated_size == 0:
        return None
    raw = mupdf.fz_new_buffer_from_data(deflated, deflated_size)
    result = mupdf.FzBuffer(raw)
    mupdf.fz_resize_buffer(result, deflated_size)
    return result
def JM_copy_rectangle(page, area):
    '''
    Return the text of all characters whose bbox overlaps `area`.

    Iterates the text blocks of `page` (non-text blocks are skipped). Lines
    that contributed at least one character are separated by "\\n"; each
    character is passed through `make_escape()`.
    '''
    collected = io.StringIO()
    newline_pending = False
    for block in page:
        if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
            for line in block:
                line_contributed = False
                for ch in line:
                    if not JM_rects_overlap(area, JM_char_bbox(line, ch)):
                        continue
                    line_contributed = True
                    if newline_pending:
                        # separator belongs before the first char of a new line
                        collected.write("\n")
                        newline_pending = False
                    collected.write(make_escape(ch.m_internal.c))
                if line_contributed:
                    newline_pending = True
    return collected.getvalue()
def JM_convert_to_pdf(doc, fp, tp, rotate):
    '''
    Convert pages of any MuPDF document to a new PDF.

    Pages are taken from `fp` to `tp` inclusive; if `fp > tp` they are
    copied in descending order. `rotate` is normalized via
    `JM_norm_rotation()` and applied to every new page.

    Returns a bytes object containing the PDF.
    '''
    target = mupdf.PdfDocument()
    if fp > tp:
        step, low, high = -1, tp, fp    # count backwards
    else:
        step, low, high = 1, fp, tp
    rotation = JM_norm_rotation(rotate)

    pno = fp
    while _INRANGE(pno, low, high):     # interpret & write document pages as PDF pages
        source_page = mupdf.fz_load_page(doc, pno)
        mediabox = mupdf.fz_bound_page(source_page)
        dev, resources, contents = mupdf.pdf_page_write(target, mediabox)
        mupdf.fz_run_page(source_page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
        mupdf.fz_close_device(dev)
        dev = None
        new_page = mupdf.pdf_add_page(target, mediabox, rotation, resources, contents)
        mupdf.pdf_insert_page(target, -1, new_page)
        pno += step

    # PDF created - now write it to a bytes object
    # prepare write options structure
    opts = mupdf.PdfWriteOptions()
    for option, setting in (
            ('do_garbage', 4),
            ('do_compress', 1),
            ('do_compress_images', 1),
            ('do_compress_fonts', 1),
            ('do_sanitize', 1),
            ('do_incremental', 0),
            ('do_ascii', 0),
            ('do_decompress', 0),
            ('do_linear', 0),
            ('do_clean', 1),
            ('do_pretty', 0),
            ):
        setattr(opts, option, setting)

    sink = mupdf.fz_new_buffer(8192)
    out = mupdf.FzOutput(sink)
    mupdf.pdf_write_document(target, out, opts)
    out.fz_close_output()
    pdf_bytes = mupdf.fz_buffer_extract_copy(sink)
    assert isinstance(pdf_bytes, bytes)
    return pdf_bytes
# Copied from MuPDF v1.14
# Create widget
def JM_create_widget(doc, page, type, fieldname):
    '''
    Create a new widget annotation on `page` and link it into the
    document's Root/AcroForm/Fields array.

    Args:
        doc: the PDF document.
        page: the PDF page receiving the widget.
        type: field type; compared against
            `mupdf.PDF_WIDGET_TYPE_SIGNATURE` and forwarded to
            `JM_set_field_type()`.
        fieldname: text stored under the widget's /T key.

    Returns:
        The annotation returned by `mupdf.pdf_create_annot_raw()`.

    Raises:
        Any exception raised while setting up the widget is re-raised after
        the new annotation was deleted and, for signature fields, the
        previous /SigFlags value was restored.
    '''
    old_sigflags = mupdf.pdf_to_int(mupdf.pdf_dict_getp(mupdf.pdf_trailer(doc), "Root/AcroForm/SigFlags"))
    annot = mupdf.pdf_create_annot_raw(page, mupdf.PDF_ANNOT_WIDGET)
    annot_obj = mupdf.pdf_annot_obj(annot)
    try:
        JM_set_field_type(doc, annot_obj, type)
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('T'), fieldname)

        if type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
            sigflags = old_sigflags | (SigFlag_SignaturesExist | SigFlag_AppendOnly)
            mupdf.pdf_dict_putl(
                    mupdf.pdf_trailer(doc),
                    # was misspelled 'pdf_new_nt', which made every
                    # signature widget creation fail with AttributeError
                    mupdf.pdf_new_int(sigflags),
                    PDF_NAME('Root'),
                    PDF_NAME('AcroForm'),
                    PDF_NAME('SigFlags'),
                    )
        # pdf_create_annot will have linked the new widget into the page's
        # annot array. We also need it linked into the document's form
        form = mupdf.pdf_dict_getp(mupdf.pdf_trailer(doc), "Root/AcroForm/Fields")
        if not form.m_internal:
            form = mupdf.pdf_new_array(doc, 1)
            mupdf.pdf_dict_putl(
                    mupdf.pdf_trailer(doc),
                    form,
                    PDF_NAME('Root'),
                    PDF_NAME('AcroForm'),
                    PDF_NAME('Fields'),
                    )
        mupdf.pdf_array_push(form, annot_obj)  # Cleanup relies on this statement being last
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        # roll back: remove the half-built widget ...
        mupdf.pdf_delete_annot(page, annot)

        # ... and restore the signature flags we may have changed
        if type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
            mupdf.pdf_dict_putl(
                    mupdf.pdf_trailer(doc),
                    mupdf.pdf_new_int(old_sigflags),
                    PDF_NAME('Root'),
                    PDF_NAME('AcroForm'),
                    PDF_NAME('SigFlags'),
                    )
        raise
    return annot
def JM_cropbox(page_obj):
    '''
    Return a PDF page's CropBox.

    Falls back to the MediaBox when the (inheritable) /CropBox is infinite
    or empty. The y-coordinates of the result are expressed relative to the
    MediaBox's y1 value.
    '''
    if g_use_extra:
        return extra.JM_cropbox(page_obj)

    mediabox = JM_mediabox(page_obj)
    raw_cropbox = mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('CropBox'))
    cropbox = mupdf.pdf_to_rect(raw_cropbox)
    unusable = mupdf.fz_is_infinite_rect(cropbox) or mupdf.fz_is_empty_rect(cropbox)
    if unusable:
        cropbox = mediabox
    # Both new values are computed before either is stored - cropbox may be
    # the very same object as mediabox at this point.
    cropbox.y0, cropbox.y1 = mediabox.y1 - cropbox.y1, mediabox.y1 - cropbox.y0
    return cropbox
def JM_cropbox_size(page_obj):
    '''
    Return width and height of a page's CropBox as a MuPDF point.
    '''
    box = JM_cropbox(page_obj)
    return mupdf.fz_make_point(
            abs(box.x1 - box.x0),
            abs(box.y1 - box.y0),
            )
def JM_derotate_page_matrix(page):
    '''
    Return the inverse of the matrix given by JM_rotate_page_matrix(page).
    '''
    return mupdf.fz_invert_matrix(JM_rotate_page_matrix(page))
def JM_embed_file(
        pdf,
        buf,
        filename,
        ufilename,
        desc,
        compress,
        ):
    '''
    Embed a new file in a PDF (not only /EmbeddedFiles entries).

    Builds a /Filespec dictionary with /F, /UF and /Desc taken from the
    arguments, adds a stream object filled from `buf` (via
    `JM_update_stream()`, honoring `compress`) and records the buffer size
    under /DL, /Length and /Params/Size.

    Returns the new file specification dictionary.
    '''
    filespec = mupdf.pdf_new_dict(pdf, 6)
    mupdf.pdf_dict_put_dict(filespec, PDF_NAME('CI'), 4)
    ef = mupdf.pdf_dict_put_dict(filespec, PDF_NAME('EF'), 4)
    for key, text in (('F', filename), ('UF', ufilename), ('Desc', desc)):
        mupdf.pdf_dict_put_text_string(filespec, PDF_NAME(key), text)
    mupdf.pdf_dict_put(filespec, PDF_NAME('Type'), PDF_NAME('Filespec'))

    # create the stream with placeholder content; the real data follows below
    placeholder = mupdf.fz_new_buffer_from_copied_data(b' ')
    stream = mupdf.pdf_add_stream(pdf, placeholder, mupdf.PdfObj(), 0)
    mupdf.pdf_dict_put(ef, PDF_NAME('F'), stream)
    JM_update_stream(pdf, stream, buf, compress)

    size, _ = mupdf.fz_buffer_storage(buf)
    for key in ('DL', 'Length'):
        mupdf.pdf_dict_put_int(stream, PDF_NAME(key), size)
    params = mupdf.pdf_dict_put_dict(stream, PDF_NAME('Params'), 4)
    mupdf.pdf_dict_put_int(params, PDF_NAME('Size'), size)
    return filespec
def JM_embedded_clean(pdf):
    '''
    Perform some cleaning around embedded files in the PDF catalog:
    (1) remove /Collection if it is an empty dictionary
    (2) set /PageMode to /UseAttachments if /Names/EmbeddedFiles/Names exists
    '''
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))

    # drop an empty /Collection entry
    collection = mupdf.pdf_dict_get(catalog, PDF_NAME('Collection'))
    collection_is_empty = collection.m_internal and mupdf.pdf_dict_len(collection) == 0
    if collection_is_empty:
        mupdf.pdf_dict_del(catalog, PDF_NAME('Collection'))

    embedded_names = mupdf.pdf_dict_getl(
            catalog,
            PDF_NAME('Names'),
            PDF_NAME('EmbeddedFiles'),
            PDF_NAME('Names'),
            )
    if not embedded_names.m_internal:
        return
    mupdf.pdf_dict_put_name(catalog, PDF_NAME('PageMode'), "UseAttachments")
def JM_EscapeStrFromBuffer(buff):
    '''
    Return the content of a MuPDF buffer as a str, decoded with
    PyUnicode_DecodeRawUnicodeEscape(); '' for a null buffer.
    '''
    if buff.m_internal:
        raw = mupdf.fz_buffer_extract_copy(buff)
        return PyUnicode_DecodeRawUnicodeEscape(raw, errors='replace')
    return ''
def JM_ensure_identity(pdf):
    '''
    Make sure the PDF trailer has an /ID entry.

    If it is missing, a two-element array is stored whose items are both
    built from the same 16 random bytes.
    '''
    existing = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('ID'))
    if existing.m_internal:
        return
    random_bytes = mupdf.fz_memrnd2(16)
    # Need to convert raw bytes into a str to send to
    # mupdf.pdf_new_string(). chr() seems to work for this.
    rnd = ''.join(chr(b) for b in random_bytes)
    id_array = mupdf.pdf_dict_put_array(mupdf.pdf_trailer(pdf), PDF_NAME('ID'), 2)
    for _ in range(2):
        mupdf.pdf_array_push(id_array, mupdf.pdf_new_string(rnd, len(rnd)))
def JM_ensure_ocproperties(pdf):
    '''
    Return the catalog's /OCProperties dictionary, creating it if missing.

    A newly created dictionary gets an empty /OCGs array and a /D
    dictionary with empty /ON, /OFF, /Order and /RBGroups arrays.
    '''
    root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    ocp = mupdf.pdf_dict_get(root, PDF_NAME('OCProperties'))
    if not ocp.m_internal:
        root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
        ocp = mupdf.pdf_dict_put_dict(root, PDF_NAME('OCProperties'), 2)
        mupdf.pdf_dict_put_array(ocp, PDF_NAME('OCGs'), 0)
        default_config = mupdf.pdf_dict_put_dict(ocp, PDF_NAME('D'), 5)
        for key in ('ON', 'OFF', 'Order', 'RBGroups'):
            mupdf.pdf_dict_put_array(default_config, PDF_NAME(key), 0)
    return ocp
def JM_expand_fname(name):
    '''
    Map a font name to one of the abbreviations "Cour", "TiRo", "Symb",
    "ZaDb" or "Helv", based on its first two characters.

    "Helv" is the result for empty names and for anything not recognized.
    '''
    if not name:
        return "Helv"
    prefix_table = (
            (("Co", "co"), "Cour"),
            (("Ti", "ti"), "TiRo"),
            (("Sy", "sy"), "Symb"),
            (("Za", "za"), "ZaDb"),
            )
    for prefixes, short_name in prefix_table:
        if name.startswith(prefixes):
            return short_name
    return "Helv"
def JM_field_type_text(wtype):
    '''
    Return the display string for a widget type, "unknown" if not recognized.
    '''
    # (name of the mupdf constant, text); constants are looked up lazily and
    # compared in this order.
    known_types = (
            ('PDF_WIDGET_TYPE_BUTTON', "Button"),
            ('PDF_WIDGET_TYPE_CHECKBOX', "CheckBox"),
            ('PDF_WIDGET_TYPE_RADIOBUTTON', "RadioButton"),
            ('PDF_WIDGET_TYPE_TEXT', "Text"),
            ('PDF_WIDGET_TYPE_LISTBOX', "ListBox"),
            ('PDF_WIDGET_TYPE_COMBOBOX', "ComboBox"),
            ('PDF_WIDGET_TYPE_SIGNATURE', "Signature"),
            )
    for constant, text in known_types:
        if wtype == getattr(mupdf, constant):
            return text
    return "unknown"
def JM_fill_pixmap_rect_with_color(dest, col, b):
    '''
    Fill a pixmap rectangle with a color.

    `b` is intersected with the pixmap's bbox; every pixel in the result
    gets its `dest.n()` samples set from `col[0]`, `col[1]`, ...

    Returns 0 if the intersection is empty, else 1.
    '''
    assert isinstance(dest, mupdf.FzPixmap)
    b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox(dest))
    width = b.x1 - b.x0
    height = b.y1 - b.y0
    if width <= 0 or height <= 0:
        return 0
    row_bytes = dest.stride()
    row_start = row_bytes * (b.y0 - dest.y()) + dest.n() * (b.x0 - dest.x())
    for _row in range(height):
        offset = row_start
        for _col in range(width):
            for component in range(dest.n()):
                mupdf.fz_samples_set(dest, offset, col[component])
                offset += 1
        row_start += row_bytes
    return 1
def JM_find_annot_irt(annot):
    '''
    Return the first annotation on the same page whose /IRT key
    ("In Response To") refers to `annot`, or None if there is none.
    Used to remove the response chain of a given annotation.
    '''
    assert isinstance(annot, mupdf.PdfAnnot)
    target_obj = mupdf.pdf_annot_obj(annot)
    page = mupdf.pdf_annot_page(annot)
    # walk MuPDF's internal annots list
    candidate = mupdf.pdf_first_annot(page)
    while True:
        assert isinstance(candidate, mupdf.PdfAnnot)
        if not candidate.m_internal:
            return None
        irt = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(candidate), 'IRT')
        # pdf_objcmp() is falsy when both objects compare equal
        if irt.m_internal and not mupdf.pdf_objcmp(irt, target_obj):
            return candidate
        candidate = mupdf.pdf_next_annot(candidate)
def JM_font_ascender(font):
    '''
    Return the font's ascender, or the fixed value 0.8 when
    `_globals.skip_quad_corrections` is set.
    '''
    assert isinstance(font, mupdf.FzFont)
    return 0.8 if _globals.skip_quad_corrections else mupdf.fz_font_ascender(font)
def JM_font_descender(font):
    '''
    Return the font's descender, or the fixed value -0.2 when
    `_globals.skip_quad_corrections` is set.
    '''
    assert isinstance(font, mupdf.FzFont)
    return -0.2 if _globals.skip_quad_corrections else mupdf.fz_font_descender(font)
def JM_is_word_delimiter(ch, delimiters):
    """Tell whether character code `ch` ends a word.

    True for any code <= 32, for 160, and for any character contained in
    the optional iterable `delimiters`.
    """
    is_whitespace = ch <= 32 or ch == 160
    if is_whitespace:
        return True
    if not delimiters:  # no extra delimiters provided
        return False
    as_char = chr(ch)
    return any(candidate == as_char for candidate in delimiters)
def JM_font_name(font):
    '''
    Return the font's name.

    Unless `_globals.subset_fontnames` is set, a prefix of exactly six
    characters followed by '+' is stripped.
    '''
    assert isinstance(font, mupdf.FzFont)
    full_name = mupdf.fz_font_name(font)
    plus_at = full_name.find('+')
    if not _globals.subset_fontnames and plus_at == 6:
        return full_name[plus_at + 1:]
    return full_name
def JM_gather_fonts(pdf, dict_, fontlist, stream_xref):
    '''
    Append one tuple per font dictionary in `dict_` to `fontlist`.

    Each tuple is (xref, extension, subtype, name, reference name,
    encoding, stream_xref). Entries that are not dictionaries trigger a
    MuPDF warning and are skipped. Always returns 1.
    '''
    for idx in range(mupdf.pdf_dict_len(dict_)):
        refname = mupdf.pdf_dict_get_key(dict_, idx)
        fontdict = mupdf.pdf_dict_get_val(dict_, idx)
        if not mupdf.pdf_is_dict(fontdict):
            mupdf.fz_warn( f"'{mupdf.pdf_to_name(refname)}' is no font dict ({mupdf.pdf_to_num(fontdict)} 0 R)")
            continue

        subtype = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Subtype)
        # prefer /BaseFont, fall back to /Name
        name = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_BaseFont)
        if not name.m_internal or mupdf.pdf_is_null(name):
            name = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Name)

        encoding = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Encoding)
        if mupdf.pdf_is_dict(encoding):
            encoding = mupdf.pdf_dict_get(encoding, mupdf.PDF_ENUM_NAME_BaseEncoding)

        xref = mupdf.pdf_to_num(fontdict)
        extension = JM_get_fontextension(pdf, xref) if xref else "n/a"
        fontlist.append((
                xref,
                extension,
                mupdf.pdf_to_name(subtype),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(name)),
                mupdf.pdf_to_name(refname),
                mupdf.pdf_to_name(encoding),
                stream_xref,
                ))
    return 1
def JM_gather_forms(doc, dict_: mupdf.PdfObj, imagelist, stream_xref: int):
    '''
    Append one tuple per /Form xobject in `dict_` to `imagelist`.

    Each tuple is (xref, reference name, stream_xref, bbox), where bbox is
    the /BBox transformed by /Matrix, or the infinite rectangle if /BBox
    is missing. Always returns 1.
    '''
    assert isinstance(doc, mupdf.PdfDocument)
    for idx in range(mupdf.pdf_dict_len(dict_)):
        refname = mupdf.pdf_dict_get_key(dict_, idx)
        formdict = mupdf.pdf_dict_get_val(dict_, idx)
        if not mupdf.pdf_is_dict(formdict):
            mupdf.fz_warn( f"'{mupdf.pdf_to_name(refname)}' is no form dict ({mupdf.pdf_to_num(formdict)} 0 R)")
            continue

        subtype = mupdf.pdf_dict_get(formdict, PDF_NAME('Subtype'))
        if not mupdf.pdf_name_eq(subtype, PDF_NAME('Form')):
            continue

        bbox_obj = mupdf.pdf_dict_get(formdict, PDF_NAME('BBox'))
        matrix_obj = mupdf.pdf_dict_get(formdict, PDF_NAME('Matrix'))
        matrix = mupdf.pdf_to_matrix(matrix_obj) if matrix_obj.m_internal else mupdf.FzMatrix()
        if bbox_obj.m_internal:
            bbox = mupdf.fz_transform_rect(mupdf.pdf_to_rect(bbox_obj), matrix)
        else:
            bbox = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)

        imagelist.append((
                mupdf.pdf_to_num(formdict),
                mupdf.pdf_to_name(refname),
                stream_xref,
                JM_py_from_rect(bbox),
                ))
    return 1
def JM_gather_images(doc: mupdf.PdfDocument, dict_: mupdf.PdfObj, imagelist, stream_xref: int):
    '''
    Append one tuple per /Image xobject in `dict_` to `imagelist`.

    Each tuple is (xref, xref of /SMask or /Mask (0 if none), width,
    height, bits per component, colorspace name, alternate colorspace
    name, reference name, filter name, stream_xref). Always returns 1.
    '''
    def lookup(obj, key, abbreviation):
        # fetch a value by its full key or its abbreviated key
        return mupdf.pdf_dict_geta(obj, PDF_NAME(key), PDF_NAME(abbreviation))

    for idx in range(mupdf.pdf_dict_len(dict_)):
        refname = mupdf.pdf_dict_get_key(dict_, idx)
        imagedict = mupdf.pdf_dict_get_val(dict_, idx)
        if not mupdf.pdf_is_dict(imagedict):
            mupdf.fz_warn(f"'{mupdf.pdf_to_name(refname)}' is no image dict ({mupdf.pdf_to_num(imagedict)} 0 R)")
            continue

        subtype = mupdf.pdf_dict_get(imagedict, PDF_NAME('Subtype'))
        if not mupdf.pdf_name_eq(subtype, PDF_NAME('Image')):
            continue

        xref = mupdf.pdf_to_num(imagedict)
        smask = lookup(imagedict, 'SMask', 'Mask')
        smask_xref = mupdf.pdf_to_num(smask) if smask.m_internal else 0

        filter_ = lookup(imagedict, 'Filter', 'F')
        if mupdf.pdf_is_array(filter_):
            filter_ = mupdf.pdf_array_get(filter_, 0)

        altcs = mupdf.PdfObj(0)
        cs = lookup(imagedict, 'ColorSpace', 'CS')
        if mupdf.pdf_is_array(cs):
            cs_array = cs
            cs = mupdf.pdf_array_get(cs_array, 0)
            has_alternate = (
                    mupdf.pdf_name_eq(cs, PDF_NAME('DeviceN'))
                    or mupdf.pdf_name_eq(cs, PDF_NAME('Separation'))
                    )
            if has_alternate:
                # the alternate colorspace is the third array item
                altcs = mupdf.pdf_array_get(cs_array, 2)
                if mupdf.pdf_is_array(altcs):
                    altcs = mupdf.pdf_array_get(altcs, 0)

        width = lookup(imagedict, 'Width', 'W')
        height = lookup(imagedict, 'Height', 'H')
        bpc = lookup(imagedict, 'BitsPerComponent', 'BPC')

        imagelist.append((
                xref,
                smask_xref,
                mupdf.pdf_to_int(width),
                mupdf.pdf_to_int(height),
                mupdf.pdf_to_int(bpc),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(cs)),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(altcs)),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(refname)),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(filter_)),
                stream_xref,
                ))
    return 1
def JM_get_annot_by_xref(page, xref):
    '''
    Return the annotation of `page` that has the given xref.

    Raises Exception if no annotation of the page has this xref.
    '''
    assert isinstance(page, mupdf.PdfPage)
    # walk MuPDF's internal annots list
    annot = mupdf.pdf_first_annot(page)
    while annot.m_internal:
        if xref == mupdf.pdf_to_num(mupdf.pdf_annot_obj(annot)):
            return annot
        annot = mupdf.pdf_next_annot(annot)
    raise Exception("xref %d is not an annot of this page" % xref)
def JM_get_annot_by_name(page, name):
    '''
    Return the annotation of `page` whose /NM value equals `name`.

    Returns None for an empty `name`; raises Exception if no annotation
    matches.
    '''
    assert isinstance(page, mupdf.PdfPage)
    if not name:
        return
    # walk MuPDF's internal annots list
    annot = mupdf.pdf_first_annot(page)
    while annot.m_internal:
        nm_obj = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "NM")
        response, len_ = mupdf.pdf_to_string(nm_obj)
        if name == response:
            return annot
        annot = mupdf.pdf_next_annot(annot)
    raise Exception("'%s' is not an annot of this page" % name)
def JM_get_annot_id_list(page):
    '''
    Return the /NM values of all entries in the page's /Annots array that
    have one.
    '''
    annots = mupdf.pdf_dict_get(page.obj(), mupdf.PDF_ENUM_NAME_Annots)
    if not annots.m_internal:
        return []
    nm_objects = (
            mupdf.pdf_dict_gets(mupdf.pdf_array_get(annots, i), "NM")
            for i in range(mupdf.pdf_array_len(annots))
            )
    return [mupdf.pdf_to_text_string(nm) for nm in nm_objects if nm.m_internal]
def JM_get_annot_xref_list( page_obj):
    '''
    Return a list of (xref, annotation type, /NM id) tuples for the entries
    of the page's /Annots array.

    Entries without /Subtype, or with a subtype MuPDF does not know, are
    left out. Delegates to `extra.JM_get_annot_xref_list()` when
    `g_use_extra` is set.
    '''
    if g_use_extra:
        return extra.JM_get_annot_xref_list(page_obj)

    result = []
    annots = mupdf.pdf_dict_get(page_obj, PDF_NAME('Annots'))
    for idx in range(mupdf.pdf_array_len(annots)):
        annot_obj = mupdf.pdf_array_get(annots, idx)
        xref = mupdf.pdf_to_num(annot_obj)
        subtype = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Subtype'))
        if subtype.m_internal:  # subtype is required
            annot_type = mupdf.pdf_annot_type_from_string(mupdf.pdf_to_name(subtype))
            if annot_type != mupdf.PDF_ANNOT_UNKNOWN:   # only accept valid annot types
                nm = mupdf.pdf_dict_gets(annot_obj, "NM")
                result.append((xref, annot_type, mupdf.pdf_to_text_string(nm)))
    return result
def JM_get_annot_xref_list2(page):
    '''
    Like JM_get_annot_xref_list(), but takes a page whose `_pdf_page()`
    yields the PDF page; returns an empty list if that page is null.
    '''
    pdf_page = page._pdf_page()
    if pdf_page.m_internal:
        return JM_get_annot_xref_list(pdf_page.obj())
    return list()
def JM_get_border_style(style):
    '''
    Return the pdf_obj "border style" name for a Python str.

    Only the first character of `style` matters (case-insensitive):
    b, d, i, u select the corresponding name; everything else, including
    None, yields the S name.
    '''
    default = mupdf.PDF_ENUM_NAME_S
    if style is None:
        return default
    first_letter_table = (
            (("b", "B"), 'PDF_ENUM_NAME_B'),
            (("d", "D"), 'PDF_ENUM_NAME_D'),
            (("i", "I"), 'PDF_ENUM_NAME_I'),
            (("u", "U"), 'PDF_ENUM_NAME_U'),
            (("s", "S"), 'PDF_ENUM_NAME_S'),
            )
    for letters, constant in first_letter_table:
        if style.startswith(letters):
            return getattr(mupdf, constant)
    return default
def JM_get_font(
        fontname,
        fontfile,
        fontbuffer,
        script,
        lang,
        ordering,
        is_bold,
        is_italic,
        is_serif,
        embed,
        ):
    '''
    Return a fz_font from a number of parameters.

    The first applicable source wins, in this order:
    `fontfile`, `fontbuffer`, CJK font for `ordering` (if > -1),
    Base-14 / builtin font `fontname`, Noto font for `script` / `lang`,
    and finally MuPDF's fallback font.

    Raises:
        RuntimeError(MSG_FONT_FAILED) if the selected source yields no font.
    '''
    def fertig(font):
        # common exit: validate the font and apply the embedding flag
        if not font.m_internal:
            raise RuntimeError(MSG_FONT_FAILED)
        # if font allows this, set embedding
        if not font.m_internal.flags.never_embed:
            mupdf.fz_set_font_embedding(font, embed)
        return font

    index = 0
    if fontfile:
        font = mupdf.fz_new_font_from_file(None, fontfile, index, 0)
        return fertig(font)

    if fontbuffer:
        res = JM_BufferFromBytes(fontbuffer)
        font = mupdf.fz_new_font_from_buffer(None, res, index, 0)
        return fertig(font)

    if ordering > -1:
        font = mupdf.fz_new_cjk_font(ordering)
        return fertig(font)

    if fontname:
        # Base-14 or a MuPDF builtin font
        font = mupdf.fz_new_base14_font(fontname)
        if font.m_internal:
            return fertig(font)
        font = mupdf.fz_new_builtin_font(fontname, is_bold, is_italic)
        return fertig(font)

    # Check for NOTO font
    data, size, index = mupdf.fz_lookup_noto_font(script, lang)
    if data:
        font = mupdf.fz_new_font_from_memory(None, data, size, index, 0)
        if font.m_internal:
            return fertig(font)
    # No Noto data (or it could not be loaded): use the fallback font.
    # Previously a missing Noto font left 'font' as None and the
    # '.m_internal' access raised AttributeError before getting here.
    font = mupdf.fz_load_fallback_font(script, lang, is_serif, is_bold, is_italic)
    return fertig(font)
def JM_get_fontbuffer(doc, xref):
    '''
    Return the contents of a font file, identified by xref.

    Looks up the font's /FontDescriptor (via /DescendantFonts if present)
    and loads the stream referenced by /FontFile, /FontFile2 or /FontFile3
    (the last one present wins).

    Returns:
        The buffer from `mupdf.pdf_load_stream()`, or None if `xref < 1`,
        the descriptor is missing, the /FontFile3 subtype is not a name, or
        no font file entry exists (a message is emitted in the latter
        three cases).
    '''
    if xref < 1:
        return

    o = mupdf.pdf_load_object(doc, xref)
    desft = mupdf.pdf_dict_get(o, PDF_NAME('DescendantFonts'))
    if desft.m_internal:
        obj = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(desft, 0))
        obj = mupdf.pdf_dict_get(obj, PDF_NAME('FontDescriptor'))
    else:
        obj = mupdf.pdf_dict_get(o, PDF_NAME('FontDescriptor'))

    if not obj.m_internal:
        message("invalid font - FontDescriptor missing")
        return

    o = obj     # the font descriptor

    stream = None

    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile'))
    if obj.m_internal:
        stream = obj    # ext = "pfa"

    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile2'))
    if obj.m_internal:
        stream = obj    # ext = "ttf"

    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile3'))
    if obj.m_internal:
        stream = obj

        obj = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
        if obj.m_internal and not mupdf.pdf_is_name(obj):
            message("invalid font descriptor subtype")
            return

        known_subtypes = ('Type1C', 'CIDFontType0C', 'OpenType')   # cff, cid, otf
        if not any(mupdf.pdf_name_eq(obj, PDF_NAME(t)) for t in known_subtypes):
            # The stream is still returned; this is a warning only.
            # (The message used to lack the f-prefix and printed the raw
            # placeholder text instead of the subtype name.)
            message(f'warning: unhandled font type {mupdf.pdf_to_name(obj)!r}')

    if not stream:
        message('warning: unhandled font type')
        return

    return mupdf.pdf_load_stream(stream)
def JM_get_resource_properties(ref):
    '''
    Return the items of Resources/Properties (used for Marked Content)
    as a list of (name, xref) tuples; an empty tuple if there are none.
    Argument may be e.g. a page object or a Form XObject.
    '''
    properties = mupdf.pdf_dict_getl(ref, PDF_NAME('Resources'), PDF_NAME('Properties'))
    if not properties.m_internal:
        return ()
    count = mupdf.pdf_dict_len(properties)
    if count < 1:
        return ()
    items = []
    for idx in range(count):
        name = mupdf.pdf_to_name(mupdf.pdf_dict_get_key(properties, idx))
        xref = mupdf.pdf_to_num(mupdf.pdf_dict_get_val(properties, idx))
        items.append((name, xref))
    return items
def JM_get_widget_by_xref( page, xref):
    '''
    Return the widget of `page` with the given xref, wrapped in `Annot`.

    Raises Exception if no widget of the page has this xref.
    '''
    widget = mupdf.pdf_first_widget(page)
    while widget.m_internal:
        if xref == mupdf.pdf_to_num(mupdf.pdf_annot_obj(widget)):
            return Annot(widget)
        widget = mupdf.pdf_next_widget(widget)
    raise Exception( f"xref {xref} is not a widget of this page")
def JM_get_widget_properties(annot, Widget):
    '''
    Populate a Python Widget object with the values from a PDF form field.
    Called by "Page.first_widget" and "Widget.next".

    Args:
        annot: wrapper whose `.this` is the underlying MuPDF widget.
        Widget: the Python object receiving the attributes; its
            `_parse_da()` method is called once `_text_da` has been set.

    Attributes that depend on an optional PDF key (`field_label`,
    `rb_parent`, `border_dashes`, `fill_color`, `border_color`,
    `button_caption`) are only set when that key is present.
    '''
    #log( '{type(annot)=}')
    annot_obj = mupdf.pdf_annot_obj(annot.this)
    #log( 'Have called mupdf.pdf_annot_obj()')
    page = mupdf.pdf_annot_page(annot.this)
    pdf = page.doc()
    tw = annot

    def SETATTR(key, value):
        # set an attribute on the Widget being populated
        setattr(Widget, key, value)

    def SETATTR_DROP(mod, key, value):
        # Original C code for this function deletes if PyObject* is NULL. We
        # don't have a representation for that in Python - e.g. None is not
        # represented by NULL.
        setattr(mod, key, value)

    #log( '=== + mupdf.pdf_widget_type(tw)')
    field_type = mupdf.pdf_widget_type(tw.this)
    #log( '=== - mupdf.pdf_widget_type(tw)')
    Widget.field_type = field_type
    # is_signed: True / False for signature fields, None for all others
    if field_type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
        if mupdf.pdf_signature_is_signed(pdf, annot_obj):
            SETATTR("is_signed", True)
        else:
            SETATTR("is_signed",False)
    else:
        SETATTR("is_signed", None)
    SETATTR_DROP(Widget, "border_style", JM_UnicodeFromStr(mupdf.pdf_field_border_style(annot_obj)))
    SETATTR_DROP(Widget, "field_type_string", JM_UnicodeFromStr(JM_field_type_text(field_type)))

    field_name = mupdf.pdf_load_field_name(annot_obj)
    SETATTR_DROP(Widget, "field_name", field_name)

    # /TU, if present, becomes the field label
    obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('TU'))
    if obj.m_internal:
        label = mupdf.pdf_to_text_string(obj)
        SETATTR_DROP(Widget, "field_label", label)

    # Field value: for radio buttons the /AS name takes precedence; if that
    # is missing or empty, pdf_field_value() is used.
    fvalue = None
    if field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
        obj = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Parent'))   # owning RB group
        if obj.m_internal:
            SETATTR_DROP(Widget, "rb_parent", mupdf.pdf_to_num( obj))
        obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('AS'))
        if obj.m_internal:
            fvalue = mupdf.pdf_to_name(obj)
    if not fvalue:
        fvalue = mupdf.pdf_field_value(annot_obj)
    SETATTR_DROP(Widget, "field_value", JM_UnicodeFromStr(fvalue))

    SETATTR_DROP(Widget, "field_display", mupdf.pdf_field_display(annot_obj))

    # a border width of 0 (which includes a missing /BS/W) is reported as 1
    border_width = mupdf.pdf_to_real(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('BS'), PDF_NAME('W')))
    if border_width == 0:
        border_width = 1
    SETATTR_DROP(Widget, "border_width", border_width)

    # /BS/D array -> list of ints
    obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('BS'), PDF_NAME('D'))
    if mupdf.pdf_is_array(obj):
        n = mupdf.pdf_array_len(obj)
        d = [0] * n
        for i in range(n):
            d[i] = mupdf.pdf_to_int(mupdf.pdf_array_get(obj, i))
        SETATTR_DROP(Widget, "border_dashes", d)

    SETATTR_DROP(Widget, "text_maxlen", mupdf.pdf_text_widget_max_len(tw.this))
    SETATTR_DROP(Widget, "text_format", mupdf.pdf_text_widget_format(tw.this))

    # /MK/BG array -> fill color as list of floats
    obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('BG'))
    if mupdf.pdf_is_array(obj):
        n = mupdf.pdf_array_len(obj)
        col = [0] * n
        for i in range(n):
            col[i] = mupdf.pdf_to_real(mupdf.pdf_array_get(obj, i))
        SETATTR_DROP(Widget, "fill_color", col)

    # /MK/BC array -> border color as list of floats
    obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('BC'))
    if mupdf.pdf_is_array(obj):
        n = mupdf.pdf_array_len(obj)
        col = [0] * n
        for i in range(n):
            col[i] = mupdf.pdf_to_real(mupdf.pdf_array_get(obj, i))
        SETATTR_DROP(Widget, "border_color", col)

    SETATTR_DROP(Widget, "choice_values", JM_choice_options(annot))

    da = mupdf.pdf_to_text_string(mupdf.pdf_dict_get_inheritable(annot_obj, PDF_NAME('DA')))
    SETATTR_DROP(Widget, "_text_da", JM_UnicodeFromStr(da))

    obj = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('CA'))
    if obj.m_internal:
        SETATTR_DROP(Widget, "button_caption", JM_UnicodeFromStr(mupdf.pdf_to_text_string(obj)))

    SETATTR_DROP(Widget, "field_flags", mupdf.pdf_field_flags(annot_obj))

    # call Py method to reconstruct text color, font name, size
    Widget._parse_da()

    # extract JavaScript action texts: /A first, then the /AA sub-entries
    # K, F, V, C, Bl and Fo
    s = mupdf.pdf_dict_get(annot_obj, PDF_NAME('A'))
    ss = JM_get_script(s)
    SETATTR_DROP(Widget, "script", ss)

    SETATTR_DROP(Widget, "script_stroke",
            JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('K')))
            )

    SETATTR_DROP(Widget, "script_format",
            JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('F')))
            )

    SETATTR_DROP(Widget, "script_change",
            JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('V')))
            )

    SETATTR_DROP(Widget, "script_calc",
            JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), PDF_NAME('C')))
            )

    SETATTR_DROP(Widget, "script_blur",
            JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Bl')))
            )

    SETATTR_DROP(Widget, "script_focus",
            JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Fo')))
            )
def JM_get_fontextension(doc, xref):
    '''
    Return the file extension of a font file, identified by xref.

    Returns:
        "pfa" for /FontFile, "ttf" for /FontFile2; for /FontFile3 one of
        "cff", "cid", "otf" depending on its /Subtype. "n/a" if `xref < 1`,
        there is no font descriptor (a base-14 font), no font file entry,
        or the /FontFile3 subtype is invalid or unknown.
    '''
    if xref < 1:
        return "n/a"
    o = mupdf.pdf_load_object(doc, xref)
    desft = mupdf.pdf_dict_get(o, PDF_NAME('DescendantFonts'))
    if desft.m_internal:
        obj = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(desft, 0))
        obj = mupdf.pdf_dict_get(obj, PDF_NAME('FontDescriptor'))
    else:
        obj = mupdf.pdf_dict_get(o, PDF_NAME('FontDescriptor'))
    if not obj.m_internal:
        return "n/a"    # this is a base-14 font

    o = obj # we have the FontDescriptor

    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile'))
    if obj.m_internal:
        return "pfa"

    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile2'))
    if obj.m_internal:
        return "ttf"

    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile3'))
    if obj.m_internal:
        obj = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
        if obj.m_internal and not mupdf.pdf_is_name(obj):
            message("invalid font descriptor subtype")
            return "n/a"
        for subtype, extension in (
                ('Type1C', "cff"),
                ('CIDFontType0C', "cid"),
                ('OpenType', "otf"),
                ):
            if mupdf.pdf_name_eq(obj, PDF_NAME(subtype)):
                return extension
        # message() accepts a single text argument, so the subtype must be
        # formatted into the string (two arguments raised TypeError).
        message("unhandled font type '%s'" % mupdf.pdf_to_name(obj))

    return "n/a"
def JM_get_ocg_arrays_imp(arr):
    '''
    Return the xref numbers of the items of PDF array `arr`, without
    duplicates and in order of first occurrence. Helper for
    JM_get_ocg_arrays(); yields an empty list if `arr` is not an array.
    '''
    xrefs = list()
    if not mupdf.pdf_is_array(arr):
        return xrefs
    for idx in range(mupdf.pdf_array_len(arr)):
        xref = mupdf.pdf_to_num(mupdf.pdf_array_get(arr, idx))
        if xref in xrefs:
            continue
        xrefs.append(xref)
    return xrefs
def JM_get_ocg_arrays(conf):
    '''
    Get OCG arrays from an OC configuration dictionary.

    Returns a dict with any of the keys "on", "off", "locked" (lists of
    xrefs), "rbgroups" (list of lists of xrefs) and "basestate" (name).
    Keys whose PDF entry is missing or empty are omitted.
    '''
    rc = dict()
    for pdf_key, result_key in (('ON', "on"), ('OFF', "off"), ('Locked', 'locked')):
        xrefs = JM_get_ocg_arrays_imp(mupdf.pdf_dict_get(conf, PDF_NAME(pdf_key)))
        if xrefs:
            rc[result_key] = xrefs

    # /RBGroups is an array of arrays
    groups = list()
    rbgroups = mupdf.pdf_dict_get(conf, PDF_NAME('RBGroups'))
    if mupdf.pdf_is_array(rbgroups):
        for idx in range(mupdf.pdf_array_len(rbgroups)):
            groups.append(JM_get_ocg_arrays_imp(mupdf.pdf_array_get(rbgroups, idx)))
    if groups:
        rc["rbgroups"] = groups

    basestate = mupdf.pdf_dict_get(conf, PDF_NAME('BaseState'))
    if basestate.m_internal:
        rc["basestate"] = mupdf.pdf_to_name(basestate)
    return rc
def JM_get_page_labels(liste, nums):
    '''
    Append (page number, label definition) tuples to `liste`.

    `nums` is a PDF array of alternating keys and values: each key is
    converted to an int, each value is serialized via
    JM_object_to_buffer() and decoded as UTF-8.
    '''
    count = mupdf.pdf_array_len(nums)
    for pos in range(0, count, 2):
        key_obj = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(nums, pos))
        pno = mupdf.pdf_to_int(key_obj)
        value_obj = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(nums, pos + 1))
        serialized = mupdf.fz_buffer_extract(JM_object_to_buffer(value_obj, 1, 0))
        assert isinstance(serialized, bytes)
        liste.append((pno, serialized.decode('utf-8')))
def JM_get_script(key):
    '''
    JavaScript extractor.

    Parameter is a PDF action dictionary. If its /S value is the name
    'JavaScript' and /JS is a string or a stream, the (non-empty) script
    source is returned. In all other cases the result is None.
    '''
    if not key.m_internal:
        return
    action_type = mupdf.pdf_to_name(mupdf.pdf_dict_get(key, PDF_NAME('S')))
    if action_type != "JavaScript":
        return
    js = mupdf.pdf_dict_get(key, PDF_NAME('JS'))
    if not js.m_internal:
        return

    if mupdf.pdf_is_string(js):
        script = JM_UnicodeFromStr(mupdf.pdf_to_text_string(js))
    elif mupdf.pdf_is_stream(js):
        script = JM_EscapeStrFromBuffer(mupdf.pdf_load_stream(js))
    else:
        return
    # do not return an empty script
    return script if script else None
def JM_have_operation(pdf):
    '''
    Ensure valid journalling state.

    Returns 0 if `pdf` has a journal but pdf_undoredo_step(pdf, 0) yields
    nothing truthy; returns 1 otherwise (including when there is no
    journal).
    '''
    if not pdf.m_internal.journal:
        return 1
    if mupdf.pdf_undoredo_step(pdf, 0):
        return 1
    return 0
def JM_image_extension(type_):
    '''
    Return the extension string for a MuPDF image type.

    Types that match none of the listed mupdf.FZ_IMAGE_* constants yield
    "n/a".
    '''
    # (mupdf constant name, extension), compared in this order. Constants are
    # looked up one at a time, only as far as the search needs to go.
    known = (
            ('FZ_IMAGE_FAX', "fax"),
            ('FZ_IMAGE_RAW', "raw"),
            ('FZ_IMAGE_FLATE', "flate"),
            ('FZ_IMAGE_LZW', "lzw"),
            ('FZ_IMAGE_RLD', "rld"),
            ('FZ_IMAGE_BMP', "bmp"),
            ('FZ_IMAGE_GIF', "gif"),
            ('FZ_IMAGE_JBIG2', "jb2"),
            ('FZ_IMAGE_JPEG', "jpeg"),
            ('FZ_IMAGE_JPX', "jpx"),
            ('FZ_IMAGE_JXR', "jxr"),
            ('FZ_IMAGE_PNG', "png"),
            ('FZ_IMAGE_PNM', "pnm"),
            ('FZ_IMAGE_TIFF', "tiff"),
            # FZ_IMAGE_PSD ("psd") is intentionally not checked.
            )
    for constant, extension in known:
        if type_ == getattr(mupdf, constant):
            return extension
    return "n/a"
# fixme: need to avoid using a global for this.
# Module-level accumulator for image placement info: JM_image_reporter()
# rebinds it to a fresh list around each filter run, and JM_image_filter()
# appends one (name, quad) tuple per reported image.
g_img_info = None
def JM_image_filter(opaque, ctm, name, image):
    '''
    Record where an image is placed.

    Transforms the unit rectangle's quad by `ctm` and then by the global
    g_img_info_matrix, and appends the tuple (name, quad) to the global list
    g_img_info. `opaque` and `image` are not used.
    '''
    assert isinstance(ctm, mupdf.FzMatrix)
    unit_quad = mupdf.fz_quad_from_rect(mupdf.FzRect(mupdf.FzRect.Fixed_UNIT))
    placed = mupdf.fz_transform_quad(unit_quad, ctm)
    placed = mupdf.fz_transform_quad(placed, g_img_info_matrix)
    g_img_info.append((name, JM_py_from_quad(placed)))
def JM_image_profile( imagedata, keep_image):
    '''
    Return basic properties of an image given as bytes or bytearray.

    Returns None if no data is given, if there are fewer than 8 bytes (a
    message is emitted in that case) or if MuPDF does not recognise the
    format. Otherwise returns a dict with width, height, orientation,
    matrix, resolution, colorspace, bpc, extension and colorspace name.
    If `keep_image` is true, the created image object is added to the dict
    and the image buffer is built from a copy of the data.
    '''
    if not imagedata:
        return None  # nothing given
    size = len(imagedata)
    if size < 8:
        message( "bad image data")
        return None

    type_ = mupdf.fz_recognize_image_format(imagedata)
    if type_ == mupdf.FZ_IMAGE_UNKNOWN:
        return None

    make_buffer = (
            mupdf.fz_new_buffer_from_copied_data
            if keep_image
            else mupdf.fz_new_buffer_from_shared_data
            )
    buf = make_buffer(imagedata, size)
    image = mupdf.fz_new_image_from_buffer(buf)
    ctm = mupdf.fz_image_orientation_matrix(image)
    xres, yres = mupdf.fz_image_resolution(image)
    orientation = mupdf.fz_image_orientation(image)
    cs_name = mupdf.fz_colorspace_name(image.colorspace())

    profile = {
            dictkey_width: image.w(),
            dictkey_height: image.h(),
            "orientation": orientation,
            dictkey_matrix: JM_py_from_matrix(ctm),
            dictkey_xres: xres,
            dictkey_yres: yres,
            dictkey_colorspace: image.n(),
            dictkey_bpc: image.bpc(),
            dictkey_ext: JM_image_extension(type_),
            dictkey_cs_name: cs_name,
            }
    if keep_image:
        profile[dictkey_image] = image
    return profile
def JM_image_reporter(page):
    '''
    Return a tuple of (name, quad) entries for the images of a PDF page.

    Runs pdf_filter_page_contents() with a sanitize filter whose image
    callback forwards to JM_image_filter(), which collects its results in
    the global list g_img_info. The page transformation matrix is stored
    in the global g_img_info_matrix for that callback to use.
    '''
    doc = page.doc()
    global g_img_info_matrix
    g_img_info_matrix = mupdf.FzMatrix()
    mediabox = mupdf.FzRect()
    # Passes mediabox and g_img_info_matrix to MuPDF; presumably both are
    # filled in by the call - TODO confirm.
    mupdf.pdf_page_transform(page, mediabox, g_img_info_matrix)
    class SanitizeFilterOptions(mupdf.PdfSanitizeFilterOptions2):
        def __init__(self):
            super().__init__()
            self.use_virtual_image_filter()
        # The callback takes an extra `scissor` argument from MuPDF 1.23.11 on.
        if mupdf_version_tuple >= (1, 23, 11):
            def image_filter(self, ctx, ctm, name, image, scissor):
                JM_image_filter(None, mupdf.FzMatrix(ctm), name, image)
        else:
            def image_filter(self, ctx, ctm, name, image):
                JM_image_filter(None, mupdf.FzMatrix(ctm), name, image)
    sanitize_filter_options = SanitizeFilterOptions()
    filter_options = _make_PdfFilterOptions(
            instance_forms=1,
            ascii=1,
            no_update=1,
            sanitize=1,
            sopts=sanitize_filter_options,
            )
    global g_img_info
    # Start from an empty list so only this page's images are reported.
    g_img_info = []
    mupdf.pdf_filter_page_contents( doc, page, filter_options)
    rc = tuple(g_img_info)
    # Reset the global accumulator after taking the snapshot.
    g_img_info = []
    return rc
def JM_fitz_config():
    '''
    Return a dict of build configuration flags.

    The "base14" and "tofu*" entries are True when the mupdf module has no
    attribute of the corresponding TOFU* name. The remaining entries are the
    boolean values of mupdf.FZ_ENABLE_* / mupdf.FZ_PLOTTERS_* and JM_MEMORY.
    '''
    def lacks(symbol):
        return not hasattr(mupdf, symbol)

    return {
            "base14": lacks('TOFU_BASE14'),
            "cbz": bool(mupdf.FZ_ENABLE_CBZ),
            "epub": bool(mupdf.FZ_ENABLE_EPUB),
            "html": bool(mupdf.FZ_ENABLE_HTML),
            "icc": bool(mupdf.FZ_ENABLE_ICC),
            "img": bool(mupdf.FZ_ENABLE_IMG),
            "jpx": bool(mupdf.FZ_ENABLE_JPX),
            "js": bool(mupdf.FZ_ENABLE_JS),
            "pdf": bool(mupdf.FZ_ENABLE_PDF),
            "plotter-cmyk": bool(mupdf.FZ_PLOTTERS_CMYK),
            "plotter-g": bool(mupdf.FZ_PLOTTERS_G),
            "plotter-n": bool(mupdf.FZ_PLOTTERS_N),
            "plotter-rgb": bool(mupdf.FZ_PLOTTERS_RGB),
            "py-memory": bool(JM_MEMORY),
            "svg": bool(mupdf.FZ_ENABLE_SVG),
            "tofu": lacks('TOFU'),
            "tofu-cjk": lacks('TOFU_CJK'),
            "tofu-cjk-ext": lacks('TOFU_CJK_EXT'),
            "tofu-cjk-lang": lacks('TOFU_CJK_LANG'),
            "tofu-emoji": lacks('TOFU_EMOJI'),
            "tofu-historic": lacks('TOFU_HISTORIC'),
            "tofu-sil": lacks('TOFU_SIL'),
            "tofu-symbol": lacks('TOFU_SYMBOL'),
            "xps": bool(mupdf.FZ_ENABLE_XPS),
            }
def JM_insert_contents(pdf, pageref, newcont, overlay):
    '''
    Insert buffer `newcont` as a new, separate /Contents stream of a page.

    A stream object is created from the buffer. If /Contents is already an
    array, the stream is appended (`overlay` true) or inserted at index 0.
    Otherwise a new array is created holding the previous /Contents object
    (if any) and the new stream in the matching order, and stored as the
    page's /Contents.

    Returns the xref number of the new stream.
    '''
    previous = mupdf.pdf_dict_get(pageref, PDF_NAME('Contents'))
    stream = mupdf.pdf_add_stream(pdf, newcont, mupdf.PdfObj(), 0)
    xref = mupdf.pdf_to_num(stream)

    if mupdf.pdf_is_array(previous):
        if overlay:
            mupdf.pdf_array_push(previous, stream)  # append new object
        else:
            mupdf.pdf_array_insert(previous, stream, 0)  # prepend new object
        return xref

    combined = mupdf.pdf_new_array(pdf, 5)
    if overlay and previous.m_internal:
        mupdf.pdf_array_push(combined, previous)
    mupdf.pdf_array_push(combined, stream)
    if not overlay and previous.m_internal:
        mupdf.pdf_array_push(combined, previous)
    mupdf.pdf_dict_put(pageref, PDF_NAME('Contents'), combined)
    return xref
def JM_insert_font(pdf, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering):
    '''
    Insert a font in a PDF

    The font source is chosen in this order:
    1. `ordering` > -1: a CJK font looked up by `ordering`.
    2. `bfname` given: a Base-14 font looked up by name.
    3. Otherwise the font is loaded from `fontfile`, or from `fontbuffer`
       if no file is given; it is added as a CID font unless `set_simple`
       is true.

    Returns a two-item list [xref, info] where info is a dict with keys
    "name", "type", "ext", "simple", "ordering", "ascender", "descender".
    '''
    font = None
    res = None
    data = None
    ixref = 0
    index = 0
    simple = 0
    value=None
    name=None
    subt=None
    exto = None
    ENSURE_OPERATION(pdf)
    # check for CJK font
    if ordering > -1:
        data, size, index = mupdf.fz_lookup_cjk_font(ordering)
    if data:
        font = mupdf.fz_new_font_from_memory(None, data, size, index, 0)
        font_obj = mupdf.pdf_add_cjk_font(pdf, font, ordering, wmode, serif)
        exto = "n/a"
        simple = 0
        #goto weiter;
    else:
        # check for PDF Base-14 font
        if bfname:
            data, size = mupdf.fz_lookup_base14_font(bfname)
        if data:
            font = mupdf.fz_new_font_from_memory(bfname, data, size, 0, 0)
            font_obj = mupdf.pdf_add_simple_font(pdf, font, encoding)
            exto = "n/a"
            simple = 1
            #goto weiter;
        else:
            # neither CJK nor Base-14: load the font from file or buffer
            if fontfile:
                font = mupdf.fz_new_font_from_file(None, fontfile, idx, 0)
            else:
                res = JM_BufferFromBytes(fontbuffer)
                if not res.m_internal:
                    RAISEPY(MSG_FILE_OR_BUFFER, PyExc_ValueError)
                font = mupdf.fz_new_font_from_buffer(None, res, idx, 0)
            if not set_simple:
                font_obj = mupdf.pdf_add_cid_font(pdf, font)
                simple = 0
            else:
                font_obj = mupdf.pdf_add_simple_font(pdf, font, encoding)
                simple = 2
    #weiter: ;
    # common tail ("weiter" is German for "continue"): describe the new font
    ixref = mupdf.pdf_to_num(font_obj)
    name = JM_EscapeStrFromStr( mupdf.pdf_to_name( mupdf.pdf_dict_get(font_obj, PDF_NAME('BaseFont'))))
    subt = JM_UnicodeFromStr( mupdf.pdf_to_name( mupdf.pdf_dict_get( font_obj, PDF_NAME('Subtype'))))
    # built-in fonts were given "n/a" above; otherwise ask for the extension
    if not exto:
        exto = JM_UnicodeFromStr(JM_get_fontextension(pdf, ixref))
    asc = mupdf.fz_font_ascender(font)
    dsc = mupdf.fz_font_descender(font)
    value = [
            ixref,
            {
                "name": name, # base font name
                "type": subt, # subtype
                "ext": exto, # file extension
                "simple": bool(simple), # simple font?
                "ordering": ordering, # CJK font?
                "ascender": asc,
                "descender": dsc,
            },
            ]
    return value
def JM_invert_pixmap_rect( dest, b):
    '''
    Invert the colour samples of pixmap `dest` inside rectangle `b`.

    Also supports pixmaps with an alpha channel: only the colour components
    of each pixel are replaced by `255 - value`; the alpha sample is skipped
    and left unchanged.

    Args:
        dest: mupdf.FzPixmap, modified in place.
        b: mupdf.FzIrect; it is clipped to the pixmap's bbox first.

    Returns:
        0 if the clipped rectangle is empty (nothing changed), else 1.
    '''
    assert isinstance( dest, mupdf.FzPixmap)
    assert isinstance( b, mupdf.FzIrect)
    b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox( dest))
    w = b.x1 - b.x0
    h = b.y1 - b.y0
    if w <= 0 or h <= 0:
        return 0

    stride = dest.stride()
    n = dest.n()                # samples per pixel, alpha included
    n0 = n - dest.alpha()       # colour samples per pixel
    row_start = stride * (b.y0 - dest.y()) + n * (b.x0 - dest.x())
    for _ in range(h):
        for x in range(w):
            # Stepping by `n` per pixel skips the alpha sample. The previous
            # code incremented the alpha *value* instead of the index, which
            # corrupted alpha and misaligned all following pixels.
            s = row_start + x * n
            for i in range(n0):
                value = mupdf.fz_samples_get(dest, s + i)
                mupdf.fz_samples_set(dest, s + i, 255 - value)
        row_start += stride
    return 1
def JM_irect_from_py(r):
    '''
    Convert a rectangle-like object to mupdf.FzIrect.

    Accepts mupdf.FzIrect (returned unchanged), IRect, Rect, mupdf.FzRect
    or a sequence of 4 numbers. Sequence items are limited to the range
    FZ_MIN_INF_RECT..FZ_MAX_INF_RECT. Anything else, or a sequence that
    contains None, yields the infinite irect.
    '''
    if isinstance(r, mupdf.FzIrect):
        return r
    if isinstance(r, IRect):
        return mupdf.FzIrect( r.x0, r.y0, r.x1, r.y1)
    if isinstance(r, Rect):
        as_rect = mupdf.FzRect(r.x0, r.y0, r.x1, r.y1)
        return mupdf.FzIrect(as_rect)  # Uses fz_irect_from_rect().
    if isinstance(r, mupdf.FzRect):
        return mupdf.FzIrect(r)  # Uses fz_irect_from_rect().
    if not r or not PySequence_Check(r) or PySequence_Size(r) != 4:
        return mupdf.FzIrect(mupdf.fz_infinite_irect)

    values = []
    for idx in range(4):
        value = r[idx]
        if value is None:
            return mupdf.FzIrect(mupdf.fz_infinite_irect)
        if value < FZ_MIN_INF_RECT:
            value = FZ_MIN_INF_RECT
        if value > FZ_MAX_INF_RECT:
            value = FZ_MAX_INF_RECT
        values.append(value)
    return mupdf.fz_make_irect(values[0], values[1], values[2], values[3])
def JM_is_jbig2_image(dict_):
    '''
    Placeholder for a JBIG2 image check; always returns 0.

    `dict_` is ignored.
    '''
    # fixme: should we remove this function?
    # The disabled C logic returned 1 if /Filter was the name JBIG2Decode or
    # an array containing it, and 0 otherwise.
    return 0
def JM_listbox_value( annot):
    '''
    ListBox retrieve value.

    Reads /V of the annotation object. A string value is returned as a
    single text string. Otherwise /V is treated as an array (possibly of
    length 0) and a list of strings is returned; for an entry that is
    itself an array, its item at index 1 is used.
    '''
    value = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(annot), PDF_NAME('V'))
    if mupdf.pdf_is_string(value):
        # a single string
        return mupdf.pdf_to_text_string(value)

    selected = []
    for idx in range(mupdf.pdf_array_len(value)):
        entry = mupdf.pdf_array_get(value, idx)
        if mupdf.pdf_is_array(entry):
            entry = mupdf.pdf_array_get(entry, 1)
        selected.append(JM_UnicodeFromStr(mupdf.pdf_to_text_string(entry)))
    return selected
def JM_make_annot_DA(annot, ncol, col, fontname, fontsize):
    '''
    Build the /DA string of an annotation and store it in its object.

    The colour operator depends on `ncol`: fewer than 1 component gives
    '0 g', 1 gives gray ('g'), 3 gives RGB ('rg'), 4 or more gives CMYK
    ('k'). Two components are not supported (assertion). The font part is
    '/<expanded font name> <fontsize> Tf'.
    '''
    # The string is assembled in Python because converting a fz_buffer's
    # `unsigned char*` to the `const char*` that pdf_dict_put_text_string()
    # needs is non-trivial.
    color_op = ''
    if ncol < 1:
        color_op = '0 g '
    elif ncol == 1:
        color_op = f'{col[0]:g} g '
    elif ncol == 2:
        assert 0
    elif ncol == 3:
        color_op = f'{col[0]:g} {col[1]:g} {col[2]:g} rg '
    else:
        color_op = f'{col[0]:g} {col[1]:g} {col[2]:g} {col[3]:g} k '
    appearance = color_op + f'/{JM_expand_fname(fontname)} {fontsize} Tf'
    mupdf.pdf_dict_put_text_string(
            mupdf.pdf_annot_obj(annot),
            mupdf.PDF_ENUM_NAME_DA,
            appearance,
            )
def JM_make_spanlist(line_dict, line, raw, buff, tp_rect):
    '''
    Split the characters of a text line into spans of equal style.

    A new span starts whenever size, flags, color or font name differ from
    the previous character. Characters whose bbox does not overlap
    `tp_rect` are skipped unless `tp_rect` is infinite. Each span dict gets
    either a list of character dicts (`raw` true) or a text string built
    in the scratch buffer `buff`.

    The span list is stored in `line_dict[dictkey_spans]`; the union of the
    span rectangles is returned.
    '''
    if g_use_extra:
        return extra.JM_make_spanlist(line_dict, line, raw, buff, tp_rect)
    char_list = None
    span_list = []
    mupdf.fz_clear_buffer(buff)
    span_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
    line_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
    # Plain record of the character properties compared between characters.
    class char_style:
        def __init__(self, rhs=None):
            if rhs:
                self.size = rhs.size
                self.flags = rhs.flags
                self.font = rhs.font
                self.color = rhs.color
                self.asc = rhs.asc
                self.desc = rhs.desc
            else:
                # size -1 marks "no character seen yet" (checked below)
                self.size = -1
                self.flags = -1
                self.font = ''
                self.color = -1
                self.asc = 0
                self.desc = 0
        def __str__(self):
            return f'{self.size} {self.flags} {self.font} {self.color} {self.asc} {self.desc}'
    old_style = char_style()
    style = char_style()
    span = None
    span_origin = None
    for ch in line:
        # start-trace
        r = JM_char_bbox(line, ch)
        if (not JM_rects_overlap(tp_rect, r)
                and not mupdf.fz_is_infinite_rect(tp_rect)
                ):
            continue
        flags = JM_char_font_flags(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)), line, ch)
        origin = mupdf.FzPoint(ch.m_internal.origin)
        style.size = ch.m_internal.size
        style.flags = flags
        style.font = JM_font_name(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
        style.color = ch.m_internal.color
        style.asc = JM_font_ascender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
        style.desc = JM_font_descender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
        # style change (ascender/descender are not compared): start a new span
        if (style.size != old_style.size
                or style.flags != old_style.flags
                or style.color != old_style.color
                or style.font != old_style.font
                ):
            if old_style.size >= 0:
                # not first one, output previous
                if raw:
                    # put character list in the span
                    span[dictkey_chars] = char_list
                    char_list = None
                else:
                    # put text string in the span
                    span[dictkey_text] = JM_EscapeStrFromBuffer( buff)
                    mupdf.fz_clear_buffer(buff)
                span[dictkey_origin] = JM_py_from_point(span_origin)
                span[dictkey_bbox] = JM_py_from_rect(span_rect)
                line_rect = mupdf.fz_union_rect(line_rect, span_rect)
                span_list.append( span)
                span = None
            span = dict()
            asc = style.asc
            desc = style.desc
            # fall back to fixed values if the ascender is (nearly) zero or negative
            if style.asc < 1e-3:
                asc = 0.9
                desc = -0.1
            span[dictkey_size] = style.size
            span[dictkey_flags] = style.flags
            span[dictkey_font] = JM_EscapeStrFromStr(style.font)
            span[dictkey_color] = style.color
            span["ascender"] = asc
            span["descender"] = desc
            # Need to be careful here - doing 'old_style=style' does a shallow
            # copy, but we need to keep old_style as a distinct instance.
            old_style = char_style(style)
            span_rect = r
            span_origin = origin
        span_rect = mupdf.fz_union_rect(span_rect, r)
        if raw: # make and append a char dict
            char_dict = dict()
            char_dict[dictkey_origin] = JM_py_from_point( ch.m_internal.origin)
            char_dict[dictkey_bbox] = JM_py_from_rect(r)
            char_dict[dictkey_c] = chr(ch.m_internal.c)
            if char_list is None:
                char_list = []
            char_list.append(char_dict)
        else: # add character byte to buffer
            JM_append_rune(buff, ch.m_internal.c)
    # all characters processed, now flush remaining span
    if span:
        if raw:
            span[dictkey_chars] = char_list
            char_list = None
        else:
            span[dictkey_text] = JM_EscapeStrFromBuffer(buff)
            mupdf.fz_clear_buffer(buff)
        span[dictkey_origin] = JM_py_from_point(span_origin)
        span[dictkey_bbox] = JM_py_from_rect(span_rect)
        # unlike spans flushed inside the loop, the last span is dropped if its rect is empty
        if not mupdf.fz_is_empty_rect(span_rect):
            span_list.append(span)
            line_rect = mupdf.fz_union_rect(line_rect, span_rect)
        span = None
    # NOTE(review): both branches store the same value.
    if not mupdf.fz_is_empty_rect(line_rect):
        line_dict[dictkey_spans] = span_list
    else:
        line_dict[dictkey_spans] = span_list
    return line_rect
def JM_make_image_block(block, block_dict):
    '''
    Fill `block_dict` with the properties and binary content of an image
    block.

    If the image has a compressed buffer whose type is at least
    FZ_IMAGE_BMP and is not FZ_IMAGE_JBIG2, that buffer is delivered with
    the matching extension. Otherwise the image is converted to PNG.
    '''
    image = block.i_image()
    components = mupdf.fz_colorspace_n(image.colorspace())
    width = image.w()
    height = image.h()

    # fz_compressed_image_buffer() is not available because
    # `fz_compressed_buffer` is not copyable.
    compressed = mupdf.ll_fz_compressed_image_buffer(image.m_internal)
    type_ = compressed.params.type if compressed else mupdf.FZ_IMAGE_UNKNOWN
    if type_ < mupdf.FZ_IMAGE_BMP or type_ == mupdf.FZ_IMAGE_JBIG2:
        type_ = mupdf.FZ_IMAGE_UNKNOWN

    if compressed and type_ != mupdf.FZ_IMAGE_UNKNOWN:
        buf = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(compressed.buffer))
        ext = JM_image_extension(type_)
    else:
        buf = mupdf.fz_new_buffer_from_image_as_png(image, mupdf.FzColorParams())
        ext = "png"
    content = JM_BinFromBuffer(buf)

    block_dict[dictkey_width] = width
    block_dict[dictkey_height] = height
    block_dict[dictkey_ext] = ext
    block_dict[dictkey_colorspace] = components
    block_dict[dictkey_xres] = image.xres()
    block_dict[dictkey_yres] = image.yres()
    block_dict[dictkey_bpc] = image.bpc()
    block_dict[dictkey_matrix] = JM_py_from_matrix(block.i_transform())
    block_dict[dictkey_size] = len(content)
    block_dict[dictkey_image] = content
def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
    '''
    Fill `block_dict` with the lines of a text block.

    Lines whose bbox has an empty intersection with `tp_rect` are skipped
    unless `tp_rect` is infinite. Each remaining line becomes a dict with
    its spans, writing mode, direction and bbox. The block bbox is the
    union of the line rectangles returned by JM_make_spanlist().
    '''
    if g_use_extra:
        return extra.JM_make_text_block(block.m_internal, block_dict, raw, buff.m_internal, tp_rect.m_internal)

    lines = []
    block_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
    for line in block:
        line_box = mupdf.FzRect(line.m_internal.bbox)
        visible = mupdf.fz_intersect_rect(tp_rect, line_box)
        if mupdf.fz_is_empty_rect(visible):
            if not mupdf.fz_is_infinite_rect(tp_rect):
                continue
        entry = {}
        line_rect = JM_make_spanlist(entry, line, raw, buff, tp_rect)
        block_rect = mupdf.fz_union_rect(block_rect, line_rect)
        entry[dictkey_wmode] = line.m_internal.wmode
        entry[dictkey_dir] = JM_py_from_point(line.m_internal.dir)
        entry[dictkey_bbox] = JM_py_from_rect(line_rect)
        lines.append(entry)

    block_dict[dictkey_bbox] = JM_py_from_rect(block_rect)
    block_dict[dictkey_lines] = lines
def JM_make_textpage_dict(tp, page_dict, raw):
    '''
    Store the list of block dicts of text page `tp` in
    `page_dict[dictkey_blocks]`.

    Unless the text page rectangle is infinite, image blocks not fully
    contained in it and any block not intersecting it are skipped. Block
    numbers count every block, including skipped ones.
    '''
    if g_use_extra:
        return extra.JM_make_textpage_dict(tp.m_internal, page_dict, raw)

    text_buffer = mupdf.fz_new_buffer(128)
    tp_rect = mupdf.FzRect(tp.m_internal.mediabox)
    blocks = []
    for number, block in enumerate(tp):
        is_image = block.m_internal.type == mupdf.FZ_STEXT_BLOCK_IMAGE
        if not mupdf.fz_is_infinite_rect(tp_rect):
            bbox = mupdf.FzRect(block.m_internal.bbox)
            if is_image and not mupdf.fz_contains_rect(tp_rect, bbox):
                continue
            if mupdf.fz_is_empty_rect(mupdf.fz_intersect_rect(tp_rect, bbox)):
                continue

        entry = {
                dictkey_number: number,
                dictkey_type: block.m_internal.type,
                }
        if is_image:
            entry[dictkey_bbox] = JM_py_from_rect(block.m_internal.bbox)
            JM_make_image_block(block, entry)
        else:
            JM_make_text_block(block, entry, raw, text_buffer, tp_rect)
        blocks.append(entry)
    page_dict[dictkey_blocks] = blocks
def JM_matrix_from_py(m):
    '''
    Convert a matrix-like object to mupdf.FzMatrix.

    Accepts mupdf.FzMatrix (returned unchanged), Matrix, or a sequence of 6
    floats. Any other input, including a sequence with an item that
    JM_FLOAT_ITEM() reports as None, yields a default-constructed
    mupdf.FzMatrix().
    '''
    if isinstance(m, mupdf.FzMatrix):
        return m
    if isinstance(m, Matrix):
        return mupdf.FzMatrix(m.a, m.b, m.c, m.d, m.e, m.f)
    if not m or not PySequence_Check(m) or PySequence_Size(m) != 6:
        return mupdf.FzMatrix()
    a = []
    for i in range(6):
        item = JM_FLOAT_ITEM(m, i)
        if item is None:
            # This branch used to return mupdf.FzRect(), handing callers a
            # rectangle where a matrix is expected. Use the same default as
            # for other invalid input.
            return mupdf.FzMatrix()
        a.append(item)
    return mupdf.FzMatrix(a[0], a[1], a[2], a[3], a[4], a[5])
def JM_mediabox(page_obj):
    '''
    Return a PDF page's MediaBox as a normalised mupdf.FzRect.

    The inheritable /MediaBox is read; if it is empty or infinite,
    (0, 0, 612, 792) is used. Coordinates are ordered so that x0 <= x1 and
    y0 <= y1. If the resulting width or height is below 1, the unit
    rectangle is returned instead.
    '''
    mediabox = mupdf.pdf_to_rect(
            mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('MediaBox'))
            )
    if mupdf.fz_is_empty_rect(mediabox) or mupdf.fz_is_infinite_rect(mediabox):
        mediabox.x0 = 0
        mediabox.y0 = 0
        mediabox.x1 = 612
        mediabox.y1 = 792

    left = mupdf.fz_min(mediabox.x0, mediabox.x1)
    top = mupdf.fz_min(mediabox.y0, mediabox.y1)
    right = mupdf.fz_max(mediabox.x0, mediabox.x1)
    bottom = mupdf.fz_max(mediabox.y0, mediabox.y1)
    normalised = mupdf.FzRect(left, top, right, bottom)

    too_small = (
            normalised.x1 - normalised.x0 < 1
            or normalised.y1 - normalised.y0 < 1
            )
    if too_small:
        return mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
    return normalised
def JM_merge_range(
        doc_des,
        doc_src,
        spage,
        epage,
        apage,
        rotate,
        links,
        annots,
        show_progress,
        graft_map,
        ):
    '''
    Copy the source pages spage..epage (inclusive) into the target PDF.

    The first page is passed to page_merge() with target position `apage`,
    and the position grows by one per copied page. If spage > epage, the
    source pages are taken in descending order. If `show_progress` is
    positive, a message is emitted after every `show_progress` pages.
    '''
    if g_use_extra:
        return extra.JM_merge_range(
                doc_des,
                doc_src,
                spage,
                epage,
                apage,
                rotate,
                links,
                annots,
                show_progress,
                graft_map,
                )

    total = mupdf.fz_absi(epage - spage) + 1  # total pages to copy
    step = 1 if spage < epage else -1
    afterpage = apage
    # `counter` is the number of pages copied so far
    for counter, page in enumerate(range(spage, epage + step, step), start=1):
        page_merge(doc_des, doc_src, page, afterpage, rotate, links, annots, graft_map)
        if show_progress > 0 and counter % show_progress == 0:
            message(f"Inserted {counter} of {total} pages.")
        afterpage += 1
def JM_merge_resources( page, temp_res):
    '''
    Merge the /Resources object created by a text pdf device into the page.
    The device may have created multiple /ExtGState/Alp? and /Font/F? objects.
    These need to be renamed (renumbered) to not overwrite existing page
    objects from previous executions.
    Returns the next available numbers n, m for objects /Alp<n>, /F<m>.

    Each copied entry is renamed by adding the returned offset to the number
    in its original name. The page's /ExtGState and /Font dictionaries are
    created if missing.
    '''
    # page objects /Resources, /Resources/ExtGState, /Resources/Font
    resources = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Resources'))
    main_extg = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState'))
    main_fonts = mupdf.pdf_dict_get(resources, PDF_NAME('Font'))
    # text pdf device objects /ExtGState, /Font
    temp_extg = mupdf.pdf_dict_get(temp_res, PDF_NAME('ExtGState'))
    temp_fonts = mupdf.pdf_dict_get(temp_res, PDF_NAME('Font'))
    # -1 so that the "+= 1" below yields 0 when the page has no such names yet
    max_alp = -1
    max_fonts = -1
    # Handle /Alp objects
    if mupdf.pdf_is_dict(temp_extg): # any created at all?
        n = mupdf.pdf_dict_len(temp_extg)
        if mupdf.pdf_is_dict(main_extg): # does page have /ExtGState yet?
            for i in range(mupdf.pdf_dict_len(main_extg)):
                # get highest number of objects named /Alpxxx
                alp = mupdf.pdf_to_name( mupdf.pdf_dict_get_key(main_extg, i))
                if not alp.startswith('Alp'):
                    continue
                j = mupdf.fz_atoi(alp[3:])
                if j > max_alp:
                    max_alp = j
        else: # create a /ExtGState for the page
            main_extg = mupdf.pdf_dict_put_dict(resources, PDF_NAME('ExtGState'), n)
        # first free number; also the offset added to each copied name
        max_alp += 1
        for i in range(n): # copy over renumbered /Alp objects
            alp = mupdf.pdf_to_name( mupdf.pdf_dict_get_key( temp_extg, i))
            j = mupdf.fz_atoi(alp[3:]) + max_alp
            text = f'Alp{j}'
            val = mupdf.pdf_dict_get_val( temp_extg, i)
            mupdf.pdf_dict_puts(main_extg, text, val)
    # Handle /F objects; unlike /ExtGState, this part always runs.
    if mupdf.pdf_is_dict(main_fonts): # has page any fonts yet?
        for i in range(mupdf.pdf_dict_len(main_fonts)): # get max font number
            font = mupdf.pdf_to_name( mupdf.pdf_dict_get_key( main_fonts, i))
            if not font.startswith("F"):
                continue
            j = mupdf.fz_atoi(font[1:])
            if j > max_fonts:
                max_fonts = j
    else: # create a Resources/Font for the page
        main_fonts = mupdf.pdf_dict_put_dict(resources, PDF_NAME('Font'), 2)
    # first free number; also the offset added to each copied name
    max_fonts += 1
    for i in range(mupdf.pdf_dict_len(temp_fonts)): # copy renumbered fonts
        font = mupdf.pdf_to_name( mupdf.pdf_dict_get_key( temp_fonts, i))
        j = mupdf.fz_atoi(font[1:]) + max_fonts
        text = f'F{j}'
        val = mupdf.pdf_dict_get_val(temp_fonts, i)
        mupdf.pdf_dict_puts(main_fonts, text, val)
    return (max_alp, max_fonts) # next available numbers
def JM_mupdf_warning( text):
    '''
    Redirect a MuPDF warning.

    The text is always appended to JM_mupdf_warnings_store; it is also
    emitted via message() if JM_mupdf_show_warnings is set.
    '''
    JM_mupdf_warnings_store.append(text)
    if not JM_mupdf_show_warnings:
        return
    message(f'MuPDF warning: {text}')
def JM_mupdf_error( text):
    '''
    Redirect a MuPDF error.

    The text is always appended to JM_mupdf_warnings_store; it is also
    emitted via message() if JM_mupdf_show_errors is set.
    '''
    JM_mupdf_warnings_store.append(text)
    if not JM_mupdf_show_errors:
        return
    message(f'MuPDF error: {text}\n')
def JM_new_bbox_device(rc, inc_layers):
    '''
    Create a JM_new_bbox_device_Device for the list `rc` and the
    `inc_layers` setting.
    '''
    assert isinstance(rc, list)
    device = JM_new_bbox_device_Device(rc, inc_layers)
    return device
def JM_new_buffer_from_stext_page(page):
    '''
    Make a buffer from the text of an stext page.

    Only text blocks contribute. Characters whose bbox does not overlap the
    page's mediabox are skipped unless that rectangle is infinite. A
    newline byte is appended after every line and after every text block.
    '''
    assert isinstance(page, mupdf.FzStextPage)
    clip = mupdf.FzRect(page.m_internal.mediabox)
    newline = ord('\n')
    out = mupdf.fz_new_buffer(256)
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            for ch in line:
                outside = not JM_rects_overlap(clip, JM_char_bbox(line, ch))
                if outside and not mupdf.fz_is_infinite_rect(clip):
                    continue
                mupdf.fz_append_rune(out, ch.m_internal.c)
            mupdf.fz_append_byte(out, newline)
        mupdf.fz_append_byte(out, newline)
    return out
def JM_new_javascript(pdf, value):
    '''
    Make a new PDF action object from JavaScript source.

    `pdf` is a PDF document, `value` a Python string. The source is stored
    UTF-8 encoded in a new stream which becomes /JS of a new dictionary
    with /S set to /JavaScript. Returns that dictionary, or None if `value`
    is None or JM_StrAsChar() cannot convert it.
    '''
    if value is None:
        return  # no argument given
    source_text = JM_StrAsChar(value)
    if source_text is None:
        return  # not convertible to char*

    buf = mupdf.fz_new_buffer_from_copied_data(source_text.encode('utf8'))
    stream = mupdf.pdf_add_stream(pdf, buf, mupdf.PdfObj(), 0)
    action = mupdf.pdf_add_new_dict(pdf, 4)
    mupdf.pdf_dict_put(action, PDF_NAME('S'), mupdf.pdf_new_name('JavaScript'))
    mupdf.pdf_dict_put(action, PDF_NAME('JS'), stream)
    return action
def JM_new_output_fileptr(bio):
    '''
    Wrap `bio` in a JM_new_output_fileptr_Output object and return it.
    '''
    output = JM_new_output_fileptr_Output(bio)
    return output
def JM_norm_rotation(rotate):
    '''
    Return a normalized /Rotate value: one of 0, 90, 180, 270.

    `rotate` is reduced to the range 0 <= rotate < 360. Values that are
    not a multiple of 90 yield 0.
    '''
    # Modulo replaces the former "add / subtract 360 until in range" loops,
    # whose running time grew with the magnitude of the input and which
    # never finished for infinite values.
    rotate %= 360
    if rotate >= 360:
        # Float rounding can turn a tiny negative input into exactly 360.0.
        rotate -= 360
    if rotate % 90 != 0:
        return 0
    return rotate
def JM_object_to_buffer(what, compress, ascii):
    '''
    Print PDF object `what` into a new buffer and return the buffer.

    `compress` and `ascii` are passed through to pdf_print_obj(). The
    output is closed and the buffer terminated before it is returned.
    '''
    target = mupdf.fz_new_buffer(512)
    sink = mupdf.FzOutput(target)
    mupdf.pdf_print_obj(sink, what, compress, ascii)
    sink.fz_close_output()
    mupdf.fz_terminate_buffer(target)
    return target
def JM_outline_xrefs(obj, xrefs):
    '''
    Return the list of outline xref numbers. Recursive function.

    Arguments:
        'obj'   first outline item
        'xrefs' Python list collecting the numbers (empty on first call)

    Items are followed via /First (recursively) and /Next. Walking stops at
    an item whose xref is already in the list or which has a /Type entry.
    '''
    node = obj
    while node.m_internal:
        xref = mupdf.pdf_to_num(node)
        if xref in xrefs:
            break  # circular reference
        if mupdf.pdf_dict_get(node, PDF_NAME('Type')).m_internal:
            break  # top of chain
        xrefs.append(xref)

        child = mupdf.pdf_dict_get(node, PDF_NAME('First'))  # try go down
        if mupdf.pdf_is_dict(child):
            xrefs = JM_outline_xrefs(child, xrefs)

        successor = mupdf.pdf_dict_get(node, PDF_NAME('Next'))  # try go next
        # the parent is looked up on the /Next object, not on the current item
        parent = mupdf.pdf_dict_get(successor, PDF_NAME('Parent'))
        node = successor if mupdf.pdf_is_dict(successor) else parent
    return xrefs
def JM_page_rotation(page):
    '''
    Return a PDF page's /Rotate value, normalised by JM_norm_rotation().

    The (inheritable) /Rotate entry is read as an integer.
    '''
    rotate_obj = mupdf.pdf_dict_get_inheritable(
            page.obj(),
            mupdf.PDF_ENUM_NAME_Rotate,
            )
    return JM_norm_rotation(mupdf.pdf_to_int(rotate_obj))
def JM_pdf_obj_from_str(doc, src):
    '''
    Create a PDF object from the string `src` (new in v1.14.0: MuPDF
    dropped it).

    The string is UTF-8 encoded, wrapped in a buffer stream and parsed with
    pdf_parse_stm_obj().
    '''
    # fixme: seems inefficient to convert to bytes instance then make another
    # copy inside fz_new_buffer_from_copied_data(), but no other way?
    encoded = bytes(src, 'utf8')
    source_buffer = mupdf.fz_new_buffer_from_copied_data(encoded)
    stream = mupdf.fz_open_buffer(source_buffer)
    lexbuf = mupdf.PdfLexbuf(mupdf.PDF_LEXBUF_SMALL)
    return mupdf.pdf_parse_stm_obj(doc, stream, lexbuf)
def JM_pixmap_from_display_list(
        list_,
        ctm,
        cs,
        alpha,
        clip,
        seps,
        ):
    '''
    Version of fz_new_pixmap_from_display_list (util.c) that can also
    render only the `clip` part of the display list rectangle.

    The pixmap is cleared (to 0xFF if there is no alpha channel), the
    display list is run through a draw device and the result is returned
    as a Pixmap.
    '''
    assert isinstance(list_, mupdf.FzDisplayList)
    if seps is None:
        seps = mupdf.FzSeparations()
    assert seps is None or isinstance(seps, mupdf.FzSeparations), f'{type(seps)=}: {seps}'

    matrix = JM_matrix_from_py(ctm)
    rclip = JM_rect_from_py(clip)
    bounds = mupdf.fz_bound_display_list(list_)
    bounds = mupdf.fz_intersect_rect(bounds, rclip)  # no-op if clip is not given
    bounds = mupdf.fz_transform_rect(bounds, matrix)
    irect = mupdf.fz_round_rect(bounds)

    assert isinstance( cs, mupdf.FzColorspace)
    pix = mupdf.fz_new_pixmap_with_bbox(cs, irect, seps, alpha)
    if alpha:
        mupdf.fz_clear_pixmap(pix)
    else:
        mupdf.fz_clear_pixmap_with_value(pix, 0xFF)

    if mupdf.fz_is_infinite_rect(rclip):
        dev = mupdf.fz_new_draw_device(matrix, pix)
        run_area = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
    else:
        dev = mupdf.fz_new_draw_device_with_bbox(matrix, pix, irect)
        run_area = rclip
    mupdf.fz_run_display_list(list_, dev, mupdf.FzMatrix(), run_area, mupdf.FzCookie())
    mupdf.fz_close_device(dev)
    # Use special raw Pixmap constructor so we don't set alpha to true.
    return Pixmap( 'raw', pix)
def JM_point_from_py(p):
    '''
    Convert a point-like object to mupdf.FzPoint.

    Accepts mupdf.FzPoint (returned unchanged), Point, or a sequence whose
    first two items are floats. Coordinates are limited to the range
    FZ_MIN_INF_RECT..FZ_MAX_INF_RECT. If either item is reported as None by
    JM_FLOAT_ITEM(), FzPoint(0, 0) is returned.
    '''
    if isinstance(p, mupdf.FzPoint):
        return p
    if isinstance(p, Point):
        return mupdf.FzPoint(p.x, p.y)
    if g_use_extra:
        return extra.JM_point_from_py( p)

    fallback = mupdf.FzPoint(0, 0)
    x = JM_FLOAT_ITEM(p, 0)
    y = JM_FLOAT_ITEM(p, 1)
    if x is None or y is None:
        return fallback
    x = min(max(x, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT)
    y = min(max(y, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT)
    return mupdf.FzPoint(x, y)
def JM_print_stext_page_as_text(res, page):
    '''
    Plain text output, modelled on MuPDF's fz_print_stext_page_as_text.

    Appends the characters of all text blocks of `page` to buffer `res`.
    Characters whose bbox does not overlap the page's mediabox are skipped
    unless that rectangle is infinite. After each line, a new-line is
    appended if the line's last emitted character exists and is not itself
    a new-line (code 10), which avoids doubled new-lines.

    Args:
        res: mupdf.FzBuffer receiving the text.
        page: mupdf.FzStextPage to read.
    '''
    if 1 and g_use_extra:
        return extra.JM_print_stext_page_as_text(res, page)

    assert isinstance(res, mupdf.FzBuffer)
    assert isinstance(page, mupdf.FzStextPage)
    rect = mupdf.FzRect(page.m_internal.mediabox)
    rect_is_infinite = mupdf.fz_is_infinite_rect(rect)

    # A former counting pass over all blocks, lines and characters was
    # removed: its results were never used, and it raised UnboundLocalError
    # for a line without characters or a text block without lines.
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            last_char = 0
            for ch in line:
                if rect_is_infinite or JM_rects_overlap(rect, JM_char_bbox(line, ch)):
                    last_char = ch.m_internal.c
                    JM_append_rune(res, last_char)
            if last_char != 10 and last_char > 0:
                mupdf.fz_append_string(res, "\n")
def JM_put_script(annot_obj, key1, key2, value):
    '''
    Create, replace or delete a JavaScript PDF action.

    Usable for all object types which support PDF actions, even if the
    argument name suggests annotations. Up to 2 key values can be specified,
    so JavaScript actions can be stored for '/A' and '/AA/?' keys.

    Args:
        annot_obj: PDF object owning the action.
        key1: first-level key.
        key2: optional second-level key; None or a null PdfObj means the
            action is stored directly under `key1`.
        value: new script source. A false value deletes the existing entry.
    '''
    key1_obj = mupdf.pdf_dict_get(annot_obj, key1)
    pdf = mupdf.pdf_get_bound_document(annot_obj)  # owning PDF

    # Evaluate "no second key" once. The deletion branch already accepted
    # key2=None, while the later branches accessed key2.m_internal directly
    # and raised AttributeError for None.
    single_key = not key2 or not key2.m_internal

    # if no new script given, just delete corresponding key
    if not value:
        if single_key:
            mupdf.pdf_dict_del(annot_obj, key1)
        elif key1_obj.m_internal:
            mupdf.pdf_dict_del(key1_obj, key2)
        return

    # read any existing script
    if single_key or not key1_obj.m_internal:
        script = JM_get_script(key1_obj)
    else:
        script = JM_get_script(mupdf.pdf_dict_get(key1_obj, key2))

    # replace old script, if different from new one
    if value != script:
        newaction = JM_new_javascript(pdf, value)
        if single_key:
            mupdf.pdf_dict_put(annot_obj, key1, newaction)
        else:
            mupdf.pdf_dict_putl(annot_obj, newaction, key1, key2)
def JM_py_from_irect(r):
    '''
    Return the tuple (x0, y0, x1, y1) of an irect-like object.
    '''
    x0, y0 = r.x0, r.y0
    x1, y1 = r.x1, r.y1
    return (x0, y0, x1, y1)
def JM_py_from_matrix(m):
    '''
    Return the tuple (a, b, c, d, e, f) of a matrix-like object.
    '''
    a, b, c = m.a, m.b, m.c
    d, e, f = m.d, m.e, m.f
    return (a, b, c, d, e, f)
def JM_py_from_point(p):
    '''
    Return the tuple (x, y) of a point-like object.
    '''
    x = p.x
    y = p.y
    return (x, y)
def JM_py_from_quad(q):
    '''
    Return a quad-like object as a tuple of four (x, y) tuples, in the
    order ul, ur, ll, lr.
    '''
    corners = (q.ul, q.ur, q.ll, q.lr)
    return tuple((corner.x, corner.y) for corner in corners)
def JM_py_from_rect(r):
    '''
    Return the tuple (x0, y0, x1, y1) of a rect-like object.
    '''
    x0, y0 = r.x0, r.y0
    x1, y1 = r.x1, r.y1
    return (x0, y0, x1, y1)
def JM_quad_from_py(r):
    '''
    Convert a quad-like object to mupdf.FzQuad.

    Accepts mupdf.FzQuad (returned unchanged), mupdf.FzRect, Quad, a
    sequence of 4 floats (treated as a rectangle), or a tuple / list of four
    2-item point sequences in the order ul, ur, ll, lr. Point coordinates
    are limited to the range FZ_MIN_INF_RECT..FZ_MAX_INF_RECT. Invalid
    input yields a quad with all coordinates 0.
    '''
    if isinstance(r, mupdf.FzQuad):
        return r
    # cover all cases of 4-float-sequences
    if hasattr(r, "__getitem__") and len(r) == 4 and hasattr(r[0], "__float__"):
        r = mupdf.FzRect(*tuple(r))
    if isinstance( r, mupdf.FzRect):
        return mupdf.fz_quad_from_rect( r)
    if isinstance( r, Quad):
        return mupdf.fz_make_quad(
                r.ul.x, r.ul.y,
                r.ur.x, r.ur.y,
                r.ll.x, r.ll.y,
                r.lr.x, r.lr.y,
                )
    q = mupdf.fz_make_quad(0, 0, 0, 0, 0, 0, 0, 0)
    if not r or not isinstance(r, (tuple, list)) or len(r) != 4:
        return q
    # NOTE(review): this treats `r` as a rectangle when its first item is
    # NOT a float, which looks inverted relative to the 4-float check above.
    # Left unchanged because JM_FLOAT_ITEM() is defined elsewhere - confirm.
    if JM_FLOAT_ITEM(r, 0) is None:
        return mupdf.fz_quad_from_rect(JM_rect_from_py(r))

    # Collect the 8 coordinates and build the quad in one call. The former
    # code stored them as attributes of the integers in `[0,0,0,0]`, which
    # raises AttributeError.
    coords = []
    for obj in r:  # next point item
        if not PySequence_Check(obj) or PySequence_Size(obj) != 2:
            return q  # invalid: cancel the rest
        x = JM_FLOAT_ITEM(obj, 0)
        y = JM_FLOAT_ITEM(obj, 1)
        if x is None or y is None:
            return q
        coords.append(min(max(x, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT))
        coords.append(min(max(y, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT))
    return mupdf.fz_make_quad(*coords)
def JM_read_contents(pageref):
    '''
    Read and concatenate a PDF page's /Contents object(s) in a buffer.

    If /Contents is an array, the data of all its stream items are joined
    with a single space byte between array positions. If it is a single
    object, its stream is loaded. If the page has no /Contents, an empty
    buffer is returned.

    Args:
        pageref: mupdf.PdfObj of the page.

    Returns:
        mupdf.FzBuffer with the content data.
    '''
    assert isinstance(pageref, mupdf.PdfObj), f'{type(pageref)}'
    contents = mupdf.pdf_dict_get(pageref, mupdf.PDF_ENUM_NAME_Contents)
    if mupdf.pdf_is_array(contents):
        res = mupdf.FzBuffer(1024)
        for i in range(mupdf.pdf_array_len(contents)):
            if i > 0:
                mupdf.fz_append_byte(res, 32)
            obj = mupdf.pdf_array_get(contents, i)
            if mupdf.pdf_is_stream(obj):
                mupdf.fz_append_buffer(res, mupdf.pdf_load_stream(obj))
        return res
    if contents.m_internal:
        return mupdf.pdf_load_stream(contents)
    # No /Contents at all. Previously `res` was never assigned on this path
    # and the return statement raised UnboundLocalError.
    return mupdf.FzBuffer(1024)
def JM_rect_from_py(r):
    '''
    Convert a rect-like Python object to a `mupdf.FzRect`.

    `mupdf.FzRect` is returned unchanged; `mupdf.FzIrect`, `Rect` and `IRect`
    are converted. A tuple / list of four float items is clamped to the range
    FZ_MIN_INF_RECT .. FZ_MAX_INF_RECT. Any other input gives the infinite
    rectangle.
    '''
    if isinstance(r, mupdf.FzRect):
        return r
    if isinstance(r, mupdf.FzIrect):
        return mupdf.FzRect(r)
    if isinstance(r, (Rect, IRect)):
        return mupdf.fz_make_rect(r.x0, r.y0, r.x1, r.y1)
    if not r or not PySequence_Check(r) or PySequence_Size(r) != 4:
        return mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)

    coords = []
    for index in range(4):
        value = JM_FLOAT_ITEM(r, index)
        if value is None:
            return mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
        # clamp to the supported coordinate range
        if value < FZ_MIN_INF_RECT:
            value = FZ_MIN_INF_RECT
        if value > FZ_MAX_INF_RECT:
            value = FZ_MAX_INF_RECT
        coords.append(value)
    return mupdf.fz_make_rect(*coords)
def JM_rects_overlap(a, b):
    '''
    Return 1 if rectangles `a` and `b` share interior area, else 0.
    Rectangles that merely touch along an edge do not overlap.
    '''
    disjoint = (
            a.x0 >= b.x1
            or a.y0 >= b.y1
            or a.x1 <= b.x0
            or a.y1 <= b.y0
            )
    return 0 if disjoint else 1
def JM_refresh_links( page):
    '''
    Reload the link table of a PDF page from its /Annots array.
    Does nothing if `page` is falsy or the page has no /Annots entry.
    '''
    if not page:
        return
    annots = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots'))
    if not annots.m_internal:
        return
    pdf = page.doc()
    number = mupdf.pdf_lookup_page_number( pdf, page.obj())
    # pdf_page_transform() fills in the mediabox and the page matrix
    page_mediabox = mupdf.FzRect()
    page_ctm = mupdf.FzMatrix()
    mupdf.pdf_page_transform( page, page_mediabox, page_ctm)
    link = mupdf.pdf_load_link_annots( pdf, page, annots, number, page_ctm)
    page.m_internal.links = mupdf.ll_fz_keep_link( link.m_internal)
def JM_rotate_page_matrix(page):
    '''
    Return the matrix matching the page's rotation.

    A default `mupdf.FzMatrix()` is returned when `page` is not a valid PDF
    page or when its rotation is 0. Any rotation other than 90 or 180 uses
    the third matrix.
    '''
    if not page.m_internal:
        return mupdf.FzMatrix()  # no valid pdf page given
    rotation = JM_page_rotation(page)
    if rotation == 0:
        return mupdf.FzMatrix()  # no rotation
    size = JM_cropbox_size(page.obj())
    width, height = size.x, size.y
    if rotation == 90:
        return mupdf.fz_make_matrix(0, 1, -1, 0, height, 0)
    if rotation == 180:
        return mupdf.fz_make_matrix(-1, 0, 0, -1, width, height)
    return mupdf.fz_make_matrix(0, -1, 1, 0, 0, width)
def JM_search_stext_page(page, needle):
    '''
    Search a structured-text page for `needle` and return the hit quads.

    Delegates to `extra.JM_search_stext_page()` when `g_use_extra` is set.
    Otherwise the page text is extracted into one string (the "haystack")
    and the page's characters are walked in parallel with a position in that
    string, passing each character that falls inside a match to
    `on_highlight_char()`.

    Returns:
        None if `needle` is empty, otherwise the list `quads`
        (presumably filled by `on_highlight_char()` via `hits.quads` -
        confirm against that helper).
    '''
    if g_use_extra:
        return extra.JM_search_stext_page(page.m_internal, needle)
    rect = mupdf.FzRect(page.m_internal.mediabox)
    if not needle:
        return
    quads = []
    # Simple attribute container handed to on_highlight_char().
    class Hits:
        def __str__(self):
            return f'Hits(len={self.len} quads={self.quads} hfuzz={self.hfuzz} vfuzz={self.vfuzz}'
    hits = Hits()
    hits.len = 0
    hits.quads = quads
    hits.hfuzz = 0.2 # merge kerns but not large gaps
    hits.vfuzz = 0.1
    buffer_ = JM_new_buffer_from_stext_page(page)
    haystack_string = mupdf.fz_string_from_buffer(buffer_)
    # `haystack` is the current index into haystack_string; `begin` / `end`
    # are the absolute indices delimiting the current match.
    haystack = 0
    begin, end = find_string(haystack_string[haystack:], needle)
    if begin is None:
        #goto no_more_matches;
        return quads
    # find_string() searched a slice, so shift its result to absolute indices
    begin += haystack
    end += haystack
    inside = 0
    i = 0
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            for ch in line:
                i += 1
                # Characters outside `rect` are skipped without advancing
                # `haystack`.
                if not mupdf.fz_is_infinite_rect(rect):
                    r = JM_char_bbox(line, ch)
                    if not JM_rects_overlap(rect, r):
                        #goto next_char;
                        continue
                # This loop emulates the "goto try_new_match" of the C
                # original: `continue` retries the same character against the
                # next match, `break` moves on to the next character.
                while 1:
                    #try_new_match:
                    if not inside:
                        if haystack >= begin:
                            inside = 1
                    if inside:
                        if haystack < end:
                            on_highlight_char(hits, line, ch)
                            break
                        else:
                            # current match is exhausted: look for the next
                            inside = 0
                            begin, end = find_string(haystack_string[haystack:], needle)
                            if begin is None:
                                #goto no_more_matches;
                                return quads
                            else:
                                #goto try_new_match;
                                begin += haystack
                                end += haystack
                                continue
                    break
                haystack += 1
                #next_char:;
            # every line is expected to be followed by a newline in the haystack
            assert haystack_string[haystack] == '\n', \
                    f'{haystack=} {haystack_string[haystack]=}'
            haystack += 1
        # every text block is expected to be followed by one more newline
        assert haystack_string[haystack] == '\n', \
                f'{haystack=} {haystack_string[haystack]=}'
        haystack += 1
    #no_more_matches:;
    return quads
def JM_scan_resources(pdf, rsrc, liste, what, stream_xref, tracer):
    '''
    Walk a /Resources dictionary and collect font, image or Form XObject
    information into `liste`, recursing into nested Form XObjects.

    Args:
        what: 1 = fonts, 2 = images, 3 = form xobjects; any other value
            makes the function return without collecting anything.
        tracer: list of stream xrefs already visited, used to stop on
            circular references.
    '''
    if mupdf.pdf_mark_obj(rsrc):
        mupdf.fz_warn('Circular dependencies! Consider page cleaning.')
        return  # Circular dependencies!
    try:
        xobj = mupdf.pdf_dict_get(rsrc, mupdf.PDF_ENUM_NAME_XObject)

        if what == 1:   # lookup fonts
            font = mupdf.pdf_dict_get(rsrc, mupdf.PDF_ENUM_NAME_Font)
            JM_gather_fonts(pdf, font, liste, stream_xref)
        elif what == 2: # look up images
            JM_gather_images(pdf, xobj, liste, stream_xref)
        elif what == 3: # look up form xobjects
            JM_gather_forms(pdf, xobj, liste, stream_xref)
        else:   # should never happen
            return

        # recurse into every XObject that carries its own /Resources
        for index in range(mupdf.pdf_dict_len(xobj)):
            child = mupdf.pdf_dict_get_val(xobj, index)
            child_xref = mupdf.pdf_to_num(child) if mupdf.pdf_is_stream(child) else 0
            child_rsrc = mupdf.pdf_dict_get(child, mupdf.PDF_ENUM_NAME_Resources)
            if not child_rsrc.m_internal:
                continue
            if child_xref in tracer:
                mupdf.fz_warn('Circular dependencies! Consider page cleaning.')
                return
            tracer.append(child_xref)
            JM_scan_resources( pdf, child_rsrc, liste, what, child_xref, tracer)
    finally:
        mupdf.pdf_unmark_obj(rsrc)
def JM_set_choice_options(annot, liste):
    '''
    Store the option list of a ListBox / ComboBox in the annotation's /Opt.

    Each item of `liste` is either a string or a pair of two non-empty
    values, which is stored as a 2-item sub-array. Nothing happens if
    `liste` is empty.
    '''
    if not liste:
        return
    assert isinstance( liste, (tuple, list))
    count = len( liste)
    if count == 0:
        return
    annot_obj = mupdf.pdf_annot_obj( annot)
    pdf = mupdf.pdf_get_bound_document( annot_obj)
    options = mupdf.pdf_new_array( pdf, count)
    for entry in liste:
        if isinstance(entry, str):
            mupdf.pdf_array_push_text_string( options, entry)
            continue
        assert isinstance( entry, (tuple, list)) and len( entry) == 2, 'bad choice field list'
        first, second = entry
        assert first and second, 'bad choice field list'
        pair = mupdf.pdf_array_push_array( options, 2)
        mupdf.pdf_array_push_text_string( pair, first)
        mupdf.pdf_array_push_text_string( pair, second)
    mupdf.pdf_dict_put( annot_obj, PDF_NAME('Opt'), options)
def JM_set_field_type(doc, obj, type):
    '''
    Set the /FT entry of field `obj` and adjust its /Ff flag bits to match
    widget type `type`. Unknown widget types leave /FT untouched.
    '''
    # (widget type, /FT name, bits to set, bits to clear)
    table = (
            (mupdf.PDF_WIDGET_TYPE_BUTTON, 'Btn',
                mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON, 0),
            (mupdf.PDF_WIDGET_TYPE_RADIOBUTTON, 'Btn',
                mupdf.PDF_BTN_FIELD_IS_RADIO, mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON),
            (mupdf.PDF_WIDGET_TYPE_CHECKBOX, 'Btn',
                0, (mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON | mupdf.PDF_BTN_FIELD_IS_RADIO)),
            (mupdf.PDF_WIDGET_TYPE_TEXT, 'Tx', 0, 0),
            (mupdf.PDF_WIDGET_TYPE_LISTBOX, 'Ch',
                0, mupdf.PDF_CH_FIELD_IS_COMBO),
            (mupdf.PDF_WIDGET_TYPE_COMBOBOX, 'Ch',
                mupdf.PDF_CH_FIELD_IS_COMBO, 0),
            (mupdf.PDF_WIDGET_TYPE_SIGNATURE, 'Sig', 0, 0),
            )
    typename = None
    setbits = 0
    clearbits = 0
    for widget_type, ft_name, to_set, to_clear in table:
        if type == widget_type:
            typename = PDF_NAME(ft_name)
            setbits = to_set
            clearbits = to_clear
            break

    if typename:
        mupdf.pdf_dict_put(obj, PDF_NAME('FT'), typename)

    if setbits != 0 or clearbits != 0:
        flags = mupdf.pdf_dict_get_int(obj, PDF_NAME('Ff'))
        flags = (flags & ~clearbits) | setbits
        mupdf.pdf_dict_put_int(obj, PDF_NAME('Ff'), flags)
def JM_set_object_value(obj, key, value):
    '''
    Set a PDF dict key to some value.

    A placeholder string is first stored under `key` (creating intermediate
    sub-dictionaries as needed). The object is then serialised, the
    placeholder is replaced by `value` in the source text, and a new PDF
    object is parsed from the result.

    Args:
        obj: PDF object to modify.
        key: (str) key path, sub-keys separated by '/'.
        value: (str) PDF source of the new value.
    Returns:
        The new PDF object built from the modified source.
    Raises:
        Exception: if the path to a not yet existing key passes through an
            indirect reference, or if the placeholder cannot be inserted.
    '''
    eyecatcher = "fitz: replace me!"
    pdf = mupdf.pdf_get_bound_document(obj)
    # split PDF key at path seps and take last key part
    parents = key.split('/')
    skey = parents.pop()

    testkey = mupdf.pdf_dict_getp(obj, key)  # check if key already exists
    if not testkey.m_internal:
        # No, it will be created here. But we cannot allow this happening if
        # indirect objects are referenced. So we check all higher level
        # sub-paths for indirect references.
        while parents:
            t = '/'.join(parents)   # next high level
            if mupdf.pdf_is_indirect(mupdf.pdf_dict_getp(obj, JM_StrAsChar(t))):
                # The message used to be passed as un-interpolated
                # ("%s" format, argument) exception args.
                raise Exception(f"path to '{JM_StrAsChar(skey)}' has indirects")
            parents.pop()   # del last sub-key

    # Insert our eyecatcher. Will create all sub-paths in the chain, or
    # respectively remove old value of key-path.
    mupdf.pdf_dict_putp(obj, key, mupdf.pdf_new_text_string(eyecatcher))
    testkey = mupdf.pdf_dict_getp(obj, key)
    if not mupdf.pdf_is_string(testkey):
        raise Exception(f"cannot insert value for '{key}'")
    temp = mupdf.pdf_to_text_string(testkey)
    if temp != eyecatcher:
        raise Exception(f"cannot insert value for '{key}'")

    # read the result as a string
    res = JM_object_to_buffer(obj, 1, 0)
    objstr = JM_EscapeStrFromBuffer(res)

    # replace 'eyecatcher' by desired 'value'
    nullval = "/%s(%s)" % ( skey, eyecatcher)
    newval = "/%s %s" % (skey, value)
    newstr = objstr.replace(nullval, newval, 1)

    # make PDF object from resulting string
    new_obj = JM_pdf_obj_from_str(pdf, newstr)
    return new_obj
def JM_set_ocg_arrays(conf, basestate, on, off, rbgroups, locked):
    '''
    Update the arrays of an optional content configuration dictionary.

    For each of `on`, `off`, `locked` and `rbgroups`: None leaves the
    existing entry alone, an empty sequence deletes it, a non-empty sequence
    replaces it. `basestate`, if truthy, is stored as /BaseState.
    '''
    if basestate:
        mupdf.pdf_dict_put_name( conf, PDF_NAME('BaseState'), basestate)

    # the three flat xref arrays are all handled the same way
    for key, xrefs in (('ON', on), ('OFF', off), ('Locked', locked)):
        if xrefs is None:
            continue
        mupdf.pdf_dict_del( conf, PDF_NAME(key))
        if xrefs:
            target = mupdf.pdf_dict_put_array( conf, PDF_NAME(key), 1)
            JM_set_ocg_arrays_imp( target, xrefs)

    # /RBGroups is an array of arrays
    if rbgroups is None:
        return
    mupdf.pdf_dict_del( conf, PDF_NAME('RBGroups'))
    if rbgroups:
        groups = mupdf.pdf_dict_put_array( conf, PDF_NAME('RBGroups'), 1)
        for index in range(len(rbgroups)):
            group = mupdf.pdf_array_push_array( groups, 1)
            JM_set_ocg_arrays_imp( group, rbgroups[index])
def JM_set_ocg_arrays_imp(arr, list_):
    '''
    Append an indirect reference for every xref in `list_` to PDF array `arr`.
    '''
    document = mupdf.pdf_get_bound_document(arr)
    for xref in list_:
        mupdf.pdf_array_push(arr, mupdf.pdf_new_indirect(document, xref, 0))
def JM_set_resource_property(ref, name, xref):
    '''
    Add an entry to /Resources/Properties of `ref` (used for Marked Content).

    Args:
        ref: PDF object owning the resources, e.g. page object, Form XObject.
        name: marked content name used as the dictionary key.
        xref: xref of the referenced object (stored as indirect reference).

    Missing /Resources and /Properties dictionaries are created.
    '''
    pdf = mupdf.pdf_get_bound_document(ref)
    target = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not target.m_internal:
        RAISEPY(MSG_BAD_XREF, PyExc_ValueError)

    resources = mupdf.pdf_dict_get(ref, PDF_NAME('Resources'))
    if not resources.m_internal:
        resources = mupdf.pdf_dict_put_dict(ref, PDF_NAME('Resources'), 1)

    properties = mupdf.pdf_dict_get(resources, PDF_NAME('Properties'))
    if not properties.m_internal:
        properties = mupdf.pdf_dict_put_dict(resources, PDF_NAME('Properties'), 1)

    key = mupdf.pdf_new_name(name)
    mupdf.pdf_dict_put(properties, key, target)
def JM_set_widget_properties(annot, Widget):
    '''
    Update the PDF form field with the properties from a Python Widget object.
    Called by "Page.add_widget" and "Annot.update_widget".

    Args:
        annot: an `Annot` or a `mupdf.PdfAnnot`; an `Annot` is unwrapped to
            its `.this` member.
        Widget: object whose attributes are read via getattr(); attributes
            that are missing are treated as None.

    The properties are written one after the other in the order of the
    sections below, and the annotation is finally marked dirty and updated.
    '''
    if isinstance( annot, Annot):
        annot = annot.this
    assert isinstance( annot, mupdf.PdfAnnot), f'{type(annot)=} {type=}'
    page = mupdf.pdf_annot_page(annot)
    annot_obj = mupdf.pdf_annot_obj(annot)
    pdf = page.doc()
    # read a Widget attribute, None if it does not exist
    def GETATTR(name):
        return getattr(Widget, name, None)
    value = GETATTR("field_type")
    field_type = value
    # rectangle --------------------------------------------------------------
    value = GETATTR("rect")
    rect = JM_rect_from_py(value)
    # transform the rectangle with the page's rotation matrix
    rot_mat = JM_rotate_page_matrix(page)
    rect = mupdf.fz_transform_rect(rect, rot_mat)
    mupdf.pdf_set_annot_rect(annot, rect)
    # fill color -------------------------------------------------------------
    value = GETATTR("fill_color")
    if value and PySequence_Check(value):
        n = len(value)
        fill_col = mupdf.pdf_new_array(pdf, n)
        col = 0
        for i in range(n):
            col = value[i]
            mupdf.pdf_array_push_real(fill_col, col)
        mupdf.pdf_field_set_fill_color(annot_obj, fill_col)
    # dashes -----------------------------------------------------------------
    value = GETATTR("border_dashes")
    if value and PySequence_Check(value):
        n = len(value)
        dashes = mupdf.pdf_new_array(pdf, n)
        for i in range(n):
            mupdf.pdf_array_push_int(dashes, value[i])
        mupdf.pdf_dict_putl(annot_obj, dashes, PDF_NAME('BS'), PDF_NAME('D'))
    # border color -----------------------------------------------------------
    value = GETATTR("border_color")
    if value and PySequence_Check(value):
        n = len(value)
        border_col = mupdf.pdf_new_array(pdf, n)
        col = 0
        for i in range(n):
            col = value[i]
            mupdf.pdf_array_push_real(border_col, col)
        mupdf.pdf_dict_putl(annot_obj, border_col, PDF_NAME('MK'), PDF_NAME('BC'))
    # entry ignored - may be used later
    #
    #int text_format = (int) PyInt_AsLong(GETATTR("text_format"));
    #
    # field label -----------------------------------------------------------
    value = GETATTR("field_label")
    if value is not None:
        label = JM_StrAsChar(value)
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('TU'), label)
    # field name -------------------------------------------------------------
    value = GETATTR("field_name")
    if value is not None:
        name = JM_StrAsChar(value)
        old_name = mupdf.pdf_load_field_name(annot_obj)
        # only write /T if the name actually changes
        if name != old_name:
            mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('T'), name)
    # max text len -----------------------------------------------------------
    if field_type == mupdf.PDF_WIDGET_TYPE_TEXT:
        value = GETATTR("text_maxlen")
        text_maxlen = value
        if text_maxlen:
            mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('MaxLen'), text_maxlen)
    # field display ----------------------------------------------------------
    value = GETATTR("field_display")
    d = value
    mupdf.pdf_field_set_display(annot_obj, d)
    # choice values ----------------------------------------------------------
    if field_type in (mupdf.PDF_WIDGET_TYPE_LISTBOX, mupdf.PDF_WIDGET_TYPE_COMBOBOX):
        value = GETATTR("choice_values")
        JM_set_choice_options(annot, value)
    # border style -----------------------------------------------------------
    value = GETATTR("border_style")
    val = JM_get_border_style(value)
    mupdf.pdf_dict_putl(annot_obj, val, PDF_NAME('BS'), PDF_NAME('S'))
    # border width -----------------------------------------------------------
    value = GETATTR("border_width")
    border_width = value
    mupdf.pdf_dict_putl(
            annot_obj,
            mupdf.pdf_new_real(border_width),
            PDF_NAME('BS'),
            PDF_NAME('W'),
            )
    # /DA string -------------------------------------------------------------
    value = GETATTR("_text_da")
    da = JM_StrAsChar(value)
    mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('DA'), da)
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('DS')) # not supported by MuPDF
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('RC')) # not supported by MuPDF
    # field flags ------------------------------------------------------------
    field_flags = GETATTR("field_flags")
    if field_flags is not None:
        # add the flag bit implied by the field type before storing /Ff
        if field_type == mupdf.PDF_WIDGET_TYPE_COMBOBOX:
            field_flags |= mupdf.PDF_CH_FIELD_IS_COMBO
        elif field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
            field_flags |= mupdf.PDF_BTN_FIELD_IS_RADIO
        elif field_type == mupdf.PDF_WIDGET_TYPE_BUTTON:
            field_flags |= mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON
        mupdf.pdf_dict_put_int( annot_obj, PDF_NAME('Ff'), field_flags)
    # button caption ---------------------------------------------------------
    value = GETATTR("button_caption")
    ca = JM_StrAsChar(value)
    if ca:
        mupdf.pdf_field_set_button_caption(annot_obj, ca)
    # script (/A) -------------------------------------------------------
    value = GETATTR("script")
    JM_put_script(annot_obj, PDF_NAME('A'), mupdf.PdfObj(), value)
    # script (/AA/K) -------------------------------------------------------
    value = GETATTR("script_stroke")
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('K'), value)
    # script (/AA/F) -------------------------------------------------------
    value = GETATTR("script_format")
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('F'), value)
    # script (/AA/V) -------------------------------------------------------
    value = GETATTR("script_change")
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('V'), value)
    # script (/AA/C) -------------------------------------------------------
    value = GETATTR("script_calc")
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('C'), value)
    # script (/AA/Bl) -------------------------------------------------------
    value = GETATTR("script_blur")
    JM_put_script(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Bl'), value)
    # script (/AA/Fo) -------------------------------------------------------
    value = GETATTR("script_focus")
    JM_put_script(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Fo'), value)
    # field value ------------------------------------------------------------
    value = GETATTR("field_value") # field value
    text = JM_StrAsChar(value) # convert to text (may fail!)
    if field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
        if not value:
            mupdf.pdf_set_field_value(pdf, annot_obj, "Off", 1)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), "Off")
        else:
            # TODO check if another button in the group is ON and if so set it Off
            onstate = mupdf.pdf_button_field_on_state(annot_obj)
            if onstate.m_internal:
                on = mupdf.pdf_to_name(onstate)
                mupdf.pdf_set_field_value(pdf, annot_obj, on, 1)
                mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), on)
            elif text:
                mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), text)
    elif field_type == mupdf.PDF_WIDGET_TYPE_CHECKBOX: # will always be "Yes" or "Off"
        if value is True or text == 'Yes':
            onstate = mupdf.pdf_button_field_on_state(annot_obj)
            on = mupdf.pdf_to_name(onstate)
            mupdf.pdf_set_field_value(pdf, annot_obj, on, 1)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), 'Yes')
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('V'), 'Yes')
        else:
            mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('AS'), 'Off')
            mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('V'), 'Off')
    else:
        if text:
            mupdf.pdf_set_field_value(pdf, annot_obj, text, 1)
            if field_type in (mupdf.PDF_WIDGET_TYPE_COMBOBOX, mupdf.PDF_WIDGET_TYPE_LISTBOX):
                mupdf.pdf_dict_del(annot_obj, PDF_NAME('I'))
    # finally mark the annotation as changed and update it
    mupdf.pdf_dirty_annot(annot)
    mupdf.pdf_set_annot_hot(annot, 1)
    mupdf.pdf_set_annot_active(annot, 1)
    mupdf.pdf_update_annot(annot)
def JM_show_string_cs(
        text,
        user_font,
        trm,
        s,
        wmode,
        bidi_level,
        markup_dir,
        language,
        ):
    '''
    Add the characters of string `s` to text object `text`, one glyph at a
    time, and return the transformation matrix advanced past the last glyph.

    A character with glyph id 0 in `user_font` is looked up via
    `fz_encode_character_with_fallback`. The matrix is moved horizontally
    for `wmode == 0`, otherwise vertically.
    '''
    pos = 0
    while pos < len(s):
        consumed, ucs = mupdf.fz_chartorune(s[pos:])
        pos += consumed
        font = user_font
        gid = mupdf.fz_encode_character_sc(user_font, ucs)
        if gid == 0:
            gid, font = mupdf.fz_encode_character_with_fallback(user_font, ucs, 0, language)
        mupdf.fz_show_glyph(text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language)
        adv = mupdf.fz_advance_glyph(font, gid, wmode)
        dx, dy = (adv, 0) if wmode == 0 else (0, -adv)
        trm = mupdf.fz_pre_translate(trm, dx, dy)
    return trm
def JM_UnicodeFromBuffer(buff):
    '''
    Return the content of buffer `buff` as a str, cut off at the first NUL
    character. Undecodable bytes are replaced.
    '''
    raw = mupdf.fz_buffer_extract_copy(buff)
    decoded = raw.decode(errors='replace')
    head, _, _ = decoded.partition(chr(0))
    return head
def message_warning(text):
    '''
    Pass `text`, prefixed with a warning marker, to `message()`.
    '''
    warning_line = f'warning: {text}'
    message(warning_line)
def JM_update_stream(doc, obj, buffer_, compress):
    '''
    Replace the stream of `obj` by the content of `buffer_`.

    Buffers longer than 30 bytes are compressed on trial. The compressed
    version (with a /Filter /FlateDecode entry) is stored only if it is
    smaller and `compress` equals 1; otherwise the buffer is stored as is.
    '''
    size, _ = mupdf.fz_buffer_storage(buffer_)
    packed_size = size
    if size > 30:   # ignore small stuff
        packed = JM_compress_buffer(buffer_)
        assert isinstance(packed, mupdf.FzBuffer)
        packed_size, _ = mupdf.fz_buffer_storage(packed)

    # `packed` only exists when packed_size can differ from size, so the
    # size comparison must stay first
    if packed_size < size and packed and compress==1:    # was it worth the effort?
        mupdf.pdf_dict_put(
                obj,
                mupdf.PDF_ENUM_NAME_Filter,
                mupdf.PDF_ENUM_NAME_FlateDecode,
                )
        mupdf.pdf_update_stream(doc, obj, packed, 1)
        return
    mupdf.pdf_update_stream(doc, obj, buffer_, 0)
def JM_xobject_from_page(pdfout, fsrcpage, xref, gmap):
    '''
    Return a Form XObject in `pdfout` representing source page `fsrcpage`.

    If `xref` is positive, an indirect reference to that existing object is
    returned. Otherwise a new XObject is built from the page's mediabox,
    contents and grafted resources.
    '''
    assert isinstance(gmap, mupdf.PdfGraftMap), f'{type(gmap)=}'
    if xref > 0:
        return mupdf.pdf_new_indirect(pdfout, xref, 0)

    srcpage = mupdf.pdf_page_from_fz_page(fsrcpage.this)
    spageref = srcpage.obj()
    mediabox = mupdf.pdf_to_rect(mupdf.pdf_dict_get_inheritable(spageref, PDF_NAME('MediaBox')))

    # copy the source page's resources, via the graft map when there is one
    src_resources = mupdf.pdf_dict_get_inheritable(spageref, PDF_NAME('Resources'))
    if gmap.m_internal:
        resources = mupdf.pdf_graft_mapped_object(gmap, src_resources)
    else:
        resources = mupdf.pdf_graft_object(pdfout, src_resources)

    # source page contents
    contents = JM_read_contents(spageref)

    # create the XObject, then store contents and resources in it
    xobj = mupdf.pdf_new_xobject(pdfout, mediabox, mupdf.FzMatrix(), mupdf.PdfObj(0), contents)
    JM_update_stream(pdfout, xobj, contents, 1)
    mupdf.pdf_dict_put(xobj, PDF_NAME('Resources'), resources)
    return xobj
def PySequence_Check(s):
    '''
    Return True if `s` is a tuple or a list (or an instance of a subclass).
    '''
    return isinstance(s, tuple) or isinstance(s, list)
def PySequence_Size(s):
    '''
    Return the number of items in `s`.
    '''
    size = len(s)
    return size
# constants: error messages. These are also in extra.i.
#
# Each MSG_* name is the text of one error message.
MSG_BAD_ANNOT_TYPE = "bad annot type"
MSG_BAD_APN = "bad or missing annot AP/N"
MSG_BAD_ARG_INK_ANNOT = "arg must be seq of seq of float pairs"
MSG_BAD_ARG_POINTS = "bad seq of points"
MSG_BAD_BUFFER = "bad type: 'buffer'"
MSG_BAD_COLOR_SEQ = "bad color sequence"
MSG_BAD_DOCUMENT = "cannot open broken document"
MSG_BAD_FILETYPE = "bad filetype"
MSG_BAD_LOCATION = "bad location"
MSG_BAD_OC_CONFIG = "bad config number"
MSG_BAD_OC_LAYER = "bad layer number"
MSG_BAD_OC_REF = "bad 'oc' reference"
MSG_BAD_PAGEID = "bad page id"
MSG_BAD_PAGENO = "bad page number(s)"
MSG_BAD_PDFROOT = "PDF has no root"
MSG_BAD_RECT = "rect is infinite or empty"
MSG_BAD_TEXT = "bad type: 'text'"
MSG_BAD_XREF = "bad xref"
MSG_COLOR_COUNT_FAILED = "color count failed"
MSG_FILE_OR_BUFFER = "need font file or buffer"
MSG_FONT_FAILED = "cannot create font"
MSG_IS_NO_ANNOT = "is no annotation"
MSG_IS_NO_IMAGE = "is no image"
MSG_IS_NO_PDF = "is no PDF"
MSG_IS_NO_DICT = "object is no PDF dict"
MSG_PIX_NOALPHA = "source pixmap has no alpha"
MSG_PIXEL_OUTSIDE = "pixel(s) outside image"
# Exception identifiers. Note that these are plain strings holding an
# exception name, not exception classes.
JM_Exc_FileDataError = 'FileDataError'
PyExc_ValueError = 'ValueError'
def RAISEPY( msg, exc):
    '''
    Raise an exception of the kind identified by `exc` with message `msg`.

    Args:
        msg: the exception message.
        exc: an exception class, or the name of one (e.g. 'ValueError').
            Names are looked up in this module first, then in builtins.
    Raises:
        The resolved exception class if it is a subclass of Exception,
        otherwise a plain Exception. `exc` used to be ignored; since only
        Exception subclasses are ever raised, callers catching Exception
        keep working.
    '''
    import builtins
    exc_type = exc
    if isinstance(exc, str):
        exc_type = globals().get(exc)
        if exc_type is None:
            exc_type = getattr(builtins, exc, None)
    if not (isinstance(exc_type, type) and issubclass(exc_type, Exception)):
        exc_type = Exception
    raise exc_type( msg)
def PyUnicode_DecodeRawUnicodeEscape(s, errors='strict'):
    '''
    Decode `s` using the 'raw_unicode_escape' codec.

    Args:
        s: str (encoded to UTF-8 first) or a bytes-like object (bytes,
            bytearray, memoryview). A falsy value gives "".
        errors: error handler passed to the encode / decode calls.
    Returns:
        The decoded str.
    Raises:
        TypeError: if `s` is neither str nor bytes-like. Previously such
            input failed with UnboundLocalError.
    '''
    if not s:
        return ""
    if isinstance(s, str):
        raw = s.encode("utf8", errors=errors)
    elif isinstance(s, (bytes, bytearray, memoryview)):
        raw = bytes(s)
    else:
        raise TypeError(f"expected str or bytes-like object, not {type(s).__name__}")
    return raw.decode('raw_unicode_escape', errors=errors)
def CheckColor(c: OptSeq):
    """Raise ValueError unless `c` is falsy or a valid color.

    A valid color is a list or tuple with 1, 3 or 4 components, each in the
    range 0 to 1.
    """
    if not c:
        return
    valid = (
        type(c) in (list, tuple)
        and len(c) in (1, 3, 4)
        and min(c) >= 0
        and max(c) <= 1
    )
    if not valid:
        raise ValueError("need 1, 3 or 4 color components in range 0 to 1")
def CheckFont(page: Page, fontname: str) -> tuple:
    """Look up a reference name in the page's font list.

    Returns the first entry of `page.get_fonts()` whose item 4 equals
    `fontname`, or None if there is no such entry.
    """
    matches = (entry for entry in page.get_fonts() if entry[4] == fontname)
    return next(matches, None)
def CheckFontInfo(doc: Document, xref: int) -> list:
    """Look up a font info by xref.

    Returns the first entry of `doc.FontInfos` whose item 0 equals `xref`,
    or None if there is no such entry.
    """
    matches = (entry for entry in doc.FontInfos if xref == entry[0])
    return next(matches, None)
def CheckMarkerArg(quads: typing.Any) -> tuple:
    """Normalize the argument of a text marker annotation.

    A single rect-like is returned as a 1-tuple holding its quad, a single
    quad-like as a 1-tuple holding it. Otherwise `quads` must be an iterable
    of rect-likes / quad-likes and is returned unchanged.

    Raises:
        ValueError: if an item is neither rect-like nor quad-like.
    """
    if CheckRect(quads):
        return (Rect(quads).quad,)
    if CheckQuad(quads):
        return (quads,)
    if not all(CheckRect(item) or CheckQuad(item) for item in quads):
        raise ValueError("bad quads entry")
    return quads
def CheckMorph(o: typing.Any) -> bool:
    """Validate a morph parameter.

    Returns False for a falsy value and True for a valid one: a list or
    tuple of length 2 whose first item has length 2 and whose second item has
    length 6 with its last two entries equal to 0.

    Raises:
        ValueError: for any other value.
    """
    if not bool(o):
        return False
    if type(o) not in (list, tuple) or len(o) != 2:
        raise ValueError("morph must be a sequence of length 2")
    first, second = o[0], o[1]
    if len(first) != 2 or len(second) != 6:
        raise ValueError("invalid morph parm 0")
    if not (second[4] == second[5] == 0):
        raise ValueError("invalid morph parm 1")
    return True
def CheckParent(o: typing.Any):
    """Check that object `o` has a parent - currently a no-op.

    The unconditional `return` below makes the orphan check unreachable, so
    this function always returns None and never raises.
    """
    # NOTE(review): presumably the check was disabled on purpose - confirm
    # before removing either the `return` or the code after it.
    return
    if not hasattr(o, "parent") or o.parent is None:
        raise ValueError(f"orphaned object {type(o)=}: parent is None")
def CheckQuad(q: typing.Any) -> bool:
    """Check whether an object is a convex quad-like.

    Returns False if `Quad(q)` raises, otherwise the `is_convex` property of
    the resulting quad.
    """
    try:
        quad = Quad(q)
    except Exception:
        if g_exceptions_verbose > 1:
            exception_info()
        return False
    else:
        return quad.is_convex
def CheckRect(r: typing.Any) -> bool:
    """Check whether an object is a non-degenerate rect-like.

    Returns False if `Rect(r)` raises or if the resulting rectangle is empty
    or infinite, otherwise True.
    """
    try:
        rect = Rect(r)
    except Exception:
        if g_exceptions_verbose > 1:
            exception_info()
        return False
    else:
        return not (rect.is_empty or rect.is_infinite)
def ColorCode(c: typing.Union[list, tuple, float, None], f: str) -> str:
    """Return the PDF color operator string for color `c`.

    Args:
        c: a single float or a sequence of 1, 3 or 4 components.
        f: "c" selects the upper case operator (G / RG / K), anything else
            the lower case one (g / rg / k).
    Returns:
        "" for a falsy `c`, otherwise the formatted components followed by
        the operator and a trailing space.
    """
    if not c:
        return ""
    if hasattr(c, "__float__"):
        c = (c,)
    CheckColor(c)
    count = len(c)
    if count == 1:
        components, upper, lower = c[0], "G ", "g "
    elif count == 3:
        components, upper, lower = tuple(c), "RG ", "rg "
    else:
        components, upper, lower = tuple(c), "K ", "k "
    prefix = _format_g(components) + " "
    if f == "c":
        return prefix + upper
    return prefix + lower
def Page__add_text_marker(self, quads, annot_type):
    '''
    Create a text marker annotation of type `annot_type` covering `quads`.

    The page's /Rotate value is set to 0 while the annotation is created and
    restored afterwards. Returns the new `Annot`, or None if anything fails.
    '''
    pdfpage = self._pdf_page()
    rotation = JM_page_rotation(pdfpage)

    def restore_rotation():
        if rotation != 0:
            mupdf.pdf_dict_put_int(pdfpage.obj(), PDF_NAME('Rotate'), rotation)

    try:
        if rotation != 0:
            mupdf.pdf_dict_put_int(pdfpage.obj(), PDF_NAME('Rotate'), 0)
        annot = mupdf.pdf_create_annot(pdfpage, annot_type)
        for item in quads:
            mupdf.pdf_add_annot_quad_point(annot, JM_quad_from_py(item))
        mupdf.pdf_update_annot(annot)
        JM_add_annot_id(annot, "A")
        restore_rotation()
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        restore_rotation()
        return None
    return Annot(annot)
def PDF_NAME(x):
    '''
    Return attribute `PDF_ENUM_NAME_<x>` of module `mupdf` for string `x`.
    '''
    assert isinstance(x, str)
    attribute = 'PDF_ENUM_NAME_{}'.format(x)
    return getattr(mupdf, attribute)
def UpdateFontInfo(doc: Document, info: typing.Sequence):
    """Store `info` in `doc.FontInfos`.

    The first existing entry with the same xref (item 0) is replaced; if
    there is none, `info` is appended.
    """
    xref = info[0]
    for index, entry in enumerate(doc.FontInfos):
        if entry[0] == xref:
            doc.FontInfos[index] = info
            return
    doc.FontInfos.append(info)
def args_match(args, *types):
    '''
    Returns true if <args> matches <types>.

    Each item in <types> is a type or tuple of types. Any of these types will
    match an item in <args>. `None` will match anything in <args>. `type(None)`
    will match an arg whose value is `None`.

    A tuple containing `None` marks an optional trailing arg: it may be
    missing from <args>; if present it must match one of the tuple's types.
    A tuple holding nothing but `None` matches anything.

    Returns False if <args> has more items than <types>.
    '''
    nargs = len(args)
    if nargs > len(types):
        return False
    for i, type_ in enumerate(types):
        optional = isinstance(type_, tuple) and None in type_
        if i >= nargs:
            if optional:
                # arg is missing but has default value.
                continue
            return False
        if type_ is None:
            continue
        if optional:
            # `None` is only the optional marker; it is not a valid
            # isinstance() argument and used to raise TypeError here.
            type_ = tuple(t for t in type_ if t is not None)
            if not type_:
                continue
        if not isinstance(args[i], type_):
            return False
    return True
def calc_image_matrix(width, height, tr, rotate, keep):
    '''
    Compute the image insertion matrix.

    Args:
        width, height: image dimensions.
        tr: rect-like target rectangle.
        rotate: rotation angle passed to `fz_rotate`.
        keep: if truthy, the width / height proportion of the image is used
            to size it inside the target rectangle.
    Returns:
        The matrix placing the image, centered on the target rectangle.
    '''
    trect = JM_rect_from_py(tr)
    rot = mupdf.fz_rotate(rotate)
    trw = trect.x1 - trect.x0
    trh = trect.y1 - trect.y0
    # default: fill the complete target rectangle
    w, h = trw, trh
    if keep:
        large = max(width, height)
        fw, fh = width / large, height / large
    else:
        fw = fh = 1
    small = min(fw, fh)
    if rotate not in (0, 180):
        fw, fh = fh, fw     # rotated sideways: swap the two factors
    if fw < 1:
        if trw / fw > trh / fh:
            w, h = trh * small, trh
        else:
            w, h = trw, trw / small
    elif fw != fh:
        if trw / fw > trh / fh:
            w, h = trh / small, trh
        else:
            w, h = trw, trw * small
    center = mupdf.fz_make_point(
            (trect.x0 + trect.x1) / 2,
            (trect.y0 + trect.y1) / 2,
            )
    # shift the unit square's center to the origin, rotate, scale, then
    # move to the center of the target rectangle
    mat = mupdf.fz_make_matrix(1, 0, 0, 1, -0.5, -0.5)
    for step in (rot, mupdf.fz_scale(w, h), mupdf.fz_translate(center.x, center.y)):
        mat = mupdf.fz_concat(mat, step)
    return mat
def detect_super_script(line, ch):
    '''
    Tell whether character `ch` counts as superscript within `line`.

    Only lines with wmode 0 and direction (1, 0) are examined; any other
    line gives 0. Otherwise the result is whether the character's origin is
    more than 10% of its size above the origin of the line's first character.
    '''
    line_info = line.m_internal
    if not (line_info.wmode == 0 and line_info.dir.x == 1 and line_info.dir.y == 0):
        return 0
    char_info = ch.m_internal
    threshold = line_info.first_char.origin.y - char_info.size * 0.1
    return char_info.origin.y < threshold
def dir_str(x):
    '''
    Return a multi-line description of `x`: a header line with its str(),
    type and number of attributes, followed by one line per name in dir(x).
    '''
    names = dir(x)
    header = f'{x} {type(x)} ({len(names)}):\n'
    return header + ''.join(f' {name}\n' for name in names)
def getTJstr(text: str, glyphs: typing.Union[list, tuple, None], simple: bool, ordering: int) -> str:
    """Return a PDF string enclosed in [] brackets, suitable for the PDF TJ
    operator.

    Each character is written as 2 hex digits (simple fonts) or 4 hex digits
    (all other fonts).

    Args:
        text: string to convert. A string already of the form "[<...>]" is
            returned unchanged, an empty one gives "[<>]".
        glyphs: glyph table indexed by character code; item 0 of an entry is
            the glyph number. None means: use the character codes.
        simple: simple font. Character codes above 255 are written as "b7".
        ordering: for non-simple fonts only: a negative value means the
            glyph table is used, otherwise (CJK) the character codes.
    """
    if text.startswith("[<") and text.endswith(">]"):  # already done
        return text
    if not bool(text):
        return "[<>]"

    codes = [ord(c) for c in text]
    if simple:
        if glyphs is None:  # not Symbol, not ZapfDingbats: use char code
            parts = ["%02x" % code if code < 256 else "b7" for code in codes]
        else:  # Symbol or ZapfDingbats: use glyphs
            parts = ["%02x" % glyphs[code][0] if code < 256 else "b7" for code in codes]
    elif ordering < 0:  # not a CJK font: use the glyphs
        parts = ["%04x" % glyphs[code][0] for code in codes]
    else:  # CJK: use the char codes
        parts = ["%04x" % code for code in codes]
    return "[<" + "".join(parts) + ">]"
def get_pdf_str(s: str) -> str:
    r"""Return a PDF string depending on its coding.

    Notes:
        The result is bracketed with either "()" or "<>" (hex values).
        Printable ASCII is copied, with "(", ")" and "\" escaped by a
        backslash. Characters from 128 to 255 are written as octal \nnn.
        Backspace, tab, line feed, form feed and carriage return use their
        backslash escapes; every other control character (and DEL) is
        written as \267. If any character is beyond 255, the complete string
        is returned as "<feff[hexstring]>", where [hexstring] is its UTF-16BE
        encoding.
    """
    if not bool(s):
        return "()"

    control_escapes = {
        8: "\\b",   # backspace
        9: "\\t",   # tab
        10: "\\n",  # line feed
        12: "\\f",  # form feed
        13: "\\r",  # carriage return
    }
    pieces = []
    for c in s:
        oc = ord(c)
        if oc > 255:  # shortcut if beyond 8-bit code range
            encoded = b"\xfe\xff" + s.encode("UTF-16BE")
            return "<" + encoded.hex() + ">"  # brackets indicate hex
        if 31 < oc < 127:  # in ASCII range
            pieces.append("\\" + c if c in ("(", ")", "\\") else c)
        elif oc > 127:  # beyond ASCII
            pieces.append("\\%03o" % oc)
        else:  # white space, or unsupported: replace by 0xB7
            pieces.append(control_escapes.get(oc, "\\267"))
    return "(" + "".join(pieces) + ")"
def get_tessdata() -> str:
    """Detect Tesseract-OCR and return its language support folder.

    This function can be used to enable OCR via Tesseract even if the
    environment variable TESSDATA_PREFIX has not been set.
    If TESSDATA_PREFIX is set, its value is returned. Otherwise the default
    installation folder of the platform is tried, and finally the
    installation is searched with the 'where' (Windows) or 'whereis'
    (Unix-like systems) command.

    Returns:
        Folder name of tessdata if Tesseract-OCR is available, otherwise False.
    """
    TESSDATA_PREFIX = os.getenv("TESSDATA_PREFIX")
    if TESSDATA_PREFIX is not None:
        return TESSDATA_PREFIX

    if sys.platform == "win32":
        tessdata = "C:\\Program Files\\Tesseract-OCR\\tessdata"
    else:
        tessdata = "/usr/share/tesseract-ocr/4.00/tessdata"

    if os.path.exists(tessdata):
        return tessdata

    # Try to locate the tesseract-ocr installation.
    import subprocess

    def locate(command):
        # Return the stripped text output of `command`, or "" if it cannot be
        # executed or ends with a non-zero return code. `text=True` makes the
        # output a str: the bytes returned before could neither be joined
        # with str path components nor be used as a path object.
        try:
            cp = subprocess.run(command, capture_output=True, text=True, check=False)
        except OSError:
            return ""
        if cp.returncode:
            return ""
        return cp.stdout.strip()

    # Windows systems:
    if sys.platform == "win32":
        response = locate(["where", "tesseract"])
        if not response:
            message("Tesseract-OCR is not installed")
            return False
        # 'where' may report several matches, one per line: use the first
        exe = response.splitlines()[0].strip()
        dirname = os.path.dirname(exe)  # path of tesseract.exe
        tessdata = os.path.join(dirname, "tessdata")  # language support
        if os.path.exists(tessdata):  # all ok?
            return tessdata
        # should not happen!
        message("unexpected: Tesseract-OCR has no 'tessdata' folder")
        return False

    # Unix-like systems:
    tokens = locate(["whereis", "tesseract-ocr"]).split()
    if len(tokens) != 2:  # if not 2 tokens: no tesseract-ocr
        message("Tesseract-OCR is not installed")
        return False

    # The second token is the installation folder: look for 'tessdata' in
    # its sub-folders. (The old code called .iterdir() on the token list.)
    install_dir = pathlib.Path(tokens[1])
    if install_dir.is_dir():
        for sub in sorted(install_dir.iterdir()):
            if not sub.is_dir():
                continue
            for sub_sub in sorted(sub.iterdir()):
                if str(sub_sub).endswith("tessdata"):
                    return str(sub_sub)

    message("unexpected: tesseract-ocr has no 'tessdata' folder")
    return False
def css_for_pymupdf_font(
    fontcode: str, *, CSS: OptStr = None, archive: AnyType = None, name: OptStr = None
) -> str:
    """Create @font-face items for the given fontcode of pymupdf-fonts.

    Every font code in 'fitz_fontdescriptors' that starts with 'fontcode' is
    selected. For each selected font, its buffer is loaded and added to
    'archive' under the font code, and one CSS @font-face statement is
    appended to the CSS source. All statements share one font-family name.

    Note:
        The font naming convention in package pymupdf-fonts is "fontcode<sf>",
        where the suffix "sf" is either empty or one of "it", "bo" or "bi",
        representing the regular, italic, bold or bold-italic variant.
        For example, font code "notos" refers to fonts
            "notos" - "Noto Sans Regular"
            "notosit" - "Noto Sans Italic"
            "notosbo" - "Noto Sans Bold"
            "notosbi" - "Noto Sans Bold Italic"

    Args:
        fontcode: (str) prefix selecting the font variants to include.
            At most 4 matching font codes are accepted.
        CSS: (str) CSS source to append to. None is treated as "".
        archive: (Archive, mandatory) receives the font buffers.
        name: (str) font-family name to use instead of 'fontcode'.

    Returns:
        The CSS source with the new @font-face statements appended.

    Raises:
        ValueError: 'archive' is not an Archive, no font code matches, or
            more than 4 font codes match.
    """
    # @font-face template: family, url, optional weight, optional style
    font_face = "\n@font-face {font-family: %s; src: url(%s);%s%s}\n"

    if type(archive) is not Archive:
        raise ValueError("'archive' must be an Archive")
    css = "" if CSS is None else CSS

    # all font codes which start with the passed-in string
    matching = [code for code in fitz_fontdescriptors if code.startswith(fontcode)]
    if not matching:
        raise ValueError(f"No font code '{fontcode}' found in pymupdf-fonts.")
    if len(matching) > 4:
        raise ValueError("fontcode too short")

    family = fontcode if name is None else name  # font-family name
    for code in matching:
        descriptor = fitz_fontdescriptors[code]
        weight = "font-weight: bold;" if descriptor["bold"] else ""
        slant = "font-style: italic;" if descriptor["italic"] else ""
        archive.add(descriptor["loader"](), code)  # font buffer into archive
        css += font_face % (family, code, weight, slant)
    return css
def get_text_length(text: str, fontname: str ="helv", fontsize: float =11, encoding: int =0) -> float:
    """Calculate length of a string for a built-in font.

    Args:
        text: the string to measure.
        fontname: name of the font (compared in lower case).
        fontsize: font size points.
        encoding: encoding to use, 0=Latin (default), 1=Greek, 2=Cyrillic.
    Returns:
        (float) length of text.
    Raises:
        ValueError: the font name is neither a key of Base14_fontdict nor
            one of the listed CJK names.
    """
    key = fontname.lower()
    base = Base14_fontdict.get(key, None)

    # Symbol and ZapfDingbats have their own glyph width tables.
    if base == "ZapfDingbats":
        widths = zapf_glyphs
    elif base == "Symbol":
        widths = symbol_glyphs
    else:
        widths = None
    if widths is not None:
        # code points of 256 and above are measured like code point 183
        total = sum(
            widths[cp if cp < 256 else 183][1]
            for cp in map(ord, text)
        )
        return total * fontsize

    if key in Base14_fontdict:
        return util_measure_string(text, Base14_fontdict[key], fontsize, encoding)

    cjk_names = (
        "china-t",
        "china-s",
        "china-ts",
        "china-ss",
        "japan",
        "japan-s",
        "korea",
        "korea-s",
    )
    if key in cjk_names:
        # one font size unit per character
        return len(text) * fontsize

    raise ValueError("Font '%s' is unsupported" % key)
def image_profile(img: typing.ByteString) -> dict:
    """Return basic properties of an image.

    Args:
        img: bytes, bytearray, io.BytesIO object or an opened image file
            (any object with a 'read' attribute).
    Returns:
        The result of TOOLS.image_profile() for the image data: a dictionary
        with keys width, height, colorspace.n, bpc, type, ext and size,
        where 'type' is the MuPDF image type (0 to 14) and 'ext' the suitable
        file extension.
    Raises:
        ValueError: 'img' is none of the accepted kinds.
    """
    kind = type(img)
    if kind is io.BytesIO:
        # whole buffer content, independent of the current position
        return TOOLS.image_profile(img.getvalue())
    if hasattr(img, "read"):
        return TOOLS.image_profile(img.read())
    if kind is bytes or kind is bytearray:
        return TOOLS.image_profile(img)
    raise ValueError("bad argument 'img'")
def jm_append_merge(dev):
    '''
    Append the current path (dev.pathdict) to dev.out, or merge it into the
    last path stored there.

    (1) Append if dev.out is empty, if the current path is not a stroke
        ('s'), if the previous path is not a fill ('f'), or if both paths
        have different item lists.
    (2) Otherwise copy all keys which the previous path does not have yet
        into it, and change its type to 'fs'.
    (3) If dev.method is set, pass the dictionary to that method of dev.out
        instead and set dev.pathdict to None.

    After (1) and (2), dev.pathdict is emptied.
    '''
    assert isinstance(dev.out, list)

    if callable(dev.method) or dev.method:  # function or method
        # callback
        if dev.method is None:
            # fixme, this surely cannot happen?
            assert 0
        else:
            resp = getattr(dev.out, dev.method)(dev.pathdict)
        if not resp:
            message("calling cdrawings callback function/method failed!")
        dev.pathdict = None
        return

    def flush():
        # store a copy of the current path, then empty the working dict
        dev.out.append(dev.pathdict.copy())
        dev.pathdict.clear()

    assert isinstance(dev.out, list)
    if not dev.out:  # always append first path
        return flush()

    if dev.pathdict[dictkey_type] != 's':  # if not stroke, then append
        return flush()

    prev = dev.out[-1]  # previously stored path
    if prev[dictkey_type] != 'f':  # if previous not fill, append
        return flush()

    # last check: there must be the same list of items for "f" and "s".
    if prev[dictkey_items] != dev.pathdict[dictkey_items]:
        return flush()

    # merge with no override: keys already present in 'prev' win
    try:
        for key, value in dev.pathdict.items():
            prev.setdefault(key, value)
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        message("could not merge stroke and fill path")
        flush()
    else:
        prev[dictkey_type] = 'fs'
        dev.pathdict.clear()
def jm_bbox_add_rect( dev, ctx, rect, code):
    '''
    Append a (code, rect) tuple to dev.result. If dev.layers is true, the
    current dev.layer_name is added as a third tuple item.
    '''
    if dev.layers:
        entry = (code, JM_py_from_rect(rect), dev.layer_name)
    else:
        entry = (code, JM_py_from_rect(rect))
    dev.result.append(entry)
def jm_bbox_fill_image( dev, ctx, image, ctm, alpha, color_params):
    '''
    Record the unit rectangle transformed by 'ctm' under code "fill-image".
    '''
    unit = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
    bbox = mupdf.ll_fz_transform_rect(unit.internal(), ctm)
    jm_bbox_add_rect(dev, ctx, bbox, "fill-image")
def jm_bbox_fill_image_mask( dev, ctx, image, ctm, colorspace, color, alpha, color_params):
    '''
    Record the unit rectangle transformed by 'ctm' under code "fill-imgmask".
    Exceptions are re-raised, after optional diagnostics.
    '''
    try:
        bbox = mupdf.ll_fz_transform_rect(mupdf.fz_unit_rect, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-imgmask")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_fill_path( dev, ctx, path, even_odd, ctm, colorspace, color, alpha, color_params):
    '''
    Record the bounds of 'path' (no stroke state) under code "fill-path".
    Exceptions are re-raised, after optional diagnostics.
    '''
    even_odd = bool(even_odd)  # normalized, but not used any further
    try:
        bbox = mupdf.ll_fz_bound_path(path, None, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-path")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_fill_shade( dev, ctx, shade, ctm, alpha, color_params):
    '''
    Record the bounds of 'shade' under code "fill-shade".
    Exceptions are re-raised, after optional diagnostics.
    '''
    try:
        bbox = mupdf.ll_fz_bound_shade(shade, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-shade")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_stroke_text( dev, ctx, text, stroke, ctm, *args):
    '''
    Record the bounds of 'text', computed with the given stroke state, under
    code "stroke-text". Additional positional arguments are ignored.
    Exceptions are re-raised, after optional diagnostics.
    '''
    try:
        bbox = mupdf.ll_fz_bound_text(text, stroke, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "stroke-text")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_fill_text( dev, ctx, text, ctm, *args):
    '''
    Record the bounds of 'text' (no stroke state) under code "fill-text".
    Additional positional arguments are ignored.
    Exceptions are re-raised, after optional diagnostics.
    '''
    try:
        bbox = mupdf.ll_fz_bound_text(text, None, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-text")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_ignore_text( dev, ctx, text, ctm):
    '''
    Record the bounds of 'text' (no stroke state) under code "ignore-text".
    '''
    bbox = mupdf.ll_fz_bound_text(text, None, ctm)
    jm_bbox_add_rect(dev, ctx, bbox, "ignore-text")
def jm_bbox_stroke_path( dev, ctx, path, stroke, ctm, colorspace, color, alpha, color_params):
    '''
    Record the bounds of 'path', computed with the given stroke state, under
    code "stroke-path". Exceptions are re-raised, after optional diagnostics.
    '''
    try:
        bbox = mupdf.ll_fz_bound_path(path, stroke, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "stroke-path")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_checkquad(dev):
    '''
    Check whether the last 4 lines of the current path represent a quad.

    Because of how we count, the lines are a polyline already, i.e. last point
    of a line equals 1st point of next line.
    So we only check for a polygon: the end point of the last line must equal
    the start point of the first line.

    Returns 0 if this is not the case. Otherwise the 4 line items are replaced
    by one ("qu", quad) item, dev.linecount is reset and 1 is returned.
    '''
    items = dev.pathdict[dictkey_items]
    count = len(items)

    # x, y of the start points of the last 4 lines (8 floats)
    coords = []
    for index in range(count - 4, count):
        line = items[index]
        start = JM_point_from_py(line[1])
        coords.append(start.x)
        coords.append(start.y)

    # end point of the last line must close the polygon
    end = JM_point_from_py(line[2])
    if not (end.x == coords[0] and end.y == coords[1]):
        return 0  # not a polygon!

    # we have detected a quad
    dev.linecount = 0  # reset this

    # Relationship of the start points to the quad corners:
    # 1st = ul, 2nd = ll, 3rd = lr, 4th = ur.
    ul_x, ul_y, ll_x, ll_y, lr_x, lr_y, ur_x, ur_y = coords
    quad = mupdf.fz_make_quad(ul_x, ul_y, ur_x, ur_y, ll_x, ll_y, lr_x, lr_y)

    items[count - 4] = ('qu', JM_py_from_quad(quad))  # replace item -4
    del items[count - 3 : count]  # delete remaining 3 items
    return 1
def jm_checkrect(dev):
    '''
    Check whether the last 3 path items represent a rectangle.

    Resets dev.linecount in any case. Returns 1 if the 3 items were replaced
    by one ("re", rect, orientation) item, otherwise 0.
    '''
    dev.linecount = 0  # reset line count
    items = dev.pathdict[dictkey_items]
    count = len(items)

    first = items[count - 3]
    ll = JM_point_from_py(first[1])
    lr = JM_point_from_py(first[2])
    # the middle line is not needed
    third = items[count - 1]
    ur = JM_point_from_py(third[1])
    ul = JM_point_from_py(third[2])

    # Assumption:
    # When decomposing rects, MuPDF always starts with a horizontal line,
    # followed by a vertical line, followed by a horizontal line.
    # First line: ll -> lr, third line: ur -> ul.
    is_rect = (
        ll.y == lr.y
        and ll.x == ul.x
        and ur.y == ul.y
        and ur.x == lr.x
    )
    if not is_rect:
        return 0

    # We have a rect: replace the last 3 "l" items by one "re" item.
    # If 1st line is below 3rd line, we record anti-clockwise (+1), else
    # clockwise (-1) orientation.
    if ul.y < lr.y:
        orientation = 1
        rect = mupdf.fz_make_rect(ul.x, ul.y, lr.x, lr.y)
    else:
        orientation = -1
        rect = mupdf.fz_make_rect(ll.x, ll.y, ur.x, ur.y)

    items[count - 3] = ('re', JM_py_from_rect(rect), orientation)
    del items[count - 2 : count]  # delete remaining 2 items
    return 1
def jm_trace_text( dev, text, type_, ctm, colorspace, color, alpha, seqno):
    '''
    Call jm_trace_text_span() for every span of 'text', following the
    linked list which starts at text.head.
    '''
    span = text.head
    while span:
        jm_trace_text_span(dev, span, type_, ctm, colorspace, color, alpha, seqno)
        span = span.next
def jm_trace_text_span(dev, span, type_, ctm, colorspace, color, alpha, seqno):
    '''
    Append a dictionary describing one text span to dev.out.

    Args:
        dev: device object; dev.linewidth and dev.layer_name are read,
            dev.out receives the result.
        span: a mupdf.fz_text_span.
        type_: stored under key 'type'.
        ctm: a mupdf.fz_matrix.
        colorspace, color: converted to RGB; (0, 0, 0) if no colorspace.
        alpha: stored under key 'opacity'.
        seqno: stored under key 'seqno'.

    The dictionary has the keys dir, font, wmode, flags, bidi_lvl, bidi_dir,
    ascender, descender, colorspace, color, size, opacity, linewidth,
    spacewidth, type, bbox, layer, seqno and chars. Each item of 'chars' is
    a tuple (ucs, gid, (origin x, origin y), (x0, y0, x1, y1)).
    '''
    assert isinstance(span, mupdf.fz_text_span)
    span = mupdf.FzTextSpan(span)
    assert isinstance(ctm, mupdf.fz_matrix)
    ctm = mupdf.FzMatrix(ctm)

    font = span.font()
    wmode = span.m_internal.wmode
    fontname = JM_font_name(font)

    mat = mupdf.fz_concat(span.trm(), ctm)  # text transformation matrix
    wdir = mupdf.fz_transform_vector(mupdf.fz_make_point(1, 0), mat)  # writing direction
    fsize = math.sqrt(wdir.x * wdir.x + wdir.y * wdir.y)  # font size
    wdir = mupdf.fz_normalize_vector(wdir)

    asc = JM_font_ascender(font)
    dsc = JM_font_descender(font)
    if asc < 1e-3:  # probably Tesseract font
        dsc = -0.1
        asc = 0.9

    # compute effective ascender / descender
    height = asc - dsc
    ascsize = asc * fsize / height
    dscsize = dsc * fsize / height

    # font flags
    mono = mupdf.fz_font_is_monospaced(font)
    fflags = mono * TEXT_FONT_MONOSPACED
    fflags += mupdf.fz_font_is_italic(font) * TEXT_FONT_ITALIC
    fflags += mupdf.fz_font_is_serif(font) * TEXT_FONT_SERIFED
    fflags += mupdf.fz_font_is_bold(font) * TEXT_FONT_BOLD

    rot = mupdf.fz_make_matrix(wdir.x, wdir.y, -wdir.y, wdir.x, 0, 0)
    if wdir.x == -1:  # left-right flip
        rot.d = 1

    # up-down flip? (does not depend on the single character)
    flipped = (
        (mat.d > 0 and (wdir.x == 1 or wdir.x == -1))
        or
        (mat.b != 0 and mat.b == -mat.c)
    )

    # walk through characters of span
    space_adv = 0
    last_adv = 0
    span_bbox = mupdf.FzRect()
    chars = []
    for i in range(span.m_internal.len):
        item = span.items(i)
        adv = 0
        if item.gid >= 0:
            adv = mupdf.fz_advance_glyph(font, item.gid, wmode)
        adv *= fsize
        last_adv = adv
        if item.ucs == 32:
            space_adv = adv

        origin = mupdf.fz_make_point(item.x, item.y)
        origin = mupdf.fz_transform_point(origin, ctm)

        # matrix which applies 'rot' around the character origin
        m1 = mupdf.fz_make_matrix(1, 0, 0, 1, -origin.x, -origin.y)
        m1 = mupdf.fz_concat(m1, rot)
        m1 = mupdf.fz_concat(m1, mupdf.FzMatrix(1, 0, 0, 1, origin.x, origin.y))

        x0 = origin.x
        x1 = x0 + adv
        if flipped:
            y0 = origin.y + dscsize
            y1 = origin.y + ascsize
        else:
            y0 = origin.y - ascsize
            y1 = origin.y - dscsize
        char_bbox = mupdf.fz_transform_rect(mupdf.fz_make_rect(x0, y0, x1, y1), m1)

        chars.append(
            (
                item.ucs,
                item.gid,
                (origin.x, origin.y),
                (char_bbox.x0, char_bbox.y0, char_bbox.x1, char_bbox.y1),
            )
        )
        if i > 0:
            span_bbox = mupdf.fz_union_rect(span_bbox, char_bbox)
        else:
            span_bbox = char_bbox
    chars = tuple(chars)

    # no space character in the span: derive a space width
    if not space_adv:
        if mono:
            space_adv = last_adv  # for mono, any char width suffices
        else:
            c, out_font = mupdf.fz_encode_character_with_fallback(font, 32, 0, 0)
            space_adv = mupdf.fz_advance_glyph(font, c, wmode)
            space_adv *= fsize
            if not space_adv:
                space_adv = last_adv

    if colorspace:
        rgb = mupdf.fz_convert_color(
            mupdf.FzColorspace(mupdf.ll_fz_keep_colorspace(colorspace)),
            color,
            mupdf.fz_device_rgb(),
            mupdf.FzColorspace(),
            mupdf.FzColorParams(),
        )
        rgb = rgb[:3]  # mupdf.fz_convert_color() always returns 4 items.
    else:
        rgb = (0, 0, 0)

    if dev.linewidth > 0:  # width of character border
        linewidth = dev.linewidth
    else:
        linewidth = fsize * 0.05  # default: 5% of font size

    # make the span dictionary
    span_dict = {
        'dir': JM_py_from_point(wdir),
        'font': JM_EscapeStrFromStr(fontname),
        'wmode': wmode,
        'flags': fflags,
        "bidi_lvl": span.m_internal.bidi_level,
        "bidi_dir": span.m_internal.markup_dir,
        'ascender': asc,
        'descender': dsc,
        'colorspace': 3,
        'color': rgb,
        'size': fsize,
        "opacity": alpha,
        "linewidth": linewidth,
        "spacewidth": space_adv,
        'type': type_,
        'bbox': JM_py_from_rect(span_bbox),
        'layer': dev.layer_name,
        "seqno": seqno,
        'chars': chars,
    }
    dev.out.append(span_dict)
def jm_lineart_color(colorspace, color):
    '''
    Convert 'color' of 'colorspace' to RGB.

    Returns the first 3 items of the conversion result, or an empty tuple
    if 'colorspace' is false. Exceptions are re-raised, after optional
    diagnostics.
    '''
    if not colorspace:
        return ()
    try:
        # Need to be careful to use a named Python object to ensure
        # that the `params` we pass to mupdf.ll_fz_convert_color() is
        # valid. E.g. doing:
        #
        #   rgb = mupdf.ll_fz_convert_color(..., mupdf.FzColorParams().internal())
        #
        # - seems to end up with a corrupted `params`.
        #
        target = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)
        params = mupdf.FzColorParams()
        rgb = mupdf.ll_fz_convert_color(
            colorspace,
            color,
            target.m_internal,
            None,
            params.internal(),
        )
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
    return rgb[:3]
def jm_lineart_drop_device(dev, ctx):
    '''
    Reset the device's collected data: dev.out becomes a new empty list if
    it is a list, dev.scissors always becomes a new empty list.
    '''
    out_is_list = isinstance(dev.out, list)
    if out_is_list:
        dev.out = []
    dev.scissors = []
def jm_lineart_fill_path( dev, ctx, path, even_odd, ctm, colorspace, color, alpha, color_params):
    '''
    Build the dictionary of a fill path and hand it to jm_append_merge().

    The path items are created by jm_lineart_path(). If it reports no items
    (dev.pathdict is None), nothing is stored and dev.seqno is unchanged.
    Exceptions are re-raised, after optional diagnostics.
    '''
    even_odd = bool(even_odd)
    try:
        assert isinstance(ctm, mupdf.fz_matrix)
        dev.ctm = mupdf.FzMatrix(ctm)
        dev.path_type = trace_device_FILL_PATH
        jm_lineart_path(dev, ctx, path)
        if dev.pathdict is None:
            return

        entry = dev.pathdict
        entry[dictkey_type] = "f"
        entry["even_odd"] = even_odd
        entry["fill_opacity"] = alpha
        entry["fill"] = jm_lineart_color(colorspace, color)
        entry[dictkey_rect] = JM_py_from_rect(dev.pathrect)
        entry["seqno"] = dev.seqno
        entry['layer'] = dev.layer_name
        if dev.clips:
            entry['level'] = dev.depth
        jm_append_merge(dev)
        dev.seqno += 1
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
# There are 3 text trace types:
# 0 - fill text (PDF Tr 0)
# 1 - stroke text (PDF Tr 1)
# 3 - ignore text (PDF Tr 3)
def jm_lineart_fill_text( dev, ctx, text, ctm, colorspace, color, alpha, color_params):
    '''
    Trace 'text' with text trace type 0 (fill text) using the current
    dev.seqno, then increase dev.seqno by one.
    '''
    fill_type = 0
    jm_trace_text(dev, text, fill_type, ctm, colorspace, color, alpha, dev.seqno)
    dev.seqno += 1
def jm_lineart_ignore_text(dev, text, ctm, *extra):
    '''
    Trace 'text' with text trace type 3 (ignore text) using the current
    dev.seqno, then increase dev.seqno by one. No colorspace / color is
    passed on, opacity is 1.

    Accepted call forms:
        jm_lineart_ignore_text(dev, text, ctm)
        jm_lineart_ignore_text(dev, ctx, text, ctm)

    The second form is the one used for the other device callbacks in this
    file (e.g. jm_bbox_ignore_text(dev, ctx, text, ctm)). Without support
    for it, a callback invocation with 4 arguments raises a TypeError.
    '''
    if extra:
        # Called as (dev, ctx, text, ctm): the parameters named 'text' and
        # 'ctm' actually received ctx and text - shift them by one.
        text, ctm = ctm, extra[0]
    jm_trace_text(dev, text, 3, ctm, None, None, 1, dev.seqno)
    dev.seqno += 1
class Walker(mupdf.FzPathWalker2):
    '''
    Path walker which records the moveto / lineto / curveto / closepath
    events of a path in the device object 'dev'.

    Items are appended to dev.pathdict[dictkey_items], dev.pathrect is
    extended by every visited point. All methods re-raise exceptions, after
    optional diagnostics.
    '''

    def __init__(self, dev):
        super().__init__()
        self.use_virtual_moveto()
        self.use_virtual_lineto()
        self.use_virtual_curveto()
        self.use_virtual_closepath()
        self.dev = dev

    def closepath(self, ctx):    # trace_close().
        try:
            dev = self.dev
            # 3 consecutive lines followed by a close may be a rectangle
            if dev.linecount == 3 and jm_checkrect(dev):
                return
            dev.linecount = 0   # reset # of consec. lines

            if not dev.havemove:
                dev.pathdict["closePath"] = True
            else:
                if dev.lastpoint != dev.firstpoint:
                    # close explicitly with a line back to the first point
                    closing = (
                        "l",
                        JM_py_from_point(dev.lastpoint),
                        JM_py_from_point(dev.firstpoint),
                    )
                    dev.pathdict[dictkey_items].append(closing)
                    dev.lastpoint = dev.firstpoint
                dev.pathdict["closePath"] = False

            dev.havemove = 0
        except Exception:
            if g_exceptions_verbose:
                exception_info()
            raise

    def curveto(self, ctx, x1, y1, x2, y2, x3, y3):   # trace_curveto().
        try:
            dev = self.dev
            dev.linecount = 0  # reset # of consec. lines
            points = [
                mupdf.fz_make_point(x, y)
                for x, y in ((x1, y1), (x2, y2), (x3, y3))
            ]
            points = [mupdf.fz_transform_point(p, dev.ctm) for p in points]
            for p in points:
                dev.pathrect = mupdf.fz_include_point_in_rect(dev.pathrect, p)

            curve = (
                "c",
                JM_py_from_point(dev.lastpoint),
                JM_py_from_point(points[0]),
                JM_py_from_point(points[1]),
                JM_py_from_point(points[2]),
            )
            dev.lastpoint = points[2]
            dev.pathdict[dictkey_items].append(curve)
        except Exception:
            if g_exceptions_verbose:
                exception_info()
            raise

    def lineto(self, ctx, x, y):   # trace_lineto().
        try:
            dev = self.dev
            target = mupdf.fz_transform_point(mupdf.fz_make_point(x, y), dev.ctm)
            dev.pathrect = mupdf.fz_include_point_in_rect(dev.pathrect, target)
            segment = (
                'l',
                JM_py_from_point(dev.lastpoint),
                JM_py_from_point(target),
            )
            dev.lastpoint = target
            dev.pathdict[dictkey_items].append(segment)
            dev.linecount += 1  # counts consecutive lines
            if dev.linecount == 4 and dev.path_type != trace_device_FILL_PATH:
                # shrink to "re" or "qu" item
                jm_checkquad(dev)
        except Exception:
            if g_exceptions_verbose:
                exception_info()
            raise

    def moveto(self, ctx, x, y):   # trace_moveto().
        try:
            dev = self.dev
            start = mupdf.fz_transform_point(mupdf.fz_make_point(x, y), dev.ctm)
            dev.lastpoint = start
            if mupdf.fz_is_infinite_rect(dev.pathrect):
                # first point of the path: start with a rect of that point
                dev.pathrect = mupdf.fz_make_rect(
                    dev.lastpoint.x,
                    dev.lastpoint.y,
                    dev.lastpoint.x,
                    dev.lastpoint.y,
                )
            dev.firstpoint = dev.lastpoint
            dev.havemove = 1
            dev.linecount = 0   # reset # of consec. lines
        except Exception:
            if g_exceptions_verbose:
                exception_info()
            raise
def jm_lineart_path(dev, ctx, path):
    '''
    Create the "items" list of the path dictionary.

    * set dev.pathrect to the infinite rectangle
    * reset the count of consecutive lines and the last point
    * create a new path dictionary with an empty items list
    * invoke fz_walk_path() with a Walker, which creates the single items
    * if no items were created, set dev.pathdict to None

    Exceptions are re-raised, after optional diagnostics.
    '''
    try:
        dev.pathrect = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
        dev.linecount = 0
        dev.lastpoint = mupdf.FzPoint(0, 0)
        dev.pathdict = {dictkey_items: []}

        # First time we create a Walker instance is slow, e.g. 0.3s, then later
        # times run in around 0.01ms. If Walker is defined locally instead of
        # globally, each time takes 0.3s.
        #
        walker = Walker(dev)
        kept_path = mupdf.FzPath(mupdf.ll_fz_keep_path(path))
        mupdf.fz_walk_path(kept_path, walker, walker.m_internal)

        # Check if any items were added ...
        if not dev.pathdict[dictkey_items]:
            dev.pathdict = None
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_lineart_stroke_path( dev, ctx, path, stroke, ctm, colorspace, color, alpha, color_params):
    '''
    Build the dictionary of a stroke path and hand it to jm_append_merge().

    The path items are created by jm_lineart_path(). If it reports no items
    (dev.pathdict is None), nothing is stored and dev.seqno is unchanged.
    Line width, line join, dash lengths and dash phase are multiplied by
    dev.pathfactor, which is abs(ctm.a) if that equals abs(ctm.d), else 1.
    Exceptions are re-raised, after optional diagnostics.
    '''
    try:
        assert isinstance(ctm, mupdf.fz_matrix)
        dev.pathfactor = abs(ctm.a) if abs(ctm.a) == abs(ctm.d) else 1
        dev.ctm = mupdf.FzMatrix(ctm)
        dev.path_type = trace_device_STROKE_PATH

        jm_lineart_path(dev, ctx, path)
        if dev.pathdict is None:
            return

        entry = dev.pathdict
        entry[dictkey_type] = 's'
        entry['stroke_opacity'] = alpha
        entry['color'] = jm_lineart_color(colorspace, color)
        entry[dictkey_width] = dev.pathfactor * stroke.linewidth
        entry['lineCap'] = (stroke.start_cap, stroke.dash_cap, stroke.end_cap)
        entry['lineJoin'] = dev.pathfactor * stroke.linejoin
        entry.setdefault('closePath', False)

        # output the "dashes" string
        if stroke.dash_len:
            # We use mupdf python's SWIG-generated floats_getitem() fn to
            # access float *stroke.dash_list[].
            lengths = [
                _format_g(dev.pathfactor * mupdf.floats_getitem(stroke.dash_list, i))
                for i in range(stroke.dash_len)
            ]
            dash_text = "[ "  # left bracket
            dash_text += "".join(f'{length} ' for length in lengths)
            dash_text += f'] {_format_g(dev.pathfactor * stroke.dash_phase)}'
            buff = mupdf.fz_new_buffer(256)
            mupdf.fz_append_string(buff, dash_text)
            # NOTE(review): this stores the buffer object, while the branch
            # below stores a plain string - confirm that consumers of
            # 'dashes' expect this.
            entry['dashes'] = buff
        else:
            entry['dashes'] = '[] 0'

        entry[dictkey_rect] = JM_py_from_rect(dev.pathrect)
        entry['layer'] = dev.layer_name
        entry['seqno'] = dev.seqno
        if dev.clips:
            entry['level'] = dev.depth
        jm_append_merge(dev)
        dev.seqno += 1
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_lineart_clip_path(dev, ctx, path, even_odd, ctm, scissor):
    '''
    Build the dictionary of a clip path, hand it to jm_append_merge() and
    increase dev.depth by one.

    Does nothing if dev.clips is false or if the path has no items.
    '''
    if not dev.clips:
        return
    dev.ctm = mupdf.FzMatrix(ctm)
    dev.path_type = trace_device_CLIP_PATH
    jm_lineart_path(dev, ctx, path)
    if dev.pathdict is None:
        return

    entry = dev.pathdict
    entry[dictkey_type] = 'clip'
    entry['even_odd'] = bool(even_odd)
    entry.setdefault('closePath', False)
    entry['scissor'] = JM_py_from_rect(compute_scissor(dev))
    entry['level'] = dev.depth
    entry['layer'] = dev.layer_name
    jm_append_merge(dev)
    dev.depth += 1
def jm_lineart_clip_stroke_path(dev, ctx, path, stroke, ctm, scissor):
    '''
    Build the dictionary of a stroke clip path, hand it to jm_append_merge()
    and increase dev.depth by one.

    Does nothing if dev.clips is false or if the path has no items.
    The dictionary has type 'clip' and 'even_odd' None.
    '''
    if not dev.clips:
        return
    dev.ctm = mupdf.FzMatrix(ctm)    # fz_concat(ctm, trace_device_ptm);
    dev.path_type = trace_device_CLIP_STROKE_PATH
    jm_lineart_path(dev, ctx, path)
    if dev.pathdict is None:
        return
    # Use the key held by variable dictkey_type (as jm_lineart_clip_path()
    # does), not the literal string 'dictkey_type': jm_append_merge() reads
    # the type via dev.pathdict[dictkey_type].
    dev.pathdict[dictkey_type] = 'clip'
    dev.pathdict['even_odd'] = None
    if 'closePath' not in dev.pathdict:
        dev.pathdict['closePath'] = False
    dev.pathdict['scissor'] = JM_py_from_rect(compute_scissor(dev))
    dev.pathdict['level'] = dev.depth
    dev.pathdict['layer'] = dev.layer_name
    jm_append_merge(dev)
    dev.depth += 1
def jm_lineart_clip_stroke_text(dev, ctx, text, stroke, ctm, scissor):
    '''
    If dev.clips is true: compute / store the next scissor and increase
    dev.depth by one.
    '''
    if dev.clips:
        compute_scissor(dev)
        dev.depth += 1
def jm_lineart_clip_text(dev, ctx, text, ctm, scissor):
    '''
    If dev.clips is true: compute / store the next scissor and increase
    dev.depth by one.
    '''
    if dev.clips:
        compute_scissor(dev)
        dev.depth += 1
def jm_lineart_clip_image_mask( dev, ctx, image, ctm, scissor):
    '''
    If dev.clips is true: compute / store the next scissor and increase
    dev.depth by one.
    '''
    if dev.clips:
        compute_scissor(dev)
        dev.depth += 1
def jm_lineart_pop_clip(dev, ctx):
    '''
    Remove the last scissor and decrease dev.depth by one.
    Does nothing if dev.clips is false or if there are no scissors.
    '''
    if dev.clips and dev.scissors and len(dev.scissors) >= 1:
        dev.scissors.pop()
        dev.depth -= 1
def jm_lineart_begin_layer(dev, ctx, name):
    '''
    Store 'name' as the current layer name; a false value is stored as "".
    '''
    dev.layer_name = name if name else ""
def jm_lineart_end_layer(dev, ctx):
    '''
    Reset the current layer name to the empty string.
    '''
    empty_name = ""
    dev.layer_name = empty_name
def jm_lineart_begin_group(dev, ctx, bbox, cs, isolated, knockout, blendmode, alpha):
    '''
    If dev.clips is true: make a "group" dictionary the current path
    dictionary, hand it to jm_append_merge() and increase dev.depth by one.
    '''
    if not dev.clips:
        return
    group = {}
    group["type"] = "group"
    group["rect"] = JM_py_from_rect(bbox)
    group["isolated"] = bool(isolated)
    group["knockout"] = bool(knockout)
    group["blendmode"] = mupdf.fz_blendmode_name(blendmode)
    group["opacity"] = alpha
    group["level"] = dev.depth
    group["layer"] = dev.layer_name
    dev.pathdict = group
    jm_append_merge(dev)
    dev.depth += 1
def jm_lineart_end_group(dev, ctx):
    '''
    If dev.clips is true: decrease dev.depth by one.
    '''
    if dev.clips:
        dev.depth -= 1
def jm_lineart_stroke_text(dev, ctx, text, stroke, ctm, colorspace, color, alpha, color_params):
    '''
    Trace 'text' with text trace type 1 (stroke text) using the current
    dev.seqno, then increase dev.seqno by one.
    '''
    stroke_type = 1
    jm_trace_text(dev, text, stroke_type, ctm, colorspace, color, alpha, dev.seqno)
    dev.seqno += 1
def jm_dev_linewidth( dev, ctx, path, stroke, matrix, colorspace, color, alpha, color_params):
    '''
    Remember the line width of 'stroke' in dev.linewidth, then increase the
    sequence number via jm_increase_seqno().
    '''
    width = stroke.linewidth
    dev.linewidth = width
    jm_increase_seqno(dev, ctx)
def jm_increase_seqno( dev, ctx, *vargs):
    '''
    Increase dev.seqno by one. All other arguments are ignored.
    Exceptions are re-raised, after optional diagnostics.
    '''
    try:
        dev.seqno += 1
    except Exception:
        verbose = g_exceptions_verbose
        if verbose:
            exception_info()
        raise
def planish_line(p1: point_like, p2: point_like) -> Matrix:
    """Compute matrix which maps line from p1 to p2 to the x-axis, such that it
    maintains its length and p1 * matrix = Point(0, 0).

    Both arguments are converted to Point objects; the matrix is built from
    the result of util_hor_matrix().

    Args:
        p1, p2: point_like
    Returns:
        Matrix which maps p1 to Point(0, 0) and p2 to a point on the x axis at
        the same distance to Point(0,0). Will always combine a rotation and a
        translation.
    """
    start, end = Point(p1), Point(p2)
    return Matrix(util_hor_matrix(start, end))
class JM_image_reporter_Filter(mupdf.PdfFilterOptions2):
    '''
    Filter options whose image filter passes every image on to
    JM_image_filter().
    '''

    def __init__(self):
        super().__init__()
        self.use_virtual_image_filter()

    def image_filter( self, ctx, ctm, name, image):
        assert isinstance(ctm, mupdf.fz_matrix)
        matrix = mupdf.FzMatrix(ctm)
        JM_image_filter(self, matrix, name, image)
        if mupdf_cppyy:
            # cppyy doesn't appear to treat returned None as nullptr,
            # resulting in obscure 'python exception' exception.
            return 0
        return None
class JM_new_bbox_device_Device(mupdf.FzDevice2):
    '''
    Device which collects the boundary boxes reported by the jm_bbox_*
    functions in list 'result'. If 'layers' is true, each entry also
    contains the current layer name.
    '''

    def __init__(self, result, layers):
        super().__init__()
        self.result = result
        self.layers = layers
        # enable the virtual functions for which handlers are defined below
        for handler in (
            "fill_path",
            "stroke_path",
            "fill_text",
            "stroke_text",
            "ignore_text",
            "fill_shade",
            "fill_image",
            "fill_image_mask",
            "begin_layer",
            "end_layer",
        ):
            getattr(self, f"use_virtual_{handler}")()

    # layer handling
    begin_layer = jm_lineart_begin_layer
    end_layer = jm_lineart_end_layer

    # paths
    fill_path = jm_bbox_fill_path
    stroke_path = jm_bbox_stroke_path

    # text
    fill_text = jm_bbox_fill_text
    stroke_text = jm_bbox_stroke_text
    ignore_text = jm_bbox_ignore_text

    # shadings and images
    fill_shade = jm_bbox_fill_shade
    fill_image = jm_bbox_fill_image
    fill_image_mask = jm_bbox_fill_image_mask
class JM_new_output_fileptr_Output(mupdf.FzOutput2):
    '''
    Output which forwards write, seek, tell and truncate requests to the
    Python file-like object 'bio'.
    '''

    def __init__(self, bio):
        super().__init__()
        self.bio = bio
        self.use_virtual_write()
        self.use_virtual_seek()
        self.use_virtual_tell()
        self.use_virtual_truncate()

    def seek( self, ctx, offset, whence):
        target = self.bio
        return target.seek(offset, whence)

    def tell( self, ctx):
        return self.bio.tell()

    def truncate( self, ctx):
        target = self.bio
        return target.truncate()

    def write(self, ctx, data_raw, data_length):
        # convert the raw data into a Python bytes object first
        payload = mupdf.raw_to_python_bytes(data_raw, data_length)
        return self.bio.write(payload)
def compute_scissor(dev):
    '''
    Compute the scissor of the current clip, append it (in Python format) to
    dev.scissors and return it.

    Every scissor of a clip is a sub rectangle of the preceding clip scissor
    if the clip level is larger: if there is a previous scissor, the result
    is its intersection with dev.pathrect, otherwise dev.pathrect itself.
    '''
    if dev.scissors is None:
        dev.scissors = []
    if dev.scissors:
        enclosing = JM_rect_from_py(dev.scissors[-1])
        scissor = mupdf.fz_intersect_rect(enclosing, dev.pathrect)
    else:
        scissor = dev.pathrect
    dev.scissors.append(JM_py_from_rect(scissor))
    return scissor
class JM_new_lineart_device_Device(mupdf.FzDevice2):
    '''
    LINEART device for Python method Page.get_cdrawings()

    Args:
        out: receives the path dictionaries (see jm_append_merge()).
        clips: if true, clip paths and groups are recorded too and path
            dictionaries get a 'level' entry.
        method: if set, name of the method of 'out' to call for every path
            dictionary (see jm_append_merge()).
    '''
    def __init__(self, out, clips, method):
        super().__init__()
        # fixme: this results in "Unexpected call of unimplemented virtual_fnptrs fn FzDevice2::drop_device().".
        #self.use_virtual_drop_device()
        self.use_virtual_fill_path()
        self.use_virtual_stroke_path()
        self.use_virtual_clip_path()
        self.use_virtual_clip_image_mask()
        self.use_virtual_clip_stroke_path()
        self.use_virtual_clip_stroke_text()
        self.use_virtual_clip_text()

        # These three must be *called*: a bare attribute reference is a
        # no-op, which leaves the text handlers defined below (they only
        # increase seqno) disabled.
        self.use_virtual_fill_text()
        self.use_virtual_stroke_text()
        self.use_virtual_ignore_text()

        self.use_virtual_fill_shade()
        self.use_virtual_fill_image()
        self.use_virtual_fill_image_mask()

        self.use_virtual_pop_clip()

        self.use_virtual_begin_group()
        self.use_virtual_end_group()

        self.use_virtual_begin_layer()
        self.use_virtual_end_layer()

        self.out = out
        self.seqno = 0
        self.depth = 0
        self.clips = clips
        self.method = method

        self.scissors = None
        self.layer_name = ""  # optional content name

        self.linewidth = 0
        self.ptm = mupdf.FzMatrix()
        self.ctm = mupdf.FzMatrix()
        self.rot = mupdf.FzMatrix()
        self.lastpoint = mupdf.FzPoint()
        self.firstpoint = mupdf.FzPoint()
        self.havemove = 0
        self.pathrect = mupdf.FzRect()
        self.pathfactor = 0
        self.linecount = 0
        self.path_type = 0

    #drop_device = jm_lineart_drop_device

    fill_path = jm_lineart_fill_path
    stroke_path = jm_lineart_stroke_path
    clip_image_mask = jm_lineart_clip_image_mask
    clip_path = jm_lineart_clip_path
    clip_stroke_path = jm_lineart_clip_stroke_path
    clip_text = jm_lineart_clip_text
    clip_stroke_text = jm_lineart_clip_stroke_text

    # text, shadings and images are not recorded, but counted in seqno
    fill_text = jm_increase_seqno
    stroke_text = jm_increase_seqno
    ignore_text = jm_increase_seqno

    fill_shade = jm_increase_seqno
    fill_image = jm_increase_seqno
    fill_image_mask = jm_increase_seqno

    pop_clip = jm_lineart_pop_clip

    begin_group = jm_lineart_begin_group
    end_group = jm_lineart_end_group

    begin_layer = jm_lineart_begin_layer
    end_layer = jm_lineart_end_layer
class JM_new_texttrace_device(mupdf.FzDevice2):
    '''
    Trace TEXT device for Python method Page.get_texttrace()

    Text spans are appended to 'out' by the text handlers. Paths, shadings
    and images only increase the sequence number; stroke paths additionally
    store their line width in the device.
    '''

    def __init__(self, out):
        super().__init__()

        # paths
        self.use_virtual_fill_path()
        self.use_virtual_stroke_path()
        # text
        self.use_virtual_fill_text()
        self.use_virtual_stroke_text()
        self.use_virtual_ignore_text()
        # shadings and images
        self.use_virtual_fill_shade()
        self.use_virtual_fill_image()
        self.use_virtual_fill_image_mask()
        # layers
        self.use_virtual_begin_layer()
        self.use_virtual_end_layer()

        # result and counters
        self.out = out
        self.seqno = 0
        self.depth = 0
        self.clips = 0
        self.method = None

        # path state
        self.pathdict = {}
        self.scissors = []
        self.linewidth = 0
        self.ptm = mupdf.FzMatrix()
        self.ctm = mupdf.FzMatrix()
        self.rot = mupdf.FzMatrix()
        self.lastpoint = mupdf.FzPoint()
        self.pathrect = mupdf.FzRect()
        self.pathfactor = 0
        self.linecount = 0
        self.path_type = 0
        self.layer_name = ""

    fill_path = jm_increase_seqno
    stroke_path = jm_dev_linewidth

    fill_text = jm_lineart_fill_text
    stroke_text = jm_lineart_stroke_text
    ignore_text = jm_lineart_ignore_text

    fill_shade = jm_increase_seqno
    fill_image = jm_increase_seqno
    fill_image_mask = jm_increase_seqno

    begin_layer = jm_lineart_begin_layer
    end_layer = jm_lineart_end_layer
def _get_glyph_text() -> list:
'''
Return the embedded Adobe Glyph List as a list of text lines.

The list is stored below as a base64-encoded, gzip-compressed bytes
literal. It is base64-decoded, decompressed, decoded to str and split
into lines, so the result is a list of str (not bytes).
'''
# Imported here rather than at module level; only this function needs them.
import base64
import gzip
return gzip.decompress(base64.b64decode(
b'H4sIABmRaF8C/7W9SZfjRpI1useviPP15utzqroJgBjYWhEkKGWVlKnOoapVO0YQEYSCJE'
b'IcMhT569+9Ppibg8xevHdeSpmEXfPBfDZ3N3f/t7u//r//k/zb3WJ4eTv2T9vzXTaZZH/N'
b'Junsbr4Z7ru7/7s9n1/+6z//8/X19T/WRP7jYdj/57//R/Jv8Pax2/Sn87G/v5z74XC3Pm'
b'zuLqfurj/cnYbL8aEzyH1/WB/f7h6H4/70l7vX/ry9G47wzK/hcr7bD5v+sX9YM4i/3K2P'
b'3d1Ld9z353O3uXs5Dl/7DT7O2/UZ/3Tw9zjsdsNrf3i6exgOm57eTsbbvjv/1w2xTnfDo5'
b'fnYdjA3eV0vjt25zXkRJB36/vhKwN+kEw4DOf+ofsLuP3pboewGISO7bAxPkUU+EaUD7t1'
b'v++O/3FTCESmcsILgQRuLhDs/w857lz6NsPDZd8dzmtfSP85HO8GcI53+/W5O/br3QkeJa'
b'9NERmPKgE2Ue+73vgj97Ded5TH1pPDEFCT4/35RFFtAMORMezXb3dwiioCsYe77rABjjCO'
b'jHs/nLs7mx3wuYFYX+HsEQyTfHg/DY/nVxa0rzmnl+6BVQfeegTyemSlOdjqczqJ0J9/ev'
b'fp7tOH1ed/zj+2d/j+9eOHf7xbtsu75jcw27vFh19/+/jux58+3/304edl+/HT3fz9kq3i'
b'w/vPH981Xz5/APR/5p/g9/+Qhb+/3bX/8+vH9tOnuw8f79798uvP7xAcwv84f//5XfvpL/'
b'D97v3i5y/Ld+9//Msdgrh7/+Hz3c/vfnn3GQ4/f/iLifja492HFbz+0n5c/ARg3rz7+d3n'
b'30ycq3ef3zO+FSKc3/06//j53eLLz/OPd79++fjrh0/tHRIHr8t3nxY/z9/90i7/AxIg1r'
b'v2H+37z3effpr//PPN1CIF47Q2LUSdNz+3NjakdvnuY7v4/BcEGb4WyEPI+DMT++nXdvEO'
b'n8iWFomaf/ztL8wZhPqp/e8vcAbm3XL+y/xHpPH/xlnDejXKHJTQ4svH9hdK/mF19+lL8+'
b'nzu89fPrd3P374sDSZ/qn9+I93i/bTD/D+8wcWxOruy6f2L4jl89xEjkCQaZ9+4Hfz5dM7'
b'k33v3n9uP3788uvndx/e/zu8/vThn8ggSDqH56XJ6Q/vTZKRVx8+/sZgmRemIP5y98+fWu'
b'Ao8vc+z+bMjE/Iu8Vn7RBxIis/q7TevW9//Pndj+37RWuz/AND+ue7T+2/o+zefaKTdzbq'
b'f84R7xeTdJYYJLOf7z4xq11N/osp2bt3q7v58h/vKLxzjtrw6Z2rOSbzFj+5rEd7+P84UL'
b'xH8/6vO/lj2/6Pu7eX7d3P6C3Y2tb3u+7ua3dkA/yvu+w/JqyV6GeUt0/dy7nb36MjySZ/'
b'MUMO3Hz5+LNycsdx54SB5wmN/XJvRh0z/vz1/PaCf4Zhd/rP9dPur/j7eDDtfIV+dX3+r7'
b'vz63B36vb9w7AbDn/ddLseown7kr7bbU4YIhD6/03//e7JiM0O669/vbyg1/hPdKLd8WGN'
b'PmnXoSs52h5200OGk/WW/fvdl0NvhpHTw3q3Pt59Xe8uCOARA8ydCcX433Z/rjfonfbrnf'
b'hP5j9MJtM0mbf4XZT4XT9czt0Pk3S1ALFfPxyHA6g2A3WCz90Pq6qFO+dsskjdtzAB3B+7'
b'rwwDeWi/reu0nbcOeMBostv1Dz9MpsuJwzbD+b5DcuGuKR32dFx/pcfGO9oOw7MZlAj64M'
b'/9bmOAaTJ/WFuJF0t898eHXfdDNmV4JC77x133J8XONCDiTTWq5JkvNMMLNY9C1ZLNa82R'
b'rIki9ULP50AZ/6pczOyn92DSE3IqRSZs7nc2+gmqKMi+O3an/sQkTQOpszcLsBTnsg2gSE'
b'f/KskTQ4YaANrFPFn4b/ELIEo/Iu2jQkbg/QEtEJXe1Y6MtWP3sl3/MMlnqf08D4cBaclr'
b'5KzEzHTuyXhZPyCXVhkcD0/DoXsmEwEfoWVQqsJ+Sg2eW9qniOGQFqHh3n+XCNMWCMLJ3b'
b'c4BPB2vz5CYenXkKjI06Rhu8mSJlSxKmmQX+uHB6g1jC0ztEQ+TRqdISmC6A46TLiH/sfM'
b'wBczE0mo4WrXHzoJpUyaKCvglLnpJC1XiEWSBN55eIHcDChLFpQ4TxZrHWkL2mUXwl6Yto'
b'N6OLefEmyRLHy7mizwDT1yt1szryqhfCOa1AJJBtKVZFRtCd8WU3pATvFrbr5cHlo6Dome'
b'tzoF0xmAbn3/vF2fgKgcbhbkKCCrCKBYETp0uZt+2siJ5pSGc92+kOVgbLVIOREE/rw+jc'
b'JfNGSxGWBysYMmOzxrCU3qelSBOUV1VQCf456kXEGaqB4gykGJUKTJQupBnixZ9NNk+S+2'
b'ihS/0kkCjOoD6ccjhCO3niVLKfYW367Y0xY90TIU6MwSVkRfVdMM6HFYsxzpPGobc0NLrV'
b'4ky6htQIoOA9rLmWTeIupuh6aRZaij5vPp2LH15zO49PmEMH1niBrcCCWd60KgH00/Bmgp'
b'kM8t9NzL/mm930scS/j7XYuHlr2MGiXkiwoDQvnESoFVyfKEarx1uSGFA7ehkULobywiRP'
b'BNiqgAcbOCo9MFRwtGp1GVn6wSDuzTImllwJ65b2mcAPyAjZxvfcTpHN+2xC0bZboApKt6'
b'joBDPZhbIgyyEeD7B7Sx9kZ1qTWqKgeUkvZ66MUI1N4eejGytzeG3kgUP/QumFyVWyD1+E'
b'pSja9NICVYYqbrSkvzJV2Xo0WhQfIedV+EsGU0rd23hAogyuUKtNZ7kBjOxTEPBT9LS/Cv'
b'BlfE32OqDgVzo+JFfWt3uqkhATv4OEhYCFtGXrRhR/jCY7Is4kuCVWavQ0QdiVoDqoiute'
b'kS9K0eFjpDy3E8nc75EdVjKGbtgVmg+1KkWtQAVp/hpaPQM1SNl1O/YwryWeEJUS3gUkeb'
b'wTnzDLP+DdtgG0jtClLrXh86SHu6mQoIb1r5HM1KWjmksEN7xQ9VsjVpEQ1ezvA7gUqMD+'
b'97RcpruAv3Le0G8V2Oww/ZBDpq+40xQxPBh2/G6D1BqRSiKq7YJ5TJKjTdJlnpDjptk1U0'
b'phVwrbvkabJy/S5Ut1UPnyELqgwIovM1Cm6jCoGgMDERdp6sJJ/K5EeKViU/Nqc/Lutj90'
b'OeYwD8UVS6Kb7RNzMrc/sZhqsZmYenfh3EnCc/StfWJj9KniAe0WFSKFE/hpxYWEK0k5TA'
b'wIh806Z72+hRd37UjZ50NJBBxu16o3UD+N1iHrjZ7LpRfab42+5KJ5gZH5eX8+WomxFq+Y'
b'++BBALJnWqVgGIRywArlFjJgefUXkgf/142NpPKQ84le/KfdtYs1kD2gjLDJ0mP7Hg6uSn'
b'tEb8P2TFYmW+p/xGo+B3kfK7SX7CQF4ZPE1++lUKGh3sT+tbAx3G5J/WN5WyDIzj5tQ/ae'
b'cZYrMDKqraT6b8fWshK2gxGcINBb+0hBQ8uuifpPuHY4SlmwhqwU+qg6frKFcRttbIphPQ'
b'R9WCwJesxfcF85bjZb9bX84siFWEiBYBh98kv1AF3jHTZ8k7PUvMVsm7v0F+TCjefdF4m7'
b'wTJWDpvmXIAeBbSrZI3on2gcBCFrWWCAN8BEhYRFXlK5N3elStQapRdRVIP8hQ0huaNirZ'
b'u6sBmN5NW8wn5kvaoqNFjZgn77qrpQeIFrXXInn3eFw/o62hZ8IU7Z2M0Qv3LREDiNQOJK'
b'vXQZEej8mQoT9th+NZO0TxyYCL+ukInW4UZFS14AO1SrX3Jnk36ByH4DIyMjMHO/jMzJfq'
b'MEsDhNLI0VCJyIAEUiopfEt7xzj2zk2XU9T0d9GQxPrzbdufT9GgMPWgrwuaWSZ/Y02eJ3'
b'+L5nZp8rdQ+VaWkPaJucrfok6uTv42mog1yd+ijEP4kpx58ndG2SR/V0NNkfz976E/WiZ/'
b'X99DZ3/uoxF+AtjV1Nx8q8JEqDd7qhkZYwUmB/byYoqG7OuuvwX63cnibJH8XQa0Gt8yoO'
b'UlKJ9v0JT/Ho9fZKuWgX7i7/FYPwUQLU2skr9vdTKh0/19q9UBhOgHI0gSjz0QU8+WUGx/'
b'jwoFJTAgF5SXemIhmYEhH066cZUEfEE2yc8syEXyM3s9aIU//4yuEtXlZ6815DN87+83Jq'
b'fh3OdavsR3yDVyJNdSS8STlByRjPISnlz/szJfgWNp8VoGUoZiqH8/969RViOG35kMcOJs'
b'RBqibJwnP0fZCI9+gol2Y79l3IBnya9F8gvza5n8oip+mfxihVqVUD7tt0yJVwRchW+TX0'
b'ImZckvekjEGPeLSjJ0nV+iejSdJr9EMkMGEQvfVHGMioqq/cuFhbVI3lPWNnlvynaevPdl'
b'Os2T974coS++D+WIye77IGJuibgc0dG8j8uRnqKkTA0tHsrkPSv4rnuk69kyeY+yEBW2Tt'
b'6bQmvwGxUa4tGFBv3ofZQBSNjwqnMI8UiOgOmXJJep+5Y5AQCTQ8vkA3NolXzARD8tMvxK'
b'qc+TD37AX+buWwIAACXpGM1y0I048Nbwi+C8ioAS+eBzH7J9YK7Bw8aPCTPIE8pgaglRG5'
b'YR4KsW6t2HmysAy1oz/LxzmWlUD8Vx8JLgCPXzKWgAH3T/jXRhfPKVrJgYUlSXBcigutDv'
b'rXxSsEROTCkjCMiMz1JUDQCnajBhkaqxAhD1zwXoPeodVNIPkQ7Skj6yUDBImU/J3LmllR'
b'BtZiHJ0IWlo6x0IfrsahmsVlVtHvWMEcFdKTzwLroNeugP8WICa2u8mMDA9t3T2iWOn7rb'
b'd1w/LmCKbejjcDnoalzNLX7uzzutF1ULh3v1BrV031vx8pkQwqZz3VrhQjV6CCNKFtuGJc'
b'J+CXy7FQn0rh9c3zxhZTbfMqVtHSDFTRe+D0CUduDXzrX6WJH2vUThvn0GM8sNoOYxU+9B'
b'4iuSX+EZWf+rFMw0+TU0X/B111iUya+R0rwCHaldcwA3p7hzeLXr2/ywCsMccRkI8fevR1'
b'3P8+RXnf9Qtn49Gac1P3QmkOOSg+//ZnLS5L9DEsrkv6OQwBT3afKR7rPkY6R7LkD7bmCa'
b'fPS9XVHjW8Ya5MXHEEsFIhpVyFb9RzoBqXOyNrRvkMU8kKIiFJAj1s4QiJqjgL0dmCdIRt'
b'jbKlcLknFrTJFEPRoVbfIxyhXwJVf8tw8E/ut0hJ0uLx2tXMBryuQTczFPPq24YzeZYHqP'
b'/hJU5qh0Sir31ITU1FM1qcJRufFXOiozVOV5JpTa+zO8mXdJnoncxM4YUpElI+VdlimozL'
b'ssycu8SxQaKC81OltQXuqS6cu81IUJxUtdVKS81MWSlJe6oJyZl7poQOXisiUlLlekxOWc'
b'lJe6YPqmIvWMlJe6pNRTL3XJtE+91IWhvNQlZZl6qUtKPfWylCyHqZelNPF5WUrmxFRkYe'
b'yFl6Wgv0JykPlZSA4yzwrJQaa9EFmQPmll/ls3EYqw3r/0vsvHAPTJN8XSf0ceSgdKS0BB'
b'qAaLzH7YvvITvb/51OsBtYVubaNDutDSa0vIXJTlGzX9jDU6kmtiaN/2WOU8GTmDt7gzhf'
b'jR+jzSF2+AVgT05AxBbB9iCIUVzdcQ+zZy0SB5236vlk6Rov7JrLTOUYD9nyIAqkHUa4A7'
b'PJ7Ha3DwLn0JXJwZlszn5slndhbT5POaSiyGgM92wQ6p+yzFCzQUHDLsc8j/mSVirR49/+'
b'e4/6WnKHfnhpZCWCSfow1iOL+5+Tunw1AEiL07n6KNW8i6dbv3NT7d0LbgJ/WxCRQp8ymD'
b'Lmlkh4SJqNWgXJIfzwyh4n/WvTemB5+jcoAIesERk97PUEgee6OwNwtDnXrW1npqiPPrQC'
b'Gr5POxg47h1WhiCDtKH5Sxz6d4Z7EB4gsY4b12O7XkD+brIFSafGFxF8kXmY7M3bfkBwA/'
b'uUCxfJHJRY5vKfa5JcJEotGA1INSoxID3aoUIWCl6aPufNEj9RSk0vQXgfQ+llXAJOYsYJ'
b'KCmcKU2cAkwC7WlMm5NtUpAihpoTxKk4e0MnuYuW9xC0Cr9JiefPGThJX99Gofpn9fRpME'
b'iqknCVB0v4wnCegqvkSThBZ0PElg9mpIZwTy7EpTgYxab6wgmGQIGvGX6zXS1oNK1a3oUj'
b'cRZKWo7Cwr2SacF55I2T8Jy+QM03p6298PO+nAcnEgi6lN6jG9ntqMwRuBTb2bwIuEkPkI'
b'0mhNnVI0/i/jheQJMd8ikR7MG9bcJdb9WBvga+MTlJGfv2MY+hLNJCoPSFWfJv9goy6Tf4'
b'T22ST/UHUHU5N/RBOFDHS02gEHrsdpwIuKCuFG2yd18g9JHHi+rmFK90+KUSX/9KLWWfLP'
b'INLCEjJSQ+5/qipSk1QjBKZq/1RJqOvkn77q15Pkn5GIiFNEqpL/oRh18j8h6mXyPzqmBU'
b'gd0zz5n2ikz+Ges5tZm/xPFA8ClXjq5DfGM0t+k6506b6lwRPQpY6x5bcgVWuJkCFl8luo'
b'sSljuOpuVsC06K2hpY+YJr9hHqA714bI5Va3h+B9hqLl/+aLP7efvktZQSi9wzEtQOu6Xo'
b'GOhkfonL9FuYYsklzDt68wFOByuu+fdAbNHXbLYGJB3q4/n3e6LkNREfiWrzr5F8tpnvwr'
b'Mq8qQfsRZ5aIGVa1dN8y/K8ASJE5whVZ2s4myb/sonPVmC9ReBztS2aWJf+KWmAF+ub2RE'
b'3GDa23BW7VGoi+7XRa5gTGO2qLlKiO0vi7Gafl3Ih0kfxLazqzafKvqGgRsxQtv/2uVFMk'
b'tEmEvrFe33cYbXZoTzM06bVvLC1Zm+4rnM0mxJ8uv6+P6zPczWtLH/eXZ65RzA1/v0Z3qc'
b'C8BXi8yML5JAf9dYD2QwU4RNq0Gncx5hGooqbre2Zlb87D7NfHZ121VxFXBYhhVScUyb8f'
b'Xob98Dj8kNN+ay2G2Ln7FkvnlQN0vqcO03ZLlcPEENs7igySfPBipgJRZAsZiZO6vJxYQl'
b'Q4TEXWNwyxC41qq+SlZoghdqXRyBB5pjlict0kvkZAczefJoKH/T2qelpZyFKT1FFDRLoS'
b'KJx3LtkMXCRBYzUABm0XwJQ+Qi7nyAG9pgzuZrN+VnWsIuTqKPJB6aFQ9G7OTfMAB70Rgu'
b'iMSw0ZlidBmxaBWh4WF5G73fNw7FDvcq7srrvgAZE89v2EO/g/QOzCkvVsmtL4aGrIdII+'
b'yFqqe7K2xs6enFlFwJHZxFrJeDK11p+ezOyevCdzu7ftyantXjxZ2A7Ok6XdhPdkZbfaPV'
b'nbzVpPzqwpnCPzibVj82RqzdY8mdmNAk/mdg3Uk1NrU+bJwhqLebK000xPVnYm4snaWgZ6'
b'cma3Wh05ndiJmCdTa9LsycxO/T2Z22m/J6fWLsaThR2kPVnaGbsnK2vw5snaGo94cmZtTB'
b'xZTKwxkidTayDrycxaH3kyt1aWnpxao1VPFtZaxJOlHeg9Wdk9fk/WdlPUkzO73ebIcmKn'
b'qJ5M7Ua0JzOrLnsyp8WNSFVOSYpUZeEarSMpVS4FWlKqXNJbUqpc0ltSqlxCrihVLiFXlK'
b'qQoCpKlUvyK+ZVLsmvmFe5JL8yUknyKyOVJL8yUknyKyOVJL8yUkn51kYqyY2aUuVSvjWl'
b'mkrya0o1FZlrSjWV5NeUairJrynVVJJfU6qpJL+mVFNJb02pppLeGaWaSnpnlGoq6Z0ZqS'
b'S9MyOVpHdmpJL0zoxUkt6ZkUrSOzNSSXpnlGomCZxRqsInEADJXEhTglMhKVVRCEmpilJI'
b'SlVUQlKqohaSUhUzISlVMReSUhWNkEYqn8A0NVL5FKWmdU9WQpZ2DuDJyppoerK2xjmORM'
b'ai8ovMJmMLCcpkbCnJNxlbBZIRVT75NbpNBFUJaUL26a2NVEub3gy5nE1cg8y5MDxx4mO4'
b'JWHLrqhyVs6ynAsJ4UvXrkGyVpTlRMicZCrklGQmZEEyF7IkORWyIlkIyYjKUsgZycqRU9'
b'aKsqyFNELOhKQYbnAhyZDdeEGSQWVeyCmLsswyIRlUlgvJBGZTIRlyVgjJBGalkExgJkKm'
b'TGAmQnKYLjMRksN0mc2FNFKJzJmRaiGkkWoppJGqFdJIJQnkMF3mEyEpVS7p5TBd5pJeDt'
b'NlLunlMF3mkl4O02Uu6eUwXeaSXg7TZS7p5TBd5pJeDtNlLunNjVSSXo6t5VSE5NhaTkVI'
b'jq3lVITk2FpORUiOreVUhGTrK6ciJOt5ORUh2dzKqUjFwbScilSFEUOkKowYUgqFEUNKoT'
b'BiSCkURgwphcKIIaXAwbQsJIEcTMtCEsjBtCwkgZURw+dkwZ6qnE+FZFBVKySDqkshGdSs'
b'FpIJnHsxClOfq5mQTFEtjk19nqVCMkXNXEgGtfRCFqYElz6fUQ+ohXrHJUuhaLyQJRNYLH'
b'yRoZ2DXE6EpONlKmRJMhOyIhn8MqjlVMgZSRGDWVcsSyFTkpWQGclayJzkTEgjlSShMlI1'
b'QhqpFkIaqZZCGqkkvZWRymd7ySG+aCW97EWLVtLLIb5oJb0c4otW0sshvmglvRzii1bSyy'
b'G+aCW9HOKLVtLL/rloJb0c4otW0jszUkl60T+vmiyQBUmf/Ap97KqZBpJc6UUrdm7FaiIk'
b'xVilQlKMlU9ghQ5q1Ug3UnGYKJqpkExvE7imIpVCMqJGxOAwUTS1kIyoqYRkehsvVc1hom'
b'gyIVkKTSokS6HJhaRUi+CYUi2CYyPGTEgjhq8bdW7i9XWjnpqIVkIyooWXasZONXN+yzRD'
b'B5WlTicHiSLLUjdBK9McXVCWujlXmRY04p9kCyGnJJdCFiRbR7LRYSh3jvO0NCOsczydcS'
b'qUUWa/kcHqqldniiRanAG57Y/rp/Vh/UPOk7jraNoPifuwMsL5Sa+XRiBU76bYnKrGR5UR'
b'dK9iNp5V1MbDeF2IXTpvUlnfMwwz0PSHRyA7h61ogQ4M/517jTZE990mAhcER7ZUTNKNlS'
b'aqVP14pWkagSoxdP28PuOvybd5Fsjtevf42m/O2x9WKy5ByDoAR5Fd9+i6THxJMqldgN6s'
b'n7rT1iwGvrJpWVdx6uvWgNv1/tvalFIIJB9xRh6ngW0WM4LHYsQZeawt24olwu/WyGyR1a'
b'VtzzWYkVjZiDMK3bOfT5fjWnxxLA9w7GU10bxxRVjlmjuqECubCS8oqpDPmc3SP7hIeQqo'
b'SdHLFg2Vfdxu1/1xWe9+yDJqDu64PXsdfdx+DlY4bg+mXm6lHrR/6Y6n9WHzAxdWAqmdTR'
b'TuV2eN22BPjyw7qFbIHD48aWBK4Hm7PjxvL+ftGhWWRlHAuHaYcVWFn/fH9cNzdza2uJgt'
b'1FeoN5lHxnEiq7jmCiN6ml3DytfUxWSiyPLMuba+QRuZuOxsrDDRgg/DGY575m2NNnG4bN'
b'bns1/Eo2J1uJy+sjTDYm0A/VpfQHS/BzRcdoACfVmj2ML684TIsTv8kPFAwPploFgv0Uo9'
b's1Bwu0rJ/v7lBbm6qlcrfh6H9cO2OyGXqSSS/lPqTa2B4Yi+74nFwWQZnJ1ht3sT9xDyuO'
b'7UQiLbPpEAoJ8/PiAnuRJocpWdj9nbTNvZnJi50YF6RnSjQ2NpOXmNqnk8Dq/3w5n1fTa1'
b'5GZ92m6GV9oeUI/xkC1NXmQhkCtRXm8i2OWFgAt5c79zgS+ngriwl7kgLujlRBAf8jITyA'
b'S89AHbMGZ5IF0gs1mAfChUqD32uu2RGRDRuUNZb4i79ecioAzQoVlATZgOzgN8eXGYS+cW'
b'Jf2t+xM1hPocES/fJJBIlUq2Q9x+TMYrWARHB3r0qeH6gsclNQ6TFGeKjgJdKQYE//r2Q1'
b'bNWgUyKierT4zBJSqXmWfeCmSrxFQQqREuH02hzVJPbEyhFYG8PzHIeS0ISuJ+PQJ9zpUa'
b'GB5dHVhIcJL4yiMis0OMTmAKBWGdHvrebm5wr7HVQLRf5jjeTLjStHZogzj2LzRg4+zQEv'
b'5Yhmnx9gio0rxSh2mtYoxp1YLLJife8HZ65mgyF2q9456JjKRUDT3nBoY+B60yS0No0WAU'
b'gnVjUcuFIAuh0zYKo5ivrkq2pdPb/uU8mCFAdWZoIWcesEAV9/nHPuUcGYaTKfGgjwo5Bs'
b'5F6aFTkmrAI9vroeRptdPSQe0kvUNQ5y33B0OgnF5ervRRdPCXW9pihHttMQK1tgjGV2rk'
b'Wz9Icdk4ugqH2frWH9wM8o0KD4sxqCMTg4oWBlf33KPFjxoNoYDcYyT2RvKFIqOaTNxJkv'
b'FbyTq3tOSA4auKWk1In51aAb3gXivCS3KPbBz0doxaBRBVZhiD78N2ZprcRxeb5IaW8Qlu'
b'O+pyp/7PcwcnWyoKGGXLEoF2D+sLO4ospzO9RYhQaRriNdGaZKxLohMGNtYhZ8ajSvOM9E'
b'iXRM9qwG4/8r6YrYRzGnYY1DfCmhgZDsMQT2oWaJH3nc5HxqjtMljQ3dmur9xbU4LGQOuR'
b'FRQTdLYzCc4h0kCGiYUBg0JvSGjZobahJt9vdb1akvY1xhC6yjgg1BkC9nh7gZLsdVaS1g'
b'klvUMurHcPKDVzIh551B82eq4Ine6+V+YCTMEONdtXIJ6SNwBKCHVuQ6R0CAaHl6E/nKHv'
b'QEF1SjBn+YbNEcSzzW93pOfpNVd5xqzfscF5uKAYY106/d/4WqtuvuPO69dp+r850CH55P'
b'CWO8aipEU/G3jGo2ZmlnnsHs4em7vAjNvrzGnmN9g6a13Om57cFZm5u8Ch/Q7uH9kpZKXP'
b'geDMZd3pjG4kK9nySZrb98bpmireVbqCRyehEUeLOR270EyTLYdn9E0Zs09fU1SBHlBTsw'
b'JT4/toigdfwz1XNXrXP6ZI9aCrP7J20NUftMw70Gr+CLM8RIuy7oyWgnmrIey5yUnVBPL+'
b'TH4egH2/IZIpRPfCyqsfajV2fqHnNAC6klUWtrUTYiwVbeVoFeIE0Y4iSTRDRFko0MqiES'
b'1MnehGh8Gu0YAVZ6Ihq++tNBQNipF/E3fbJlGDRCTLCLGxNBFmC2weYVE8cRA2keju3frU'
b'sk7CVRvW8iVrLeQMaUpLycKWcriKWc4OJ43RzXCBwm55JXn95imKbu6wGzHk5GECcbCj/B'
b'yyiNlYjdzWuiCchiu5UEEvuh3A40W3A9KY/p251Jm5bxM/R3au9VtoQPCYtx+pss4Mdure'
b'TJfcJg/Uh/LkQVsKloDVOIY58YPc01fh2yuNxLXSaOmgNJLehWPeNcjDhoP3YaP00jrVuM'
b'v9icb8GkXkUC9TkPFysv0Lj0M+IMbh0a4lO0uwbFHZT11mCwu5KmIo9GZP3bGjEg3/Dfzr'
b'pVskQe6kW+JbriLEFOlhfBXhDJDoapklwr2D5F6OO472iMRdQdiYr3AFIenQucGdRNjUnn'
b'BpgQDGE5dV+dU/cXGHeZBb+vDoK9lyZRDdvtqJgYbd5nR+49JM5YLRdRNuotM/0PAetMIz'
b'a0j72mEIXT0cEOoHAZ27U9C3b1NckvPwzLkHJtxpbsjAn1YE/vfLFVeRE82xnm+YCxdkaC'
b'vpykR8+3LFBVnfv1yRWUUDa1bDbd9deEbKVA6/LpVVgWMGN2Gkwhj5KGeeEZbL5x6Kw2B1'
b'2w4ImlM4M8hO5h7xQG2BPjhxnobOA0yku/EQrhnPVSpKh4/S4OBxClwoQX4HjKR36GUUKM'
b'QRXbZx3/vL7ty/7N7Q2c0qh6FxgZo56mV34VrjrPD0AL1pZ+pWjs7dobxTnWMalw+MysMe'
b'daKYsnQo3DTRTTxblMnofJBrqkuFu74HjW3XUXkzDZk6/Xr3tcM8iOPAIrPQhnfW7whMLM'
b'Bp0tEiqUXkMBUx1Nbd5Z4TPvt1uvRnJ6yG3DIPbUoe9g/omUOXM0eTjHQ1+HJr6soRpNHH'
b'JdgdD+ZoywQjn/nc88TX+vjGbfJUIAk2dc64AqCciH5TWNqqmlTome12xXCZjnkOp1Dmsj'
b'buEdqTedxIceNLriBTkA4vEn2Ib1UuvEM/H574wNQS99JCqodtUwtFy0LOp78NT4szjVlu'
b'ndyFK9ngkqS75MxCds1HhxgxXHgNsRd0XZxDUJrD0/HCdJp1c75NMFyOnLA8Hc36E1Qo82'
b'DBAILG5o6YL3h5ETQqRzct78ChZuBoHsZmk7XkYs5rVNJA88Q7R09LLhcp2WmgM9JZoHPS'
b'eaCnpKdCm9irldA/89JRKhCWbnnhDNQeT77nAf1JIfQHngadSHDtJ15VzKHJ0Z952XJaBZ'
b'pnbUJmrHidoSlaSzLtqZA/GlLS+pOJS2T52fide/L9nPmaimgfjWcpg0+8b20i6fzEq1cm'
b'gWvTIdn2ycop2frpi0mHRPbpN1MqUohfTGQS+j9MaMwF9/QGFYtZIE/rw4m6voZQKR+pXR'
b'BDrRtN700ejeBoaTa75utdsTRmy2ba8gYehZvfcKADNvG+DEd7vsF3aqZCBdWL5Q9Pz08B'
b'QtbJJBTFcLx863p7FyZChALQnalWcGkGnqHpvXELM6ONvqGMOk4F/HJEIA9vzGDUwrejuV'
b'Ob+ZiSWrEvX9H0CMS9ZxmHj45VJNwaLafJJlLiSavFqBLkJtgIGNItTZnveImvaYmNl/ig'
b'RAEd2wtMErdyZsxAomUzjzxxDWSSTdy32bmZZClJtSJWGjosiJFW05+S3tX0x0S8CyuVFG'
b'5nl/ty+xlW9CIgrOk5eItA7f628XxnLGVGnLDyd8U/dU88Nek46Zgz8un5AXVAf+z/EFdT'
b'BY4C8CxoB3sBZwocuXesOH2VAkfuHctu7Qtaa3Tkw/Mu9xflo9HoyIfjxTlXKnDk3rO2ps'
b'o6cKLAkXvHYqfUCVgocOTesOImMJ8D00P/dGUBbQbisfP6MNpCmi4CJ8IOvApuZprn8SnI'
b'Pa8sYPrFCMRM4+XQcZdFjvKYQX5aQ+r7nb8/lfWIy2/XRgrzWwy9KrQcO5DetbnJ0X5b4+'
b'LIecP10or1rvZv0XN5RG1Sc1vb54tJ05NPUymUU5RXBLSOsiCAGLnayKNBlaLd8ovJGLMx'
b'GzATzsux33ujBJNJPmFcf8k4OiqMnpWGNWHC1c4MWtl9GBzQImShAFGpy+vR/MOqQG6J0W'
b'3kRP3l9XAedeOG9h23IXQP6oDQhRog9JGYtW3GFb2pIfpmIxP3Ajm6ifYxskSxM0vpWD0S'
b'oiWid6YaQ8tiMOqbfQrm1L2szdJU2GVtrni06zFjmmOqvSrUpo6bOFwQQZPvtn1oOktDh9'
b'EDFUPfQoJS0XtHC7LROYjZTeNosbspCdg9pKn9lCsDa8Z1GPbIVsiLn8sJXcHhsrfrbiEr'
b'V8j/jvdkZxjr40yuEpXHhtBZ7ICQwwTcZhE+MR6/nblD5E/rFyPMnQacJrLXwxMFjogmgS'
b'i6cOZvXifx1RNoklUS3TzhWvpUUNc8gk9pzAGK5NSFxNh1qZA+nwc3OYfaven5JhtEW1Xu'
b'm3P5zDL4wpLdxs0y6NGb6D7EAmE9n7ZmUayYwUO0P4HqEJYqobFtwj30aEPRHBhJPchmBg'
b'guomzWfokE3cKAmuW3MsjXCURb01sZC9I7M82fMA/Nt55I5g6LZpLeoVquE89iCuBD1tNF'
b'Ojo8UUdF9R7U3iBrd1h4zJazQLryrBLfgl2J5wEYFKISt2IkGGxOvDgtzVNP/c4rUluh7G'
b'KZq80mQ8/OwGJRkOCavCzzoHMyK/Fvw8YqNMYSO8ZEvzOc1wMS8qyP2LaCurUCRCOqPLzo'
b'HEMSzuveLNMii8LSPOTQS/MctvTSPCU3r2kgT75ZzYCNnpQcTS5J2CXgOZ3ffmcjJUdXYz'
b'qNVj+LVcIGARE6OWo+w/eReciTJJ1abIdbveS6SDq5ox7+7fq6X29fekCvtQt4ZchRXHG0'
b'NYfhuhbV4Hv0uAeD1UutTM3D9i2+Z6GuAMrgObVEOM0914C8+LHSqIyxM43q2zErzZAXP1'
b'KNRtde5pojb3tQelVCEFUfuwbX5zGk02eskTPuSY8q6aInPSwtR+Mhf6f3+hFOd2WHAz/6'
b'3Q/0XJ1YuNf4VsUK/1H2w2u0No/y0YZX8B2dwYfckY07gnOrBnltP8MI74BQKdvWIlK0jD'
b'0AbkeLSw52jSGrZql14HKxdAF0mEj7MKpUMN+2MdoIxAa+YXufWUzlhRdH5aSPYIs+4yoh'
b'XFT/th0uyJfMQzS1sdY3HFMbi2KwGpD/L9verRzkWeZSKl1+NqldGNECqcNUh+/z1Seucp'
b'FIyuqVAE59Wjkv/m6sykUu/V02qZwTbwBNcnwWgL5u3DqCzNVmeHUgI+N+1MHn4YBc1JcO'
b'GNCf/AehX4nJkbBdt7frlFArOvNkTKgrc4dIRrQekDLOHCIJp59d/8JGl9Go3FMyscky1o'
b'KgA+SekLdoKo/IWzTIAP0WTY6+db8xygiXK+23njmhgkZ6Bf2/cAA4je/gaMg5v506kwVw'
b'F1myQzY9YmA21x18vLn71vFmxG5dNEfH5g2chh86CkY5ehSH0PhOeRTOwSbHPGHZhRdy0M'
b'qGUMKIyN5OmzFp/HzYDSe7WDa3QHgzBoN+DInboo0ZXiFGBvjKMJ/g21+0hVl+F99qhUmC'
b'NbZEP+U+o2bnMNGpSkerBrMg1H/FvP3AdGclivWo8w5+dC5PIZFOXB1I7Qox671IjuK3n/'
b'xBBnLpLatzfjh9oi5JDEffQUIrtfTVoG0cegF2w/DCq9nmBKkbnpWk7D2vDHArh+mWP8ai'
b'1VgGfTZG+xseX6BcSttCZtoZVsUPNRzVpKXU4Ms8VbRCXsqtL0v3LUM8cuaM2M/rxwH9jE'
b'wMOXYoPFpvCbwb0LVLP/9bIu6LVG/WAHkVqbtlB1sp2BeExrTeBPzPB7PSxwVT+637hoXD'
b'7JpqLiTNuyfcSgu03KnvwWhS4UE5P0MAUzXaDpgeEbMvO3dlf6reeFoZyla8mXGjH3yaEb'
b'AqdNrMk0dqqmXyKKsNLb7VUGBoBHDYdj1XhyYz0OetWoVrLRCtwjksWmtrkke9PlMnj0F1'
b'LJLH6MWpVfKobF7R2B4jbQjN6XFsBLvMiI1XyJc50dEKOTTVR730gNgxdlASHvt+fMRMZc'
b'Lfnh8I4HHHD3gyAITpHyPVBtqIg0SzyQSRQQ8y0xq080MBnex2GMeHP63JoCVpw2jNF036'
b'nteP9iCwp8Ia+hgLy+iBE5ZVAxYWkud2sThmKC8xWxZ753ZFN8JHvhx33+3tyWRPBWcOO1'
b'wO9nSyp4ILh7109giyI4LxuIP4ikxvzyEHOrgiejydzRVMqB7diToTpvmPPeS2Vlck4kfL'
b'GLRRy/PCfAUd09JKV24MEOrCVNE3NOW6NXyvKFvfVkeF7pMWSwNo7bdxSFB+LRLrvoXDgu'
b'prkVs6rhVRq7jWbTTUWkgruBYRta62pKi3C0977da6Fx3PxqqHauvAq7agTDtDu+DBMvMm'
b'Eb4jlQxtKBwhxFThcXgUexl2GsOjX/eBqvAIXXAv7CnZR3alvM474XPYLN+p+Qr5aGlVvn'
b'MDhPLNFX2rfJeG78vX+tbF6ZFQnBaJi3PqsFCcFrlVnFYiXZzWbVScFrq1BFoZji5o61YK'
b'2joIBd142he0dS8FbeXRBW0dxH3mUjDpNNMASa9ZWMzVERfQdtSaIZEomAjkuH7g3jFP9k'
b'xJHR449ucJTxFiKvukTeRI+gOFBb69tRzxcLZ5viIZL9NjaH3iod5owGlmU6LxgNPMGLI2'
b'vasMHSzvSGs1bgFaq3Ck7UuHTW4/dwjJKRCYMDlQ3cHfTgDF7x82iZ5DTJYg/VITkifqA2'
b'RRzyEi5DBMl5YIzyEijNFziHDvnkNMzVfggI72CuBSL2EUGWiV5ob0sOcOV3QIq2A4x45v'
b'ZjDkoAAuHC7IKnfI/vLHRu3CzpbEUVl5kpCXpq5II8A33nkeB9oGVggXRQzt162BY0r3FB'
b'ld1qT1M49VZhBXsQxb1wUHhMpgAH1/wNwCoxsEWote3SGwsvhY50F9+N5bkwVZ10+KMWE3'
b'3ppE/m/D5tTcUFphJGInfiXjVE8UIkC9uQAt8UlvLsxJa12a1brfdzt7A4v5DNpPBATVx8'
b'FBiwAQbzsg0N1wxvRBXq6QK0NbzzqdOfHK2JgDoF6/gDKnGO6s7ERjaqLG/L1mOE/pLZ5u'
b'x5EIXtRsnl7DKso5Uh3e+ITbaBRFC9d7IOhVn/QeSANautOM38G0EI3syOsl7eJPlfjlSx'
b'Y1P/WyfpnojWLnwN+c6UhfjXJLhpszWwtEcjs/6jZNIh2NLjmUt57wXQWUIo0MR25vAF82'
b'Ho+GSPE/HGUJgcms8sBwIVSVQF9VfILKAgUkkEO0mIc+hUdSwdEbFgWScuEEYD/4syDzJk'
b'De5qux2Kk/PLlz5pN8FiC3OUo7zye9/dEw9ON6HzaY2Mu8hf3xWcL5O6b129uPrs7IiA0q'
b'UHV1v9fQyU177jwJJ0bpSN91a+lwoy5pddhxSXJkBpIRG/d689ygYf9nRXrUB86nAPuz2m'
b'WbJ9vIgmmlaL1MUtPhDrqkXs2ncLymRKRNLRBbqWTpnTFLCSw9K7bcheXGE2vLahXr2mNj'
b'udFFKKlgz+vTcRQeqlnEvQ7Spep0eb6MWAVznja9ZqJ65MoKM/Tqyd0pM+v4MgzmEoP79f'
b'HenJtvFh62p448vqBIoSbSs7L+ajJFm5udIiTLr5DHMRJs3zR6cJcd3OJRGLTi20zUie6K'
b'I3NqU9sFSO+voKy+gvLpFRQiiOCx0BHzSuqIG4vtWN7eq0kVbS7MipBsOkbyyRgJYWt0LL'
b'DmXcmrmbG44LhHnKtEb4NN0K7iN53RItSbzuhOgvZaWSK86VwkW/2mM/jRm865oSVkuO7s'
b'bW+8UOXMfaTCfkZ2/AoTGw6I3wXNZSpUUFuIbW90sHoVrCIpeo3xYbtG7W3VzCvNOb8O0v'
b'9h7rkdL5tZ7Dv3LTXzIuaOj4I3cyOG741HgtSaJxE2Bg2H6Iwr11OPApgplvhHNwI5OhRc'
b'6DUqBqpP4tWKjjryJRmXc3Rve14CPIjWyvw7XtQwwVHJ2rGSpSxFQXpPpf3Ur6Ch+Prucn'
b'2uqHH46PCMg8cncpYWDidyWguMTuTQmc5V9EvRCXVNRxnCaK2hK/Q+85lOFZGlmtgoIrRO'
b'B4zbuoOvmrnD4xYOMLrmH/kZ6X4oUH2mpcKgAR32xS0MsNlHJ5RJ6+RrOko+ctPZ7VIX4W'
b'c6U0RWKiLPFBFEd8A4+Q6+Sr7D4+QTPAzP24s3VMoomNvQ9zrzzEAPmnjhQgAUsG+xnWdq'
b'mHL4SLMysoJd/ZS0fop+ZuhvA482ObPLgpA7lclqOpxPL7x5ydxdwYIxN1fw0NRW5g3oPH'
b'VbQHHJPSjsIqNjtKT7Xl1klcN3dLC2UHRUfOgMoseFsuUyQlxmQeivXE9EOG8vW+508mpC'
b'+62tuzw/2ojxDkWpzz2gdspKh/EdrYzHXXrq07OkFxOgJb+VlrRK1KWEdZVoe42MpFucga'
b'C9vB+FcMOAVid9bHDTJvpdlKJMem3lAmH86qExRnIB5Vm9CpzH/tgFRpOoBUea3GJW0PmF'
b'x3yluWQLZx5xkCsqUIwpmsnNY5oSlhFqjorlPC8zRs2sZ7WC6hlxuO1/vuzMoRERo4rdHL'
b'm3EuTINdfkiCypRikzzxmjwp9CypcR/8+Hbse5ogQ9i/iP3GHFbNL7xqxVczHgHh54c4j4'
b'Lm/yJfIR+yhiZVFxbddfg8BZxIH+HbIhysieBxj9syMsgKiwduiOjkHO+oon8cUsFFmILy'
b'oU9kvCiRLGYf+B9uHCnsXsc8gSdJaaNYQqkEU18bDehyyJ0u0WnHOaSWiYx+9CgqNoMPI+'
b'SI2Z5jHrBVolaoRENovZJ24hBFHicJXpFVId5eSpe+A5JhFoFjN3jyJPlIzT8NB35zeJLx'
b'LW9nN8kjNGu6jSRfXgdB4enoWVxqzLJkQUVcjTJbTMOC72o191+1po9itXVKRAY9YwbIQT'
b'Nbpv3XFgolRtM1Um9G0q01ljAkNVGVaYkNuqxiAtAVeJMbKGoJSwFDUwjKzWFIQSKovDVS'
b'C9bVOmMG2KyjJRlpLI7KsnmKCiRvfZshw7jo9jpdTjI6XUwWOltLJwUEodMFJKgYp9I7JC'
b'2zeSpcwlQeqVYeR0ZNSJeq4HS7QJPdCxt5Hs5LeOyNIhJtJXhpkowSuzOmRnP35Wj+345r'
b'27E417E5II1DYkYPxOC2y0Q73+PU1uqujQ5ftgzAI/5ua5bIkc3V3ewgEL0GIgx6Hg+l3E'
b'PDH3dQ7Hm3d1FoY9euIKVS/Sw5EBB/RB3vwPXfbB7IHxfH+KJnXQL7WVkEIdDQrU/cBDBD'
b'zFkQbsHNP2CppCaC7Jw8EkAIo+ome0e35ZRhHPfbgVlUF89Rez8BYWkGLAvqTrr7zPqQu3'
b'OfX6ofgCIonhHJviYE2iZuZLve+4mEeIt45i9wDYbNhR+7X+xHYKAYrSjApw1JWVJX9l4p'
b'U7TNecMRaZeCHBp9N2rfd8IalsJRi+0mTRNXklQEU7U7A+UkDYvRPJjI8svtgjRzccwsFF'
b'q8CoL7eeS1slV20p15heQAb+bdufT5H5RuFBOaymmFXyO1XzefJ7dHdKClrt4i1A+i07fu'
b'sdO0uHDTvQ2tZ6kvzu9fUVv0Vfn1lCFqDQGf+OJno6df5MA3L5d3cMQ8qnWCXxBlYNutuH'
b'tdmFoUdXArYGvLoTcGXg8bo4pFQLTTNGsB2dSWuS36NdziVpn0GG0DnkgJBFBOKrWxAgWk'
b'3Oo/6/Rz0MCkYaBDJIzyKzhNeEolfByLA+bZ/7yPIyJRwkLEC6ATQnS3fjc9A3nyFsDMOm'
b'igE82mcXnpUtABpgZIbVJDcssAw4MlBjpMogyzi5slcz6HjvdkEwvttwCUjneGHokOGkda'
b'/BcMfmwVNguhdpFB0NQCUYLy+m15vbz/i+RlRzoG/dcDnsoQfsZbSqUmG8cNXqJaxj1dPA'
b'Iif4qYVxOq2hU8TcGbjH4dirDp55cdr2mzUm/EMop4mGUcF69kz2CunYzag3XTHvwjVZlF'
b'PvoxST5GrrxBTH9Q76KmGwLAYMtztjjnR8jnKWYX33kiI0o2e92N0mz9EFXjPSzmqD32K1'
b'gYnvc+h2UGSxkQbZSnGEGvIcm1dOCai9SZRiZJqh6Sg5kCK+8BM5cGWQvEJ1Ys057NaHDR'
b'OaQoF7jnqXkrQeKQoCvmEarq78Dgi13wBqH7E19Ggj0Tq62kmsDDzuIimhthmlq2AFMTOU'
b'toIggor7fL38WwtnpGsLY6xtzz0j6NuNh0YaN50Oz1u5uhHTWQMMcqtUYYHL2p8pmeQWeQ'
b'2epkT2Fzl1wtjsNVMzpgv647O+uYoZqcw8UDsiZR61OFJzNR3VHuRpfxzGG9WFQfddd9YH'
b'JFnEgAMNmXt0Gs/j/C5bzxhllcfH7icOl8zm6GGQUQDe4akfTsExcjMertF565VtDPrP6m'
b'QrCn18xxNSFg2IyP3rO55QrpENR05aPa8A4ZBkKdHUkKEF54qOygAVaECXE/IV2TSgw1cp'
b'qhkYk3s685KA48Y9U466vSJnOPhDxxwqZSwv+R0SgIhOehLHruIc5CflF4yhzDzrBeMpmH'
b'p5eK7pKDXI3a8SZgPqNVBtwmMm5SLZaSuGDKSzB4SWsBPDBeJa77R0mCeRfjat4m09eJPT'
b'IuHhgKvnT1YLj3/vnZNVfe1ivPfWrqrI0Y1XT1bzaxfXwcy8o2tW41nfe/kEffmVi+tgbD'
b'7IYDkleb8x+kTjvsUwZmYQljsfuDKfQdeKgKBtOTjoVh7wV7Is7L0rAZQbchzrztyMM+ar'
b'AG+6GvPJGil9LbHrYWaxMEVzpf6tiN7Q3BcLE/jzrZBMhhlptuOsX65YL8f6fjuxYHdDsG'
b'Vde+ZVRAvPuTW1WK7uEPL0zkwnnLtb46tyx5iOT2I7X7RIvd3mnyF3UFuN1RRi1UoQSK/0'
b'5MhcpfSQI0pPY4n4lHG+BBqrQvBk7VWhCu60vaqjxWsVSLGsy1Eo3aO9clpf9jY38PiYO5'
b'JL67EJDwXxS8zGpoEcjt6gLcuWc4NHNmrW59hALXNo8AuV3UDaOs1CsovFWM3xIYyQvDTR'
b'XaCAGKK9QzpAtqH3tS877+Ij4CwermWxfsbjHgC+Xo+RaBe60ZyE7kcJ6NER5aacI7rd1w'
b'FKb/+gTPLTgHo7ewXdWFFo8xts7xU8axbr1jEyzC+jU4dTJDGMrEukZ3jYcqvJ7dSCPTxR'
b'gbcXimWVpw+DMeNbKFpsNDPeqetwc/VYhuox7MJlnxk6zYF7rJMUw6q/QMfsRZmrdVbttE'
b'3ie3UyT/OIEeKAE5Tc8A35YM65oD7JaAwh3QML6RT+/NXlPFm706tBiOMsl3Qgl/1TTBlq'
b'01XJsPLEBTMJyK1yyZLvFgtYf4ZMzxMeuENF3Os7WtrEL3hSB7Df+p7n1GFuF3jqyGBlun'
b'RIdPVuTtAtHDBUfwkMY9N3wFg6XAFDmkq9Ots4nwoW3yNlcLUFTr/cskOn8UrjPNN/MKdX'
b'Nab2Me8oB8LBnGqm1zsaDYZb550Xpq/vnuNYUHQe1eHXjYV9yLUlx2HWc+LQfrh+oPGpwv'
b'1rGyyV/rzuMQnRTmcB9rFVBsJQG4u6CnAka+tw733m6Ctpl4aBrirO6CzAUR6nDvfhzh19'
b'lbMTMt7W+0HyqwSiDRlaRUeGDEyTPYFIKQ6nN22jwXz4Q60dNQzmePKu0fO7WU+oYAwvrB'
b'SgyPUYivDC3VhLlFEYN1ENRtMRVD9tFjdNDe07bKj4e70aCZ13f7UaiXZ+Q6FoW+t3rJ1M'
b'HXqtgSzTwBo/SsKqOZojovfb63WMmt77b7HlGLJSr220qaJ1CbF22NOM9LEPOqkig0ZqwK'
b'AektSjZsU0cikoFFjhkOfuEWNLwMsIj3sRz4tRhOSs0iokRs/MkQQz0qlrgaKdgsLwzajV'
b'oI5wKe9q+SJz+GjxwsHjyfQ0iRcEWXsIvKCK62lzNfF4NMV23uMlQOgrBo0CwPRxHxnAkd'
b'YtT9NRuTLmg7mB2iQCn9pcynF9A6FxhgHcTUWVpdwV1hg8SdLoE17xfezvI0tDdh0AA40u'
b'iqP8rnuS2S6zQi0QIL5xi0QskX6Can61QDBDevUCQZ2RVgsEKAi9IsAmenNFgMPFEORZQp'
b'5hL7oPQ6FGE4SrIkRJjfYp2of5DiwMMiEEqIR7rYEgIcF0DMSFtRM19ZL6D9XRIRWXh23Q'
b'g6HLEXDHNkpk/+UxuEZnd/Fr2I0hAg+ZqtccapSKXnNoNR3lF7LkosqPArob0CcT1peLOs'
b'FK6Q7KQp1FSyBu0ARPToE09sRzDZiLBkqTUGCP6BXttd18IM1A3Pt78RgzUOU180utkKBw'
b'L2qJBFnydd89hfzFFHevnCM1rzEfwSv/y4SqGdrrQWttNUlM2cwBooNfbZlO8e1VLTrRqp'
b'alg6pFWp/2mCeH6ByHpqNhtgBDnr9krDMAodDTRN/kMmlA2lYGBXOSHPzEE2PNIUw8MciH'
b'c63LpSXiiSc0skM88aSnaFgtDC0ekDPRbYkINroeUdNRCiFa9wr1/w+rTtuH0A+q0kOU6A'
b'TsjLRfWjeEXlp3QFhaJ4Aey+toLEK9TZwn5hYae4SJo8VhPJus4ITGIlcLtSuHj8YAB8fv'
b'EuSFR+MwUgvHJtN5adEATC0wHoXK2uORBC7Q2GllwXP/3F3OAWZUutyQ29EFipqOyo0ezX'
b'qJ1p+Z/Q71GiUKntO/Cc998SucGbe0ml2tDBCOXNeKvnWJV2b4fgJmfeuj6x4JR9ctEh9d'
b'nzksHF23yK2j61YifXTduo3WPCykD6hbRA6oLywpZ8YnnvYH1K17OaBuY9UH1K2D+L6yTD'
b'A5oF4GSCKbW8ztlCAgsxoCkeLVEDjTW2B5IKPBA6ULXcDMPqgXcCkMvadeIWGPFY3+4KsR'
b'BfFEnW1O2nerhtD9qgNCx0oguEdU0WWZiCq6LFPTUWWmxwOGr/UzzcRVD8prWP0NDTlJ34'
b'+wlIdB7aiWydUDg21rwaftBUKK02au0NEZ/ZVh3TqGUt2ZsyRkX/MMfGsZdpkF1tUMpDG8'
b'8XSmduiNwIrAugqsNbzrRxahmGDU57MA6/5ApWbCRJzVlWwzRfPVJY/4dUAWw1mpSCtFHw'
b'ZZL8TkIcL90VcTWL8xj/nZAJknZ69itZ7QQZkoeX3wbtcZU7DSAEdeO2kujK2Ni9Pl3t6p'
b'Vk8tidERKiSB1AJs1NYF8+5VT6kQpOiXkFEpOfCrGzvS619vXYF1ofKHTI2uD0WeRteHaj'
b'qq6RUZZ72DtLCIX8J0pF7zFChsHxHa37PHejKHE3JFR4cRNEMeIlkl9mIPax3lFFrMMRVq'
b'3k0UVmFZAxf8kG/mDh5otPiQee1UkcHsxIDhch2QSh1EqEr5Q2t403pGS9rrGYbQeoYDgp'
b'7RJgN1x1Uy+BMU6DSHsOucLZPhfn082jlT4Qlt7jjz4C3j2QbMIByC1iZcZLrjF1NIEF3D'
b'mqYe0PILeGUFOrviaFNQw3WHOzJ8ix7ZWkIOd6ymGvALlMtUo0qBXM40w9+JuMw1qk1s0R'
b'cN1/emYr6iTSFzCMXr4p3KXqSGlAMmKBGfR4hHGTWvykDqMkDo2oAZ/k2w8Kyun5wn3vqS'
b'B/ftt5uc18ng7YtXyDxdHggjMmlB8vQOMgKNDIxXpI8shXlqPyWHG0srQdvcQpKrS0tH+e'
b'lC9DnZMtjoqJLJPl7EjFF4uLI+hne9wz1Pbm/XI1khp5CdegkQgos9MNTGIb4wk7kcX5hJ'
b'efbeomWCb8zsaNY6s58pH+Yt7bfet08tZOxb5SrIqrLocUAfoq0vG4ufoebqmlUtHe7MYq'
b'FaDHtVnkvK09vEcJbpCHG+AKKVIriwSnKaRO+IG1KpyBXpoCFPAnnrbqc52V4/Nl5RKzpo'
b'bOgbzIMqU2L2Ni9e5tWQfOx5YzbvW1+Q1Ap1ZYGgTxsgVqdTC+14UR+GqSFWrQ33lmZtUq'
b'IVa+My0qsNcutGKJMKrW8bl6JuG3a4Dqp2pFe2jWN36pEym1SL7m3kCjadk2ZGwKvPqSX6'
b'Iy+jZA0Vw2v215aQOt0uCakhg+6vTPvpz91tCsFFQ0BRAhWrcGiWNO2iAXmeoVEdN49GXz'
b'OViI6Pm/369HDZWaQhct5SIKPgpKhv+n7PNHP01WgAj/5h81XtvuUCKoYyNveeOUz3BmMs'
b'WsRFgq0xRRRsWFBboQj0mQboQ4PoQ4X79r0E+w0DqIPybFyRWTdKzT3mwXXPVqh4t3KexE'
b'9+TAoBwn7lLGD3u9f11zeCCwE90hjk9DAcO7v3N9w6lNEo2Oe/xvQ43CQvfLZskrys1/uX'
b'oDzWBuFZrmATlcGxnmPNQfpetcC3nz4Rf+rMzZ9ZigGBlLnyAoP7SzQPMy7VNIy0XsxOQf'
b'dva0wH/CZUxuD0+jaduLPAxkh/9DTNlOzhYRvZQS+YuNFCPMNFxOxOWNHLRKvtTN2xO7gL'
b'ajD+Chkf3V/mbWCZ94XRWAWwbxgvAqD7KeUuUnxVXKL3zhSmFHwVhH0BuQmAvnjZpcbfrZ'
b'PNFD1Oz0rx7IPJtULsWZVKITpJrcKjNOkIJVFzDapU6VDse8ulQnS6DM6Z5qZ/NPO/DMCp'
b'Cyf2Tbmfolt1KUpYkCfl7l+p7GeaamKjiGytiLBF6YDxqXgHX52Kd3h8Kp7gN+UKutmLXp'
b'9FQoPCjBLSC6rQhuzNoaj50Qk4uAuXcUynQoVJDrHuW9ilyVF/rN3b2GUORjAzZhHFhxzm'
b'ib6wlOGOzlUYKceLE01RGzS0fxPO6FJB1v7ozgs6unnB25yRxMcHKOnRPVDMVm2JoHXMPR'
b'TVV3EoRkTGHRUBBNO6b612zxxmhwKqhtxZtFg0aqUO1KfxvcNIBh+LtJfMA2rPqDbYCTUF'
b'kphZrzNINY4x8G/6B75NisYxN4milcDJ2O9gYAJw4r3XGe/OflFL50ht9EZQQ9r39obQnb'
b'oDQq9OwLw5XPLD6NNF4s5FXO2zzoUz2mkVxnjte5GMz1hg9HbQaEXbOPUn0qqa1OEsdhe5'
b'iSI+4mEktTbgc/P5El4qxlzdABeZnKeMYDiteX++N8eASvpiUs9fyHSV4tzho/Q6OF7/r0'
b'qPxnlQWHhkwV1lSbyFPHXAKFucbzMgjkKYKpaEosDRPkDlgjoz+8+hRDAvsvjIOROpGzxD'
b'1m2b9KhAmAOvR93YEAj3odEUG/OljQ9XBgnb2IWh7c73hCc6DGk3tUtHqFZnA5Rmn1lSjU'
b'6oMtoD5o8vymYONSy6ngX1cuAhzcNTD83sT6pI/rIkSqp5HLSFt4h5ZuQTZhszLy/CYXQ6'
b'N0m/iAFfisTpJ6ehvAf60R6OZ+WVuQPch5VLphyasbnkz8wfUgqiHrKbWSpY/vFS6ZfjsL'
b'k8mOXaFYnfeXz1q7lFxTC5+N9t/G7BgtBLtzOWgjQkNeQxLJdmgoQF0txgmIPYY7F5pWg7'
b'aUE2nEyLrPmhpwQpgV3/nWcOUT/U6ipyJrrNBfFEd7eAVmuEqMhqjXCe/EGtO03+kKM0Nb'
b'/3ygCGgDp9l5EcGVmXxK4MjSui46N0DM1f1ea/00lErSPqQVNZFVEzTeW5pjidClRQaTwy'
b'1os8/gfPlX0H/l/9XGlUETfWq4T1PT/Xzo+Hjtc6KI1xlfyhl0xRhqKLtZPkD2eCNMdn1D'
b'HA3cBTlRjd8REUMUUGNcWA0X2AbWVfe43woGKNuP5+O4unMT7yZbkBM6S7Gsu6mAo08moZ'
b'7rCBhWYCjdwaRpyaSqCRW8OQ+mqxOmAj15bj33y1WBOwkWvDifOnFGjk1jLc9f8Wmgg0cm'
b'sY/p1XCxUCjdyCIZ3qInG10Ru5IKN8Wiis+U5rTWWFpvJUU6H2emTcejx+1Qg8I24ERHmR'
b'j7E2xiTCU9IzpRoL74G0gronQJpVhPjnPRQs2zTBb7RwF1x6z0YeZwuE4T8T6n59Mq+wto'
b'K4W2PThSDRQB+8mlGLw2EbQzKQ5XxJ3bP8zbMe8tHUgVQjYNpY+BbkA5op+mBNdQxgLrr1'
b'6ZorjEtBWaWBKGVVwvVGqILH6Nz/ArTavZuA9NsbRSKbPjnxjdvwRKyOsCsZxt3IDK4dYc'
b'oQbkVWIJcJp2asYqtETdIcrfcNJ0l8NwdpbaI2A61N1DQdWRkgK9ZmQxBjo1nCVIu/KXjO'
b'SvSayRj3J7tTQuNOcx8ElYsy0W8spSD9rhamqcdgK4X5bnhLoUVcsVUU2WpHCYPKMZrTzw'
b'zt92GKJpByJqdAfnaYQ/L5J6PQQd9qCKGwgsJUChIUJsTdPfGBHTtPZRE6mpsALOg6IGZL'
b'YFVi0n1UKwB5asmgk08IjA4eM2BdbgvSb52x49UH5fL0btWucvxTt3fm3NwxMlVeKDoqXw'
b'plTrcZiU/b8bBq0Xhcre3IGTNCfz1my8hR27EzZoz8OXYALe0H19qOoYKNfDuOH15rO4oK'
b'NnJtOXGyqoCNXFtOGGJrO5AGcOTesWSQre1QGsCRe8uKM6sM2Mi14/iBtrbjqWAj15YjQ2'
b'1tR1TBRq7JsZ2tXezPeIsdoF6pdJUFaBS7VuVlcXWoyRxeOvIFHW9o3gZSXUNfoQfTCyaY'
b'eB3DoXkSA6cfKT9sOEv7GYyhGw3ou0AKMkbXUJiAzv0Dfbi5LATDfHt3tdiQOny02ODg8b'
b'JCbuHRTawTi46Pi881HBsNzhxL3DogNpJnf0X0yjxx4fFo1cIJN178gU5g8WjlI18oNA7d'
b'xRofZ19acLyOkbt8HZs/urQj5cd+ZIVZMiiurJuh2uyZ2bXs0THJmYOPvXfJgVCvjtSMRX'
b'eEmo46QjTXnlZ0PEvJL23ZXxjE7UVZNv06y1UTZ0C0RjeLOFr0RcQJa57ZMheO223ImjaG'
b'9Lm1WczSAWVkxbYCKQM/RydfMMs6aqPBAqlx5wzYqBZChYaGHIjmaYgoOj+A0ovOC2g6yn'
b'NUI4giJwQgnOj48KOVreWCtNewUhL6Cg1y9bVEqaFH9xIxyOsTopOA+u16BekteAXf2kKc'
b'3mD7rcRbPL2lCL7edoX4Z3/KdoZoQ9bPPKH7N/iOzh8gW6PzB5qO8h+hIRij+yjNLbNonL'
b'xVTrTnq90l+2Y53InIrw93NskoTycB0TfuBfRWjubJdzP0BkvnZ55wqbLCj1bY6+QkCnvj'
b'vrXOWBYAN0GnMqSrcvS7iZWzZk5svJbUMOTNaC2pWQDU+nlt6KCfk9Z3dDBqfQmHpiOrHs'
b'YGfRn/b4cLYnzbdq9rA+3DyX4Kuu+ejZaTuu+wnBIjQfXzeNAOiGBK5Btsnlna22RMHb/f'
b'8/+dXCmC6h/wS3hmLbfw3gfnaE9ODCmBW7Lv9enM0mHeS2Fp7cRB3oUVRc592hRcuk57qT'
b'3oPVUO0I485t1YUWRfxIUh9Cw56VkPSD/rKVP3HVVFBK+mQitQ29c1LVNm9lNf3OmgG2Zz'
b'y8ay/PO6qAhhSpVZQu6Yg5Z1iuZYGcWMpEoN7YcK6DpCRs7grUP13u30SIUm0D0Mdt8sd9'
b'+jx9nmib+bccL9tFPXqaetckOPmmBmwKs2aN2OGyHK3j9iUdrPNNfEoyKyB0WEebYDxgtE'
b'Dr5aH3K43j3PkhuPVtBdtBu8JKD6A5RjdK2WpqP+oAVj3z8MO7v41AQyrD4pMFosUrhsmU'
b'4N9nXoURs5TjgBZosbeDS2oMp2+m7NLEtGpjEspK/mgnU2MH6GTWUHqHF6aZFggFdq4NYZ'
b'lYl14Ed1F4B6QLO1iB7jlx4KhnYOik3tKg8G+zoH3bKwc6JqQw/nOsp/h2lzOgeJQd3c0W'
b'JS1wrgjeqcFzGjc5HrHTjnJD7EMgmgnGKZKkyOsdQOdIZ4COzxLHflQ3E7baNVs4qAGoVL'
b'0vrCtpoAbwSSa/NSh+jnkVaLMoLDnXqrBUvScPSzSPAw0bC+hK9wTyJZtr60D74yDUfRrB'
b'K538I64ikMo6TlltzZFUlef2Fo9kCXvXJvlQmTBVodcEDQBwyww1R+px4RMbHoUQRj2/Yh'
b'zkx0vduo25xaYNRvlha96jgri497ThaRvtKOgvDYoD0yaL+dmB4x6xLNxH5CVE1pIss00S'
b'kidI8OGPe6Dr7qdR0ed7EEo6xiH7rlzceSKlbd3pxvmJmvoCJpOihIGjVfwxlwtriGxU/M'
b'FC/LKzT4cLwh1INFaqCgl1lBlAhzDYSgHCzOGkUHV0StvlCj1vZP5jFRqtT8pCnKwsGmTi'
b'l6dzmsz91ooYU8PZKhhukJeaPpaCRDTvW7i3o7ZmmB6MCzAfe9tc+hijHKKcY+nK6WdKYW'
b'Hq3oWHRkPdI6MF7lKZNblh/zJDb6KAwdHyilxt6zz48WZmx4o/tLl8ktcxEmkqc82Ef0f4'
b'YhyZBqwDTuwnBZBPKWvfqKbD9UGq96WHRAGBQNEA+JpYXCgGiAW8OhEUUPhsZlNBQaRA+E'
b'BpBhcGYoGQSXjvRDoHEsA6CJTg9/hh0/MbwS6HLkfsDbBuPwHvU7NnefeWcyQuaCyPhYGc'
b'iNjojL2XBnK/sZ7TQRs4c3K/epFekZ6oq+bhz1K1p4QeTcDT6pVrIwWDwec0d19O4eyi+6'
b'E5KudKvUdNQqIeWw6zcXI6uxtV6/OQW/9ixjzh7zkCdcdBKTZGQk2l+4GIt+T35WNmlIhX'
b'UhJNudC80m9lPXPAduzE6w+4yeWVOYPLM2TU6y1IQWbnRSPVlpHPbwwAswpp7a89zs0lF+'
b'08vcyw394mHL1w4x2M9nzkV4HslzfEjPTzQSXHnKhNsK9bB+6eGJUXtwd6BxVOqpgf6XmS'
b'P3JjTvFDWGzMKTJvCFp5zs3E70oYXzCddJKZ2bcIHRYLYDzWqjd1RpR3ZJ1rqiB++odo68'
b'+bHHvZymbF5RQ8zcw5Ueb7Q4HYN1GMolWtKpSHu1yhBarTIAn6TQPTqHbaLxkjPXCYjGj1'
b'XUE4uO1+0zC8c9e+mCGNkP5haNR4bSgqO+nU1IrwMiGnsqgs+RMyccFd1BhlI0ZziuG2Tp'
b'ODfaI0RVFmH2Wx38recOCwdz2UmHQ7YcxS4PW6rVNEwjpbsTZHH0pqymo+5kmcSvhxYUht'
b'q9tURLkbgLLyPh0B4ZrHlKC90IqsRGHQg2ZUsE8zZcXtfRvU6LhLbNUAr04dw5yYdneyQj'
b'c5Q1VeB7UHJqNyNH2/JaOpjyklbbvhXJ0fvcGbGr17nz5BytCa5IjzTzBUPvmaYoRcvkHC'
b'0frhQdnUmegHF+7bqdvuf8vOZBZxP0V6qXc34Y5ZRab6C2IzJoxgYM+ilIe1kn5s1nbZUP'
b'hiyDFfjG6Mu3DdBXnMPqV4mMeNDPW6IqGiBe30eVNOjYQp7F+3D1OGTDPLLw1Wl7eDEXjy'
b'bnsFiWWyK+q6VKgUZWCZRVnX+CLnCOVsYaQ8sCGmTQBw6mqAjdrccG5nSoLimfkxw941AS'
b'u3Hp6zzzjPHFAZMFOVcPP1QGDQfcTcC3bjjAAOI5V0E3ZO35cO9ZvSs8U+hI/KlhxbV7Vl'
b'vwRtRT4VxF3ZJ1fRtChaKJ7sUpFR01CjrcdS9bngvNeGZNSK9TmDh2PSft3WbQd7BNPOOP'
b'jksHgcGkK4XTkLeUY8MQRXdpKFEtKUpY2aFTqpZ8KO1sXx1lhp3DhXOKDBfOGTBcOGfIk6'
b'6GDZpi97UPM+pZY4Fo6kUwOuJQkPa9oiF0t+iA0C8aIPQ7+cTQI/uXBUEuNT1jpBndwViP'
b'eNFFjJVm+tX+KLSrKxlRH3QvkzWGHlXTuQGv2ox1O66+jA99Qfdnfzqb+zdyCzzyMGLGd+'
b'VA2ieCavtpTnqk9ntkxE/U7KxfzWZnwhlNaIUxnr42yXiX3uSNgUYzU+P0GM+WFoLJPGgS'
b'IKmtTB60SqOvhLs2UybEHQ9Z8vPFnCYRdkaMVmOTVZtYb+r8SOUgASYWGMKBktoi6ogJS9'
b'Ye2tF302eCnsx7cpzrhens4gY3TDENGyXDeXhuP4NXB6i5+MwiIQczDdyaj7vw/YzcBaAW'
b'r50DPUufeSjM0x0Uz9RzD4a5uoNudUhOVD1fd66jGbvDbh0SLy1LT+eda+nnnJMwpZ8L4C'
b'f1zotb7TNHUdoY4t2aJ7NB7RjSU7o06MPkLjg/Tyeprr9E1Y3u5kKdje7m0nQ0dhgGmtFV'
b'I514xqiNenzcRLNkPDmoHDJqoHQoz7yFR7Wcoj+xkLNdyR01RORmuNzvnJPSeeARERajXV'
b'azUDSDmFrQz+Yciozv9506PEShedIxDBulQ+LBxKAv0YtmlERd/eBOlFDm6FrxCsqtNmAp'
b'QUerJJBUvwfNNhFdVYX+IrqqStNR2TIgxIPs//NMc9qnrbUca4uIIXdGs0FaXLktPRac1R'
b'7a9xsHVQZ67M29Ms3SUGbZjxNVEnw8GB2o8WrutbDShd01hkAzRn+/8ATZwmlgj45m22GC'
b'fUSf0Jkb5GiePf0uV7YCl991ok8Uz266sqZMOR+I/i5bImq/70bHhC4CqrWMGwjZHWv3o0'
b'uTnGWRB6mn/ZA1803ZqXnSW+zOFeRNdhGC3Efo18SR5cd+/bRBsHziwRC7R16aPrXEkTtA'
b'zdwSPMRPa1jagPLZWr4013NO5D7DRCoCwlTKwWEyRSCaNBjAGHZSceNnmmlCc7J7RYRVdA'
b'eMN1gcfLXB4vB4g4XgNrrIDrmnVzPQcvUEe7Yi7W/BMIS+lccB4coOAvoE9czQ8RyQ88vr'
b'KU3DJn41u2jYEcQa7MQAXoW1lNZhPRKUWCLeOKtG5NHNYKgP0c1gmo46FlSPy/g2D47Sl/'
b'F1HosrMDoZjSx67XZflZ7ROEQGWu8kaGm5Q2SwNH4O57ewNZw7RDSGIp9OHSYaYOUBCZkB'
b'8WauPONH0D8MqbSjmnSQOQ3kLc3IhOr1IuN1dLNO4bDvIboPmZCjdajaAkGDMkCsP2UWCt'
b'qTAW7pTiYpWnMyLiO9ySC3tCYjtNaZjEspSMMO+tLMkV5bMo6lSI0c8m5OY7JQK0PGtVeF'
b'HNEfN0bRnCa8RhnxXeR2tXlyMes5GaK9KLM/UuqylxqkuxqtXCYXubwMIYaFFUeEy8saDc'
b'hKS5VEz4HmyWWzDt1HkYIOt41VlpSzIZDd2yFCRH3b2CKQ3jMmxIJJ9HnAJBlzhQXRVmmA'
b'nQDpUkUjdxItS4DqpjAIKTeUQUptJmnI8C4xSH3tD8LR14lBd7i4C8qaif30V860M0uraC'
b'muvqCsbSwdhbi0mFxQtgIdX1DGHNeQzhDk3ZUdMmTUtxSVye3lYXjVt1Ogz7+EO8yQqZKZ'
b'6Ogu148YrzyoluQq43J08xOkj1RGlAVX4PytQcVK0eYS7QlTIJD2m2u3uqvJFe4vJ6Jb9x'
b'TxnJ/s7cyy9QQlJxdaMRt8u2eRvsgLPCTQiqMtbzQonsg2158tCk/ox4ebMeh1SBO44fgL'
b'HzAPc4jcn4bK8DI2xPeYO0kBEaL8ZQKsdT0v37+Mn8qGwnc1/E2L5Gr0m4+xaPBD3UAPtz'
b'ZW8GrldBXgq1czG5S7f5KY/qP7rCoPSCeA6HVvh6yRboXfusVaOjRZ0le1LgN4y+45wr3F'
b'cwRqW2cwbgWSJtdhaEwHkSZf2cWXyVfZSyvwrbfSLB0MlEjrW4or0NwsWJIRtgdyRZbFCA'
b'hLkgYMS5KWNKe4oAE3QgWt2GDaz2pC5G0IL7uhZ/sahhkEqXo9qEHRS88YW78q3XI+JTlS'
b'LRtiV5rlguhYsVwC1JkzA23ejeDuiu8TzAg6qRYCcBKrngabLCOOPo8yizjhjaI4LAfWAK'
b'Pbb9vkq5/LIE16WWMFt2iC+uEkNHcL+TrkaV1/iJ3WR31XPObpDvNNRADdTgBGHS+qoJ6r'
b'VxDImJjefGe8HTN1UjxTG602yf9isEoPOoB58lU6XVQlP/hVSGxQ+ZHjeiyeoeLogW01TV'
b'5ZyFXy6rsVJPl1re4snYHUhzdWoPXhDU1H8i7IkGBqUOM+tG49qAMkeFZ2uAWF+2ou1uME'
b'ncF+fbs9hCE169ewU8g4R89ImtBfw0uUYTV9GjNib3WZvKpnhpbJa2i5pSXETB3d8Ksaz2'
b'uSaosN85BX1dKhO73q3axZChq+OSbwFuo0RSqixkoHIV+Rnk7dmwrJvKZUwyFNFvTFkAaQ'
b'Rwox0CrAzWWAL2cOh07VHeOFmEn7HZ4qB2i/1278Cstk9T2mDmFqHaHb2huT/GJRRYi7NJ'
b'zn4LjlZSqRclw7x8PrwV+kY5yEk3g8kn7lRrOXls2kfS+IRX7tRrNTz+b94ryja7SmVX6H'
b'L4tRLs2G/m46Zjccab4LxPjzb+PxRl2H9jTYCAZcFhVnLgmnMw0Yy4mTWG0/lr48/7fFu/'
b'r7TiStLhnQF7+X0GLsQjNRFHpBfDYBrVuNoaWZQOaoW0ce6SXXWQZa+9Z0pNQhQwbzMMmM'
b'H5HdC1noSf1GUIY4pL9GeEbfTLmF/KrPysFV6L1RB98OZqK0Sjj3xHDzpxqB82Xypza3zp'
b'JgT4lZ1p+6F4LTqBdqkj+jEx3QCf7kBUpNm0SWjui4xawRmfynkrXNEz4EBD30bb3ehA57'
b'2ib6tnRouG8yM18mcnF6Rlz1ZFkSXaNuvOmlLNJ68JiC1uOGpqOByDAkmhTUfs3h1e+6Ut'
b'yroSn3oI7iCozqwgJcrdqXcB7Ko7ZEGCaq5E3P9JG8qIAsLdPgInlTCuB0TtLcCB+GsGUW'
b'wFg3ZF6Od4pXxvWtkbCMGaORcB5zxzvNqFgRf7TlDIXk7Xp7GlPwt6vdaegmb7eNKzD+vn'
b'3HuALV9e2WccXMBGa3LIezXTcJGYc6oSoi029MU5nncZsmokZbQ16dDq8ZwHG9RRN4Q9sM'
b'JhbzCI8fxjI8fXHZlBl5vLmCgwYHKDYETAUbH7VnVXasGGcFOPdhijKDDF55YIm4bYpmaj'
b'/9agumUm+91oGRC1rwgvxgdIhY+sMb+mmMFWzD8eYYhYi6G6RtMA9mm48wT1NkmJYZMEzL'
b'DBlNsTKH6PsyVk0KMaID4ag0QxC5Zji62deKjnqWkgypDSiwqzuvoe29XV163V6BUT+C/s'
b'g8VmLPJ6AgBt1PGmFVh2ZieJNttIxJfgtv72KWJkvgLMmX4alDIe9ZAryXaR5D+oJRlCtt'
b'4uZIpR+skDN6sIIoftrBShkGLiQhOvGNIC4qg9EJRAfAS0VHGVyQIVVpAup03z/pPrZxWD'
b'+c+8c+ejQDQxp4u/4MPUTDVYBv+ZqRPS7GwoNa7CswKkbGrroVdowX3XuwJ9Xj5HJF2i8Y'
b'r5JvHFvnyTd9WA36xjdZRCbPO2/wrS8cIK2MOmuSI6NOBnVt1FkZNBh1Gldjo04G16szXJ'
b'mhR0e4JgC1jSdD+qN7xIRbHVhFCRs0visQvfW39fEPtSnPGN/M2adlaT9D1xABoXNwcOge'
b'AGhtCSn1S+VVi28ZqWeWcCM1an0KwBp+8tO+sV4tzJcYVjraj9ezPPkWLeAgtpuWk2hS37'
b'pbJ6NRAaITtgg/OmFL+mh2rybmK2z/WFrtX5UG8FtSltJ7Sh4Jm0oWiXeVbLB6s8gi0W6R'
b'hfSukEXUzo8F9HkXi/jtHUuZZvT7wLfOqAusAngYDg7PJpNFwK0MwFD3ndEakhGdR0ShbD'
b'vdnOYEzKK/vko+I6oLj+HcLr3KcG4U3zL5Fh0rQwWOjpWRPgzqPnBUQW0lwoYRDYwQNToR'
b'A/fRiRjQ0s/D79gsABOib2GDDQmK7OEReGQPP0/+7a59v0z+H+SUGTTsMAEA'
)).decode().splitlines()
def ConversionHeader(i: str, filename: typing.Optional[str] = "unknown"):
    """Return the document header to emit before per-page text output.

    Args:
        i: output format name (case-insensitive): "html", "json", "xml" or
            "xhtml". Any other value selects plain text.
        filename: document name embedded in the "json" and "xml" headers.

    Returns:
        The header string for the requested format; an empty string for
        plain text or unrecognised formats.
    """
    import json
    import textwrap
    from xml.sax.saxutils import escape

    fmt = i.lower()
    # str() mirrors the former "%s" formatting (e.g. None -> "None").
    name = str(filename)

    if fmt == "html":
        return textwrap.dedent("""
            <!DOCTYPE html>
            <html>
            <head>
            <style>
            body{background-color:gray}
            div{position:relative;background-color:white;margin:1em auto}
            p{position:absolute;margin:0}
            img{position:absolute}
            </style>
            </head>
            <body>
            """)
    if fmt == "json":
        # json.dumps() escapes quotes, backslashes and control characters,
        # so e.g. Windows paths no longer produce invalid JSON.
        return '{"document": %s, "pages": [\n' % json.dumps(name, ensure_ascii=False)
    if fmt == "xml":
        # Escape markup characters so the attribute value stays well-formed.
        safe_name = escape(name, {'"': "&quot;"})
        return '\n<?xml version="1.0"?>\n<document name="%s">\n' % safe_name
    if fmt == "xhtml":
        return textwrap.dedent("""
            <?xml version="1.0"?>
            <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
            <html xmlns="http://www.w3.org/1999/xhtml">
            <head>
            <style>
            body{background-color:gray}
            div{background-color:white;margin:1em;padding:1em}
            p{white-space:pre-wrap}
            </style>
            </head>
            <body>
            """)
    return ""
def ConversionTrailer(i: str):
    """Return the document trailer matching a header format.

    Args:
        i: output format name (case-insensitive): "html", "json", "xml" or
            "xhtml". Any other value selects plain text.

    Returns:
        The closing string for the format; an empty string for plain text
        or unrecognised formats.
    """
    html_end = "</body>\n</html>\n"
    trailers = {
        "html": html_end,
        "json": "]\n}",
        "xml": "</document>\n",
        "xhtml": html_end,  # XHTML closes exactly like HTML
    }
    return trailers.get(i.lower(), "")
def adobe_glyph_names() -> tuple:
    '''
    Adobe Glyph List function: return all known glyph names.

    The module-level cache `_adobe_unicodes` (glyph name -> code point) is
    filled from `_get_glyph_text()` on first use. Lines starting with "#"
    are skipped; only the first four hex digits after the ";" are used.
    '''
    if not _adobe_unicodes:
        for entry in _get_glyph_text():
            if entry.startswith("#"):
                continue
            glyph, codes = entry.split(";")
            _adobe_unicodes[glyph] = int("0x" + codes[:4], base=16)
    return tuple(_adobe_unicodes)
def adobe_glyph_unicodes() -> tuple:
    '''
    Adobe Glyph List function: return the code points of all known glyphs.

    The module-level cache `_adobe_unicodes` (glyph name -> code point) is
    filled from `_get_glyph_text()` on first use. Lines starting with "#"
    are skipped; only the first four hex digits after the ";" are used.
    '''
    if not _adobe_unicodes:
        for entry in _get_glyph_text():
            if entry.startswith("#"):
                continue
            glyph, codes = entry.split(";")
            _adobe_unicodes[glyph] = int("0x" + codes[:4], base=16)
    return tuple(_adobe_unicodes.values())
def annot_preprocess(page: "Page") -> int:
    """Prepare a page for annotation insertion.

    Args:
        page: the page that will receive the annotation.

    Returns:
        The rotation the page had on entry. If it was non-zero, the page
        rotation has been set to 0.

    Raises:
        ValueError: if the owning document is not a PDF.
    """
    CheckParent(page)
    if not page.parent.is_pdf:
        raise ValueError("is no PDF")
    previous = page.rotation
    if previous != 0:
        page.set_rotation(0)
    return previous
def annot_postprocess(page: "Page", annot: "Annot") -> None:
    """Clean up after annotation insertion.

    Links the annotation to its page, registers it in the page's
    annotation dictionary (keyed by id(annot)) and sets its ownership flag.
    """
    assert isinstance(page, Page)
    assert isinstance(annot, Annot)
    # A strong reference is stored (not a weakref.proxy).
    annot.parent = page
    page._annot_refs[id(annot)] = annot
    annot.thisown = True
def canon(c):
    """Return the canonical code point used for text matching.

    NBSP, LINE/PARAGRAPH SEPARATOR, CR, LF and TAB map to a space; ASCII
    upper case maps to lower case; everything else is returned unchanged.
    """
    assert isinstance(c, int)
    # TODO: proper unicode case folding
    # TODO: character equivalence (a matches ä, etc)
    space_like = (0xA0, 0x2028, 0x2029, ord('\r'), ord('\n'), ord('\t'))
    if c in space_like:
        return ord(' ')
    if ord('A') <= c <= ord('Z'):
        return c - ord('A') + ord('a')
    return c
def chartocanon(s):
    """Decode the first character of `s` and canonicalise it.

    Returns:
        (n, c): the advance reported by mupdf.fz_chartorune() and the
        canonical code point (see canon()).
    """
    assert isinstance(s, str)
    advance, rune = mupdf.fz_chartorune(s)
    return advance, canon(rune)
def dest_is_valid(o, page_count, page_object_nums, names_list):
    """Check whether the destination of dictionary `o` is still valid.

    Returns 0 if `o` has a /GoTo action whose /D is not in `names_list`, or
    a /Dest array whose first entry is not one of the known pages. A string
    /Dest yields the result of the names-list lookup. Otherwise returns 1.
    """
    action = mupdf.pdf_dict_get(o, PDF_NAME('A'))
    is_goto = mupdf.pdf_name_eq(
        mupdf.pdf_dict_get(action, PDF_NAME('S')),
        PDF_NAME('GoTo'),
    )
    if is_goto and not string_in_names_list(
        mupdf.pdf_dict_get(action, PDF_NAME('D')),
        names_list,
    ):
        return 0

    dest = mupdf.pdf_dict_get(o, PDF_NAME('Dest'))
    if not dest.m_internal:
        return 1  # no /Dest entry: nothing further to check
    if mupdf.pdf_is_string(dest):
        return string_in_names_list(dest, names_list)
    if not dest_is_valid_page(
        mupdf.pdf_array_get(dest, 0),
        page_object_nums,
        page_count,
    ):
        return 0
    return 1
def dest_is_valid_page(obj, page_object_nums, pagecount):
    """Return 1 if `obj`'s object number is among the first `pagecount`
    entries of `page_object_nums`, else 0. Object number 0 is never valid.
    """
    num = mupdf.pdf_to_num(obj)
    if num == 0:
        return 0
    known = any(page_object_nums[k] == num for k in range(pagecount))
    return 1 if known else 0
def find_string(s, needle):
    """Find the first position in `s` where match_string() succeeds.

    Returns:
        (start, end) where `start` is the offset tried and `end` is
        `start` plus the value returned by match_string(); (None, None)
        if no offset matches.
    """
    assert isinstance(s, str)
    for start in range(len(s)):
        matched = match_string(s[start:], needle)
        if matched is not None:
            return start, matched + start
    return None, None
def get_pdf_now() -> str:
    '''
    "Now" timestamp in PDF Format

    Returns:
        A string like "D:20240929014531-04'00'": local date/time followed
        by the local UTC offset. The offset is omitted when it is zero.
    '''
    import time
    now = time.localtime()
    tstamp = time.strftime("D:%Y%m%d%H%M%S", now)

    # Seconds east of UTC that apply to `now`. Prefer tm_gmtoff; otherwise
    # pick altzone only when DST is actually in effect.
    offset = getattr(now, "tm_gmtoff", None)
    if offset is None:
        if time.daylight and now.tm_isdst > 0:
            offset = -time.altzone
        else:
            offset = -time.timezone
    if offset == 0:
        return tstamp

    sign = "+" if offset > 0 else "-"
    # Take abs() before dividing: floor division of a negative offset would
    # round the hour away from zero (e.g. +05:30 would become 06).
    hours, minutes = divmod(abs(offset) // 60, 60)
    return f"{tstamp}{sign}{hours:02d}'{minutes:02d}'"
class ElementPosition:
    """Plain attribute container for element position information.

    The class defines no attributes of its own; callers assign them on the
    instance.
    """

    def __init__(self):
        """Create an empty instance."""
def make_story_elpos():
    """Return a new, empty ElementPosition instance."""
    return ElementPosition()
def get_highlight_selection(page, start: point_like =None, stop: point_like =None, clip: rect_like =None) -> list:
    """Return rectangles of text lines between two points.

    Notes:
        The default of 'start' is top-left of 'clip'. The default of 'stop'
        is bottom-right of 'clip'.

    Args:
        start: start point_like
        stop: end point_like, must be 'below' start
        clip: consider this rect_like only, default is page rectangle
    Returns:
        List of line bbox intersections with the area established by the
        parameters.
    """
    # validate and normalize arguments
    if clip is None:
        clip = page.rect
    clip = Rect(clip)
    # Convert to Point so that any point_like (e.g. a plain tuple) works,
    # not only objects that already expose .x / .y.
    start = clip.tl if start is None else Point(start)
    stop = clip.br if stop is None else Point(stop)
    clip.y0 = start.y
    clip.y1 = stop.y
    if clip.is_empty or clip.is_infinite:
        return []

    # extract text of page, clip only, no images, expand ligatures
    blocks = page.get_text(
        "dict", flags=0, clip=clip,
    )["blocks"]

    lines = []  # will return this list of rectangles
    for b in blocks:
        bbox = Rect(b["bbox"])
        if bbox.is_infinite or bbox.is_empty:
            continue
        for line in b["lines"]:
            bbox = Rect(line["bbox"])
            if bbox.is_infinite or bbox.is_empty:
                continue
            lines.append(bbox)

    if lines == []:  # did not select anything
        return lines

    lines.sort(key=lambda bbox: bbox.y1)  # sort by vertical positions

    # cut off prefix from first line if start point is close to its top
    bboxf = lines.pop(0)
    if bboxf.y0 - start.y <= 0.1 * bboxf.height:  # close enough?
        r = Rect(start.x, bboxf.y0, bboxf.br)  # intersection rectangle
        if not (r.is_empty or r.is_infinite):
            lines.insert(0, r)  # insert again if not empty
    else:
        lines.insert(0, bboxf)  # insert again

    if lines == []:  # the list might have been emptied
        return lines

    # cut off suffix from last line if stop point is close to its bottom
    bboxl = lines.pop()
    if stop.y - bboxl.y1 <= 0.1 * bboxl.height:  # close enough?
        r = Rect(bboxl.tl, stop.x, bboxl.y1)  # intersection rectangle
        if not (r.is_empty or r.is_infinite):
            lines.append(r)  # append if not empty
    else:
        lines.append(bboxl)  # append again

    return lines
def glyph_name_to_unicode(name: str) -> int:
    '''
    Adobe Glyph List function: return the code point for a glyph name.

    The module-level cache `_adobe_unicodes` is filled from
    `_get_glyph_text()` on first use. Unknown names yield 65533 (0xFFFD).
    '''
    if not _adobe_unicodes:
        for entry in _get_glyph_text():
            if entry.startswith("#"):
                continue
            glyph, codes = entry.split(";")
            _adobe_unicodes[glyph] = int(codes[:4], base=16)
    return _adobe_unicodes.get(name, 65533)
def hdist(dir, a, b):
    """Return |(b - a) . dir|: the absolute dot product of the vector from
    `a` to `b` with `dir`."""
    delta_x = b.x - a.x
    delta_y = b.y - a.y
    along = delta_x * dir.x + delta_y * dir.y
    return mupdf.fz_abs(along)
def make_table(rect: rect_like =(0, 0, 1, 1), cols: int =1, rows: int =1) -> list:
    """Split a rectangle into a grid of equally sized cells.

    Args:
        rect: rect_like to use as the table area
        rows: number of rows
        cols: number of columns
    Returns:
        A list with <rows> items, where each item is a list of <cols>
        PyMuPDF Rect objects of equal sizes.
    Raises:
        ValueError: if 'rect' is empty or infinite.
    """
    rect = Rect(rect)  # ensure this is a Rect
    if rect.is_empty or rect.is_infinite:
        raise ValueError("rect must be finite and not empty")

    origin = rect.tl
    cell_height = rect.height / rows
    cell_width = rect.width / cols
    step_right = (cell_width, 0, cell_width, 0)  # shift to next cell in a row
    step_down = (0, cell_height, 0, cell_height)  # shift to cell in next row

    # first row: start at the top-left cell and keep shifting right
    cell = Rect(origin, origin.x + cell_width, origin.y + cell_height)
    first_row = [cell]
    for _ in range(1, cols):
        cell += step_right
        first_row.append(cell)

    # each further row is the previous row shifted down by one cell height
    table = [first_row]
    for _ in range(1, rows):
        above = table[-1]
        table.append([c + step_down for c in above])
    return table
def util_ensure_widget_calc(annot):
    '''
    Ensure that the widget's xref is listed in array AcroForm/CO.

    The /CO array is created if AcroForm has none; the widget is appended
    as an indirect reference unless its xref is already present.
    '''
    annot_obj = mupdf.pdf_annot_obj(annot.this)
    pdf = mupdf.pdf_get_bound_document(annot_obj)
    co_name = mupdf.pdf_new_name("CO")  # = PDF_NAME(CO)
    acroform = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('AcroForm'),
    )
    co_array = mupdf.pdf_dict_get(acroform, co_name)  # = AcroForm/CO
    if not mupdf.pdf_is_array(co_array):
        co_array = mupdf.pdf_dict_put_array(acroform, co_name, 2)

    size = mupdf.pdf_array_len(co_array)
    xref = mupdf.pdf_to_num(annot_obj)
    for idx in range(size):
        if mupdf.pdf_to_num(mupdf.pdf_array_get(co_array, idx)) == xref:
            return  # already registered
    mupdf.pdf_array_push(co_array, mupdf.pdf_new_indirect(pdf, xref, 0))
def util_make_rect( *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
    '''
    Helper for initialising rectangle classes.

    2022-09-02: This is quite different from PyMuPDF's util_make_rect(), which
    uses `goto` in ways that don't easily translate to Python.

    Returns (x0, y0, x1, y1) derived from <args>, then override with p0, p1,
    x0, y0, x1, y1 if they are not None.

    Accepts following forms for <args>:
        () returns all zeros.
        (top-left, bottom-right)
        (top-left, x1, y1)
        (x0, y0, bottom-right)
        (x0, y0, x1, y1)
        (rect)

    Where top-left and bottom-right are (x, y) or something with .x, .y
    members; rect is something with .x0, .y0, .x1, and .y1 members.

    2023-11-18: we now override with p0, p1, x0, y0, x1, y1 if not None.

    Raises:
        Exception: if <args> matches none of the accepted forms.
    '''
    def get_xy( arg):
        # Return (x, y) for a 2-item list/tuple or a point object;
        # (None, None) signals "not a point".
        if isinstance( arg, (list, tuple)) and len( arg) == 2:
            return arg[0], arg[1]
        if isinstance( arg, (Point, mupdf.FzPoint, mupdf.fz_point)):
            return arg.x, arg.y
        return None, None
    def make_tuple( a):
        # Flatten a point/rect object into its coordinates; wrap any other
        # non-sequence value in a 1-tuple; return lists/tuples unchanged.
        if isinstance( a, tuple):
            return a
        if isinstance( a, Point):
            return a.x, a.y
        elif isinstance( a, (Rect, IRect, mupdf.FzRect, mupdf.fz_rect)):
            return a.x0, a.y0, a.x1, a.y1
        if not isinstance( a, (list, tuple)):
            a = a,
        return a
    def handle_args():
        # Derive the four coordinates from the positional arguments only.
        # Names assigned below (p1, x0, y0, x1, y1, ...) are local to this
        # helper and do not touch the keyword arguments of the outer function.
        if len(args) == 0:
            return 0, 0, 0, 0
        elif len(args) == 1:
            arg = args[0]
            if isinstance( arg, (list, tuple)) and len( arg) == 2:
                # (top-left, bottom-right) packed into one sequence.
                p1, p2 = arg
                return *p1, *p2
            if isinstance( arg, (list, tuple)) and len( arg) == 3:
                # Three items: each is flattened and the results are concatenated.
                a, b, c = arg
                a = make_tuple(a)
                b = make_tuple(b)
                c = make_tuple(c)
                ret = *a, *b, *c
                return ret
            arg = make_tuple( arg)
            return arg
        elif len(args) == 2:
            return get_xy( args[0]) + get_xy( args[1])
        elif len(args) == 3:
            # Either (top-left, x1, y1) or (x0, y0, bottom-right).
            x0, y0 = get_xy( args[0])
            if (x0, y0) != (None, None):
                return x0, y0, args[1], args[2]
            x1, y1 = get_xy( args[2])
            if (x1, y1) != (None, None):
                return args[0], args[1], x1, y1
        elif len(args) == 4:
            return args[0], args[1], args[2], args[3]
        # Reached for an unrecognised 3-argument form or more than 4 arguments.
        raise Exception( f'Unrecognised args: {args}')
    ret_x0, ret_y0, ret_x1, ret_y1 = handle_args()
    # Keyword overrides: points first, then individual coordinates.
    if p0 is not None:  ret_x0, ret_y0 = get_xy(p0)
    if p1 is not None:  ret_x1, ret_y1 = get_xy(p1)
    if x0 is not None:  ret_x0 = x0
    if y0 is not None:  ret_y0 = y0
    if x1 is not None:  ret_x1 = x1
    if y1 is not None:  ret_y1 = y1
    return ret_x0, ret_y0, ret_x1, ret_y1
def util_make_irect( *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
    """Like util_make_rect(), but every coordinate is converted with int().

    Returns:
        Tuple (x0, y0, x1, y1) of ints.
    """
    left, top, right, bottom = util_make_rect(
        *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1,
    )
    return int(left), int(top), int(right), int(bottom)
def util_round_rect( rect):
    """Apply mupdf.fz_round_rect() to `rect` and return the result
    converted by JM_py_from_irect()."""
    fz_rect = JM_rect_from_py(rect)
    rounded = mupdf.fz_round_rect(fz_rect)
    return JM_py_from_irect(rounded)
def util_transform_rect( rect, matrix):
    """Transform `rect` by `matrix`.

    Delegates to the C++ `extra` module when g_use_extra is set; otherwise
    uses mupdf.fz_transform_rect() and converts with JM_py_from_rect().
    """
    if not g_use_extra:
        fz_rect = JM_rect_from_py(rect)
        fz_matrix = JM_matrix_from_py(matrix)
        return JM_py_from_rect(mupdf.fz_transform_rect(fz_rect, fz_matrix))
    return extra.util_transform_rect( rect, matrix)
def util_intersect_rect( r1, r2):
    """Apply mupdf.fz_intersect_rect() to `r1` and `r2` and return the
    result converted by JM_py_from_rect()."""
    first = JM_rect_from_py(r1)
    second = JM_rect_from_py(r2)
    return JM_py_from_rect(mupdf.fz_intersect_rect(first, second))
def util_is_point_in_rect( p, r):
    """Return mupdf.fz_is_point_inside_rect() for point `p` and rect `r`."""
    point = JM_point_from_py(p)
    rect = JM_rect_from_py(r)
    return mupdf.fz_is_point_inside_rect(point, rect)
def util_include_point_in_rect( r, p):
    """Apply mupdf.fz_include_point_in_rect() to rect `r` and point `p`
    and return the result converted by JM_py_from_rect()."""
    rect = JM_rect_from_py(r)
    point = JM_point_from_py(p)
    return JM_py_from_rect(mupdf.fz_include_point_in_rect(rect, point))
def util_point_in_quad( P, Q):
    """Return mupdf.fz_is_point_inside_quad() for point `P` and quad `Q`."""
    point = JM_point_from_py(P)
    quad = JM_quad_from_py(Q)
    return mupdf.fz_is_point_inside_quad(point, quad)
def util_transform_point( point, matrix):
    """Apply mupdf.fz_transform_point() to `point` with `matrix` and
    return the result converted by JM_py_from_point()."""
    fz_point = JM_point_from_py(point)
    fz_matrix = JM_matrix_from_py(matrix)
    return JM_py_from_point(mupdf.fz_transform_point(fz_point, fz_matrix))
def util_union_rect( r1, r2):
    """Apply mupdf.fz_union_rect() to `r1` and `r2` and return the result
    converted by JM_py_from_rect()."""
    first = JM_rect_from_py(r1)
    second = JM_rect_from_py(r2)
    return JM_py_from_rect(mupdf.fz_union_rect(first, second))
def util_concat_matrix( m1, m2):
    """Apply mupdf.fz_concat() to `m1` and `m2` and return the result
    converted by JM_py_from_matrix()."""
    left = JM_matrix_from_py(m1)
    right = JM_matrix_from_py(m2)
    return JM_py_from_matrix(mupdf.fz_concat(left, right))
def util_invert_matrix(matrix):
    """Invert a matrix in Python.

    Args:
        matrix: anything accepted by JM_matrix_from_py().

    Returns:
        (0, (a, b, c, d, e, f)) with the inverse on success, or (1, ()) if
        the determinant lies within +/- sys.float_info.epsilon of zero.
    """
    # The former `if 0:` branch built on mupdf.fz_invert_matrix() could
    # never execute and has been removed.
    src = JM_matrix_from_py(matrix)
    a = src.a
    det = a * src.d - src.b * src.c
    if not (det < -sys.float_info.epsilon or det > sys.float_info.epsilon):
        return 1, ()  # not invertible

    # Results are stored in an FzMatrix, exactly as before, so values keep
    # whatever precision that type provides.
    dst = mupdf.FzMatrix()
    rdet = 1 / det
    dst.a = src.d * rdet
    dst.b = -src.b * rdet
    dst.c = -src.c * rdet
    dst.d = a * rdet
    # e must be computed from the already inverted a..d before f is stored.
    a = -src.e * dst.a - src.f * dst.c
    dst.f = -src.e * dst.b - src.f * dst.d
    dst.e = a
    return 0, (dst.a, dst.b, dst.c, dst.d, dst.e, dst.f)
def util_measure_string( text, fontname, fontsize, encoding):
    """Return the width of `text` in a Base-14 font at `fontsize`.

    Args:
        text: the string to measure.
        fontname: name passed to mupdf.fz_new_base14_font().
        fontsize: the font size; the summed glyph advances are scaled by it.
        encoding: mupdf.PDF_SIMPLE_ENCODING_GREEK or
            mupdf.PDF_SIMPLE_ENCODING_CYRILLIC select ISO 8859-7 or
            Windows-1251; any other value selects Windows-1252.

    Returns:
        Sum of the glyph advances multiplied by `fontsize`.
    """
    font = mupdf.fz_new_base14_font(fontname)
    if encoding == mupdf.PDF_SIMPLE_ENCODING_GREEK:
        from_unicode = mupdf.fz_iso8859_7_from_unicode
    elif encoding == mupdf.PDF_SIMPLE_ENCODING_CYRILLIC:
        from_unicode = mupdf.fz_windows_1251_from_unicode
    else:
        from_unicode = mupdf.fz_windows_1252_from_unicode

    w = 0
    # Iterate over the characters directly. The previous loop advanced a
    # str index by the value returned from fz_chartorune(), which is a
    # UTF-8 byte count and therefore skipped characters after any
    # non-ASCII character.
    for ch in text:
        c = from_unicode(ord(ch))
        if c < 0:
            c = 0xB7  # not encodable: measure a middle dot instead
        g = mupdf.fz_encode_character(font, c)
        w += mupdf.fz_advance_glyph(font, g, 0)
    return w * fontsize
def util_sine_between(C, P, Q):
    """For points C, P, Q compute the sine between lines CP and QP.

    C is translated by -P and rotated so that the direction P->Q lies on
    the x-axis; the y component of the normalised result is returned.
    """
    c = JM_point_from_py(C)
    p = JM_point_from_py(P)
    q = JM_point_from_py(Q)
    direction = mupdf.fz_normalize_vector(
        mupdf.fz_make_point(q.x - p.x, q.y - p.y)
    )
    shift = mupdf.fz_make_matrix(1, 0, 0, 1, -p.x, -p.y)
    rotate = mupdf.fz_make_matrix(
        direction.x, -direction.y, direction.y, direction.x, 0, 0,
    )
    moved = mupdf.fz_transform_point(c, mupdf.fz_concat(shift, rotate))
    return mupdf.fz_normalize_vector(moved).y
def util_hor_matrix(C, P):
    '''
    Return the matrix that maps C to (0, 0) and P onto the x-axis.

    The matrix is a translation by -C followed by a rotation built from
    the normalised direction C -> P.
    '''
    c = JM_point_from_py(C)
    p = JM_point_from_py(P)
    # (cosine, sine) of vector P-C
    unit = mupdf.fz_normalize_vector(mupdf.fz_make_point(p.x - c.x, p.y - c.y))
    translate = mupdf.fz_make_matrix(1, 0, 0, 1, -c.x, -c.y)
    rotate = mupdf.fz_make_matrix(unit.x, -unit.y, unit.y, unit.x, 0, 0)
    combined = mupdf.fz_concat(translate, rotate)
    return JM_py_from_matrix(combined)
def match_string(h0, n0):
    """Match needle `n0` against the start of haystack `h0`.

    Characters are compared after canonicalisation by chartocanon(); a run
    of canonical spaces on either side counts as a single space.

    Returns:
        The haystack offset just behind the matched text if the whole
        needle matched, else None.
    """
    # NOTE(review): offsets advance by the value chartocanon() returns;
    # confirm that this is a character count (not a UTF-8 byte count)
    # before relying on the result for non-ASCII text.
    h = 0
    n = 0
    e = h
    delta_h, hc = chartocanon(h0[h:])
    h += delta_h
    delta_n, nc = chartocanon(n0[n:])
    n += delta_n
    # Stop as soon as both sides are exhausted (code 0). Without the extra
    # check, hc and nc would stay equal forever and the loop never ended
    # when haystack and needle ran out at the same time.
    while hc == nc and hc != 0:
        e = h
        if hc == ord(' '):
            # collapse a run of spaces in the haystack
            while 1:
                delta_h, hc = chartocanon(h0[h:])
                h += delta_h
                if hc != ord(' '):
                    break
        else:
            delta_h, hc = chartocanon(h0[h:])
            h += delta_h
        if nc == ord(' '):
            # collapse a run of spaces in the needle
            while 1:
                delta_n, nc = chartocanon(n0[n:])
                n += delta_n
                if nc != ord(' '):
                    break
        else:
            delta_n, nc = chartocanon(n0[n:])
            n += delta_n
    # The needle matched only if it was consumed completely.
    return None if nc != 0 else e
def on_highlight_char(hits, line, ch):
    """Add the quad of character `ch` to the highlight hits.

    If the character's left edge lies within the fuzz tolerances of the
    right edge of the last stored quad, that quad is extended to the
    character's right edge; otherwise the character's quad is appended
    and hits.len is incremented.
    """
    assert hits
    assert isinstance(line, mupdf.FzStextLine)
    assert isinstance(ch, mupdf.FzStextChar)
    # tolerances scale with the character size
    vfuzz = ch.m_internal.size * hits.vfuzz
    hfuzz = ch.m_internal.size * hits.hfuzz
    ch_quad = JM_char_quad(line, ch)
    if hits.len > 0:
        # fixme: end = hits.quads[-1]
        end = JM_quad_from_py(hits.quads[hits.len - 1])
        direction = line.m_internal.dir
        adjoins = (
            hdist(direction, end.lr, ch_quad.ll) < hfuzz
            and vdist(direction, end.lr, ch_quad.ll) < vfuzz
            and hdist(direction, end.ur, ch_quad.ul) < hfuzz
            and vdist(direction, end.ur, ch_quad.ul) < vfuzz
        )
        if adjoins:
            end.ur = ch_quad.ur
            end.lr = ch_quad.lr
            assert hits.quads[-1] == end
            return
    hits.quads.append(ch_quad)
    hits.len += 1
def page_merge(doc_des, doc_src, page_from, page_to, rotate, links, copy_annots, graft_map):
    '''
    Deep-copies a source page to the target.
    Modified version of function of pdfmerge.c: we also copy annotations, but
    we skip some subtypes. In addition we rotate output.

    Args:
        doc_des: target PDF document.
        doc_src: source PDF document.
        page_from: number of the page to copy from doc_src.
        page_to: position in doc_des at which the copy is inserted.
        rotate: value for the copy's /Rotate entry; -1 keeps the copied one.
        links: only forwarded to extra.page_merge(); unused in this
            Python implementation.
        copy_annots: if true, also copy the page's annotations.
        graft_map: graft map wrapper; its `.this` is passed to
            mupdf.pdf_graft_mapped_object().
    '''
    if g_use_extra:
        #log( 'Calling C++ extra.page_merge()')
        return extra.page_merge( doc_des, doc_src, page_from, page_to, rotate, links, copy_annots, graft_map)

    # list of object types (per page) we want to copy
    known_page_objs = [
        PDF_NAME('Contents'),
        PDF_NAME('Resources'),
        PDF_NAME('MediaBox'),
        PDF_NAME('CropBox'),
        PDF_NAME('BleedBox'),
        PDF_NAME('TrimBox'),
        PDF_NAME('ArtBox'),
        PDF_NAME('Rotate'),
        PDF_NAME('UserUnit'),
    ]
    page_ref = mupdf.pdf_lookup_page_obj(doc_src, page_from)

    # make new page dict in dest doc
    page_dict = mupdf.pdf_new_dict(doc_des, 4)
    mupdf.pdf_dict_put(page_dict, PDF_NAME('Type'), PDF_NAME('Page'))

    # copy objects of source page into it
    for key in known_page_objs:
        obj = mupdf.pdf_dict_get_inheritable( page_ref, key)
        if obj.m_internal:
            mupdf.pdf_dict_put( page_dict, key, mupdf.pdf_graft_mapped_object(graft_map.this, obj))

    # Copy annotations, but skip Link, Popup, IRT, Widget types
    # If selected, remove dict keys P (parent) and Popup
    if copy_annots:
        old_annots = mupdf.pdf_dict_get( page_ref, PDF_NAME('Annots'))
        n = mupdf.pdf_array_len( old_annots)
        if n > 0:
            new_annots = mupdf.pdf_dict_put_array( page_dict, PDF_NAME('Annots'), n)
            for i in range(n):
                o = mupdf.pdf_array_get( old_annots, i)
                if not o.m_internal or not mupdf.pdf_is_dict(o):
                    continue    # skip non-dict items
                if mupdf.pdf_dict_gets( o, "IRT").m_internal:
                    continue
                subtype = mupdf.pdf_dict_get( o, PDF_NAME('Subtype'))
                if mupdf.pdf_name_eq( subtype, PDF_NAME('Link')):
                    continue
                if mupdf.pdf_name_eq( subtype, PDF_NAME('Popup')):
                    continue
                if mupdf.pdf_name_eq( subtype, PDF_NAME('Widget')):
                    mupdf.fz_warn( "skipping widget annotation")
                    continue
                # (A second, unreachable Widget check used to follow here.)
                # These keys are removed from the source object before it
                # is grafted.
                mupdf.pdf_dict_del( o, PDF_NAME('Popup'))
                mupdf.pdf_dict_del( o, PDF_NAME('P'))
                copy_o = mupdf.pdf_graft_mapped_object( graft_map.this, o)
                annot = mupdf.pdf_new_indirect( doc_des, mupdf.pdf_to_num( copy_o), 0)
                mupdf.pdf_array_push( new_annots, annot)

    # rotate the page
    if rotate != -1:
        mupdf.pdf_dict_put_int( page_dict, PDF_NAME('Rotate'), rotate)
    # Now add the page dictionary to dest PDF
    ref = mupdf.pdf_add_object( doc_des, page_dict)

    # Insert new page at specified location
    mupdf.pdf_insert_page( doc_des, page_to, ref)
def paper_rect(s: str) -> Rect:
    """Return a Rect for the paper size indicated in string 's'.

    's' is passed to paper_size(); the rectangle has its top-left corner
    at (0, 0) and the returned width and height.
    """
    dims = paper_size(s)
    return Rect(0.0, 0.0, dims[0], dims[1])
def paper_size(s: str) -> tuple:
    """Return a tuple (width, height) for a given paper format string.

    Notes:
        'A4-L' will return (842, 595), the values for A4 landscape.
        Suffix '-P' and no suffix return the portrait tuple.
        Unknown formats return (-1, -1).
    """
    name = s.lower()
    landscape = name.endswith("-l")
    if landscape:
        name = name[:-2]
    if name.endswith("-p"):
        name = name[:-2]
    dims = paper_sizes().get(name, (-1, -1))
    if landscape:
        return (dims[1], dims[0])
    return dims
def paper_sizes():
    """Return the known paper formats as a dictionary.

    Keys are lower-case format names such as "a4" or "letter"; values are
    (width, height) tuples in points (72 dpi), portrait orientation. A new
    dictionary is built on every call.

    Information taken from the following web sites:
    www.din-formate.de
    www.din-formate.info/amerikanische-formate.html
    www.directtools.de/wissen/normen/iso.htm
    """
    formats = {
        # ISO A series
        "a0": (2384, 3370),
        "a1": (1684, 2384),
        "a10": (74, 105),
        "a2": (1191, 1684),
        "a3": (842, 1191),
        "a4": (595, 842),
        "a5": (420, 595),
        "a6": (298, 420),
        "a7": (210, 298),
        "a8": (147, 210),
        "a9": (105, 147),
        # ISO B series
        "b0": (2835, 4008),
        "b1": (2004, 2835),
        "b10": (88, 125),
        "b2": (1417, 2004),
        "b3": (1001, 1417),
        "b4": (709, 1001),
        "b5": (499, 709),
        "b6": (354, 499),
        "b7": (249, 354),
        "b8": (176, 249),
        "b9": (125, 176),
        # ISO C series
        "c0": (2599, 3677),
        "c1": (1837, 2599),
        "c10": (79, 113),
        "c2": (1298, 1837),
        "c3": (918, 1298),
        "c4": (649, 918),
        "c5": (459, 649),
        "c6": (323, 459),
        "c7": (230, 323),
        "c8": (162, 230),
        "c9": (113, 162),
        # other formats
        "card-4x6": (288, 432),
        "card-5x7": (360, 504),
        "commercial": (297, 684),
        "executive": (522, 756),
        "invoice": (396, 612),
        "ledger": (792, 1224),
        "legal": (612, 1008),
        "legal-13": (612, 936),
        "letter": (612, 792),
        "monarch": (279, 540),
        "tabloid-extra": (864, 1296),
    }
    return formats
if mupdf_version_tuple >= (1, 23, 8):
    def pdf_lookup_page_loc(doc, needle):
        '''
        Thin wrapper for mupdf.pdf_lookup_page_loc().
        '''
        return mupdf.pdf_lookup_page_loc(doc, needle)

else:
    def pdf_lookup_page_loc_imp(doc, node, skip, parentp, indexp):
        '''
        Copy of MuPDF's internal pdf_lookup_page_loc_imp().

        `skip`, `parentp` and `indexp` are one-item lists used as in/out
        parameters: skip[0] is the number of pages still to skip;
        parentp[0] and indexp[0] receive the parent node of the hit and
        its index in that node's Kids array.

        Returns the page object found, or None if the tree holds fewer
        pages than requested.
        '''
        assert isinstance(node, mupdf.PdfObj)
        assert isinstance(skip, list) and len(skip) == 1
        assert isinstance(indexp, list) and len(indexp) == 1
        assert isinstance(parentp, list) and len(parentp) == 1 and isinstance(parentp[0], mupdf.PdfObj)
        hit = None
        stack = []
        try:
            while 1:
                kids = mupdf.pdf_dict_get(node, PDF_NAME('Kids'))
                len_ = mupdf.pdf_array_len( kids)

                if len_ == 0:
                    raise Exception("malformed page tree")

                # Every node we need to unmark goes into the stack
                stack.append(node)

                if mupdf.pdf_mark_obj( node):
                    raise Exception( "cycle in page tree")

                # Set when we step into a child page-tree node. Unlike the
                # C original, a Python for-loop leaves i < len_ even when
                # the Kids array is exhausted, so the loop index cannot be
                # used to tell the cases apart.
                descend = False
                for i in range(len_):
                    kid = mupdf.pdf_array_get( kids, i)
                    type_ = mupdf.pdf_dict_get( kid, PDF_NAME('Type'))
                    if type_.m_internal:
                        is_pages_node = mupdf.pdf_name_eq( type_, PDF_NAME('Pages'))
                    else:
                        # No /Type: a node with Kids but no MediaBox is
                        # treated as an intermediate node.
                        is_pages_node = (
                                mupdf.pdf_dict_get( kid, PDF_NAME('Kids')).m_internal
                                and not mupdf.pdf_dict_get( kid, PDF_NAME('MediaBox')).m_internal
                                )
                    if is_pages_node:
                        count = mupdf.pdf_dict_get_int( kid, PDF_NAME('Count'))
                        if skip[0] < count:
                            node = kid
                            descend = True
                            break
                        skip[0] -= count
                    else:
                        if type_.m_internal:
                            not_a_page = not mupdf.pdf_name_eq( type_, PDF_NAME('Page'))
                        else:
                            not_a_page = not mupdf.pdf_dict_get( kid, PDF_NAME('MediaBox')).m_internal
                        if not_a_page:
                            mupdf.fz_warn( f"non-page object in page tree ({mupdf.pdf_to_name( type_)})")
                        if skip[0] == 0:
                            parentp[0] = node
                            indexp[0] = i
                            hit = kid
                            break
                        skip[0] -= 1

                # Either the page was found (hit is set) or the Kids array
                # was exhausted without success: stop. Only continue when
                # we stepped into a child node.
                if not descend:
                    break
        finally:
            for marked in reversed(stack):
                mupdf.pdf_unmark_obj( marked)
        return hit

    def pdf_lookup_page_loc(doc, needle):
        '''
        Copy of MuPDF's internal pdf_lookup_page_loc().

        Returns (page object, parent node, index in parent's Kids array)
        for the zero-based page number `needle`.
        Raises Exception if the page tree is missing or too short.
        '''
        root = mupdf.pdf_dict_get( mupdf.pdf_trailer( doc), PDF_NAME('Root'))
        node = mupdf.pdf_dict_get( root, PDF_NAME('Pages'))
        skip = [needle]

        if not node.m_internal:
            raise Exception("cannot find page tree")
        parentp = [mupdf.PdfObj()]
        indexp = [0]
        hit = pdf_lookup_page_loc_imp(doc, node, skip, parentp, indexp)
        skip = skip[0]
        parentp = parentp[0]
        indexp = indexp[0]
        # `hit` is None when the page was not found. The parentheses are
        # required: "%" binds tighter than "+".
        if hit is None or not hit.m_internal:
            raise Exception("cannot find page %d in page tree" % (needle + 1))
        return hit, parentp, indexp  # We don't seem to return skip.
def pdfobj_string(o, prefix=''):
    '''
    Returns description of mupdf.PdfObj (wrapper for pdf_obj) <o>.

    The function is currently disabled: the first statement is an
    unconditional `assert 0` pointing at mupdf.pdf_debug_obj(), so the
    code below only runs when assertions are turned off.
    '''
    assert 0, 'use mupdf.pdf_debug_obj() ?'
    ret = ''
    if mupdf.pdf_is_array(o):
        l = mupdf.pdf_array_len(o)
        ret += f'array {l}\n'
        for i in range(l):
            oo = mupdf.pdf_array_get(o, i)
            # recurse with a longer prefix for nested items
            ret += pdfobj_string(oo, prefix + '    ')
            ret += '\n'
    elif mupdf.pdf_is_bool(o):
        ret += f'bool: {o.array_get_bool()}\n'
    elif mupdf.pdf_is_dict(o):
        l = mupdf.pdf_dict_len(o)
        ret += f'dict {l}\n'
        for i in range(l):
            key = mupdf.pdf_dict_get_key(o, i)
            value = mupdf.pdf_dict_get( o, key)
            ret += f'{prefix} {key}: '
            ret += pdfobj_string( value, prefix + '    ')
            ret += '\n'
    elif mupdf.pdf_is_embedded_file(o):
        ret += f'embedded_file: {o.embedded_file_name()}\n'
    elif mupdf.pdf_is_indirect(o):
        ret += f'indirect: ...\n'
    elif mupdf.pdf_is_int(o):
        ret += f'int: {mupdf.pdf_to_int(o)}\n'
    elif mupdf.pdf_is_jpx_image(o):
        ret += f'jpx_image:\n'
    elif mupdf.pdf_is_name(o):
        ret += f'name: {mupdf.pdf_to_name(o)}\n'
    # NOTE(review): the next test reads an attribute without calling it,
    # unlike the mupdf.pdf_is_*() calls above - confirm this is intended.
    elif o.pdf_is_null:
        ret += f'null\n'
    #elif o.pdf_is_number:
    #    ret += f'number\n'
    elif o.pdf_is_real:
        ret += f'real: {o.pdf_to_real()}\n'
    elif mupdf.pdf_is_stream(o):
        ret += f'stream\n'
    elif mupdf.pdf_is_string(o):
        ret += f'string: {mupdf.pdf_to_string(o)}\n'
    else:
        ret += '<>\n'

    return ret
def repair_mono_font(page: "Page", font: "Font") -> None:
    """Repair character spacing for mono fonts.

    Notes:
        Some mono-spaced fonts are displayed with a too large character
        distance, e.g. "a b c" instead of "abc". This utility passes the
        width of 0x20 (space), scaled by 1000 and rounded, to
        TOOLS.set_font_width() for every matching font reference of the
        page. Nothing happens if the font is not flagged "mono" or does
        not occur on the page.
    Args:
        page: pymupdf.Page object.
        font: pymupdf.Font object.
    """
    if not font.flags["mono"]:  # font not flagged as monospaced
        return None
    doc = page.parent  # the document
    # references to our font: same name, type starts with "F" and encoding
    # starts with "Identity"
    matches = [
        entry[0]
        for entry in page.get_fonts()
        if (
            entry[3] == font.name
            and entry[4].startswith("F")
            and entry[5].startswith("Identity")
        )
    ]
    if not matches:  # our font does not occur
        return
    width = int(round((font.glyph_advance(32) * 1000)))
    for xref in set(matches):  # set() drops any double counts
        if not TOOLS.set_font_width(doc, xref, width):
            log("Cannot set width for '%s' in xref %i" % (font.name, xref))
def sRGB_to_pdf(srgb: int) -> tuple:
    """Convert sRGB color code to a PDF color triple.

    There is **no error checking** for performance reasons!

    Args:
        srgb: (int) RRGGBB (red, green, blue), each color in 0..255.
    Returns:
        Tuple (red, green, blue) each item in interval 0 <= item <= 1.
    """
    red, green, blue = sRGB_to_rgb(srgb)[:3]
    return red / 255.0, green / 255.0, blue / 255.0
def sRGB_to_rgb(srgb: int) -> tuple:
    """Convert sRGB color code to an RGB color triple.

    There is **no error checking** for performance reasons!

    Args:
        srgb: (int) RRGGBB (red, green, blue), each color in 0..255.
    Returns:
        Tuple (red, green, blue) each item in interval 0 <= item <= 255.
    """
    # Red is everything above the low 16 bits (not masked); green and blue
    # are the two low bytes.
    red = srgb >> 16
    green = (srgb >> 8) & 0xFF
    blue = srgb & 0xFF
    return (red, green, blue)
def string_in_names_list(p, names_list):
    """Return 1 if the text string of `p` equals one of the entries at the
    even indices of `names_list`, else 0. A falsy `names_list` counts as
    empty."""
    size = mupdf.pdf_array_len( names_list) if names_list else 0
    wanted = mupdf.pdf_to_text_string( p)
    found = any(
        mupdf.pdf_to_text_string( mupdf.pdf_array_get( names_list, idx)) == wanted
        for idx in range(0, size, 2)
    )
    return 1 if found else 0
def strip_outline(doc, outlines, page_count, page_object_nums, names_list):
    '''
    Strip invalid items from a chain of sibling outline entries.

    Walks the /Next chain starting at `outlines`. Children are stripped
    first via strip_outlines(). An entry with an invalid destination is
    dropped when it has no remaining children, otherwise only its /Dest
    and /A keys are removed.

    Returns (count, first, prev): the number of entries kept, the first
    kept entry and the last kept entry (both None if nothing was kept).
    '''
    first = None
    count = 0
    current = outlines
    prev = None  # last kept entry; None until one has been kept
    while current.m_internal:
        # Strip any children to start with. This takes care of
        # First / Last / Count for us.
        nc = strip_outlines(doc, current, page_count, page_object_nums, names_list)

        if not dest_is_valid(current, page_count, page_object_nums, names_list):
            if nc == 0:
                # Outline with invalid dest and no children. Drop it by
                # pulling the next one in here.
                following = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
                # `prev` may still be None here, so it must not be
                # dereferenced unconditionally.
                have_prev = prev is not None and bool(prev.m_internal)
                if not following.m_internal:
                    # There is no next one to pull in
                    if have_prev:
                        mupdf.pdf_dict_del(prev, PDF_NAME('Next'))
                elif have_prev:
                    mupdf.pdf_dict_put(prev, PDF_NAME('Next'), following)
                    mupdf.pdf_dict_put(following, PDF_NAME('Prev'), prev)
                else:
                    mupdf.pdf_dict_del(following, PDF_NAME('Prev'))
                current = following
            else:
                # Outline with invalid dest, but children. Just drop the dest.
                mupdf.pdf_dict_del(current, PDF_NAME('Dest'))
                mupdf.pdf_dict_del(current, PDF_NAME('A'))
                current = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
        else:
            # Keep this one
            if not first or not first.m_internal:
                first = current
            prev = current
            current = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
            count += 1

    return count, first, prev
def strip_outlines(doc, outlines, page_count, page_object_nums, names_list):
    """Strip invalid entries below outline node `outlines`.

    Updates /First, /Last and /Count of the node (the sign of an existing
    /Count is kept) or deletes them when no child remains.

    Returns:
        Number of children kept; 0 if `outlines` is a null object.
    """
    if not outlines.m_internal:
        return 0

    first = mupdf.pdf_dict_get(outlines, PDF_NAME('First'))
    nc = 0
    if first.m_internal:
        nc, first, last = strip_outline(doc, first, page_count, page_object_nums, names_list)

    if nc == 0:
        for key in ('First', 'Last', 'Count'):
            mupdf.pdf_dict_del(outlines, PDF_NAME(key))
        return nc

    old_count = mupdf.pdf_to_int(mupdf.pdf_dict_get(outlines, PDF_NAME('Count')))
    mupdf.pdf_dict_put(outlines, PDF_NAME('First'), first)
    mupdf.pdf_dict_put(outlines, PDF_NAME('Last'), last)
    signed_count = nc if old_count > 0 else -nc
    mupdf.pdf_dict_put(outlines, PDF_NAME('Count'), mupdf.pdf_new_int(signed_count))
    return nc
# Integer codes for the path operations of the trace device (presumably
# fill / stroke / clip / clip-stroke, going by the names - confirm against
# the code that uses these constants).
trace_device_FILL_PATH = 1
trace_device_STROKE_PATH = 2
trace_device_CLIP_PATH = 3
trace_device_CLIP_STROKE_PATH = 4
def unicode_to_glyph_name(ch: int) -> str:
    '''
    Adobe Glyph List function: return the glyph name for a code point.

    The module-level cache `_adobe_glyphs` (code point -> glyph name) is
    filled from `_get_glyph_text()` on first use; a glyph listed with
    several codes is registered under each of them. Unknown code points
    yield ".notdef".
    '''
    if not _adobe_glyphs:
        for entry in _get_glyph_text():
            if entry.startswith("#"):
                continue
            glyph, codes = entry.split(";")
            for code in codes.split():
                _adobe_glyphs[int(code[:4], base=16)] = glyph
    return _adobe_glyphs.get(ch, ".notdef")
def vdist(dir, a, b):
    """Return |dx * dir.y + dy * dir.x| for the vector (dx, dy) from `a`
    to `b`."""
    delta_x = b.x - a.x
    delta_y = b.y - a.y
    across = delta_x * dir.y + delta_y * dir.x
    return mupdf.fz_abs(across)
class TOOLS:
'''
We use @staticmethod to avoid the need to create an instance of this class.
'''
def _derotate_matrix(page):
    """Return the derotation matrix of a PDF page, converted by
    JM_py_from_matrix(). For anything that is not a mupdf.PdfPage, a
    default-constructed FzMatrix is converted instead."""
    if isinstance(page, mupdf.PdfPage):
        matrix = JM_derotate_page_matrix(page)
    else:
        matrix = mupdf.FzMatrix()
    return JM_py_from_matrix(matrix)
@staticmethod
def _fill_widget(annot, widget):
    """Fill `widget` with the properties of annotation `annot`.

    Sets rect, xref, parent and the back pointer `_annot` on the widget
    and replaces every falsy script attribute by None.

    Returns:
        The result of JM_get_widget_properties(annot, widget).
    """
    val = JM_get_widget_properties(annot, widget)

    widget.rect = Rect(annot.rect)
    widget.xref = annot.xref
    widget.parent = annot.parent
    widget._annot = annot  # backpointer to annot object
    script_attrs = (
        "script",
        "script_stroke",
        "script_format",
        "script_change",
        "script_calc",
        "script_blur",
        "script_focus",
    )
    for attr in script_attrs:
        if not getattr(widget, attr):
            setattr(widget, attr, None)
    return val
@staticmethod
def _get_all_contents(page):
    """Return the result of JM_read_contents() for the page's PDF object,
    converted by JM_BinFromBuffer()."""
    pdf_page = mupdf.pdf_page_from_fz_page(page.this)
    contents = JM_read_contents(pdf_page.obj())
    return JM_BinFromBuffer( contents)
@staticmethod
def _insert_contents(page, newcont, overlay=1):
    """Add bytes as a new /Contents object for a page, and return its xref.

    Args:
        page: the page; its _pdf_page() must pass ASSERT_PDF().
        newcont: bytes of the new contents stream.
        overlay: passed through to JM_insert_contents().
    """
    pdfpage = page._pdf_page()
    ASSERT_PDF(pdfpage)
    buffer_ = JM_BufferFromBytes(newcont)
    #fixme: pdfpage->doc->dirty = 1;
    return JM_insert_contents(pdfpage.doc(), pdfpage.obj(), buffer_, overlay)
@staticmethod
def _le_annot_parms(annot, p1, p2, fill_color):
    """Get common parameters for making annot line end symbols.

    Returns:
        m: matrix that maps p1, p2 to points L, R on the x-axis
        im: its inverse
        L, R: transformed p1, p2
        w: line width
        scol: stroke color string
        fcol: fill color string
        opacity: opacity string (gs command)
    """
    w = annot.border["width"]  # line width

    stroke = annot.colors["stroke"] or (0, 0, 0)  # black if missing
    scol = " ".join(map(str, stroke)) + " RG\n"

    # explicit fill color first, then the annotation's, then white
    fill = fill_color or annot.colors["fill"] or (1, 1, 1)
    fcol = " ".join(map(str, fill)) + " rg\n"

    m = Matrix(util_hor_matrix(p1, p2))  # matrix makes the line horizontal
    im = ~m  # inverted matrix
    L = p1 * m  # converted start (left) point
    R = p2 * m  # converted end (right) point
    opacity = "/H gs\n" if 0 <= annot.opacity < 1 else ""
    return m, im, L, R, w, scol, fcol, opacity
@staticmethod
def _le_butt(annot, p1, p2, lr, fill_color):
    """Make stream commands for butt line end symbol.

    "lr" denotes left (False) or right point. The symbol is a stroked
    line of length 3 * max(1, line width) through the chosen end point.
    """
    m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    length = 3 * max(1, w)
    anchor = R if lr else L
    top = (anchor + (0, -length/2.)) * im
    bot = (anchor + (0, length/2.)) * im
    commands = [
        "\nq\n%s%f %f m\n" % (opacity, top.x, top.y),
        "%f %f l\n" % (bot.x, bot.y),
        _format_g(w) + " w\n",
        scol + "s\nQ\n",
    ]
    return "".join(commands)
@staticmethod
def _le_circle(annot, p1, p2, lr, fill_color):
    """Make stream commands for circle line end symbol.

    "lr" denotes left (False) or right point. The circle is drawn as an
    oval inscribed in a square with half edge length d = 2.5 * max(1, w).
    """
    _, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    d = 2.5 * max(1, w)
    if lr:
        center = R - (d/2., 0)
    else:
        center = L + (d/2., 0)
    box = Rect(center, center) + (-d, -d, d, d)  # the enclosing square
    oval = TOOLS._oval_string(box.tl * im, box.tr * im, box.br * im, box.bl * im)
    return "".join((
            "q\n" + opacity + oval,
            _format_g(w) + " w\n",
            scol + fcol + "b\nQ\n",
            ))
@staticmethod
def _le_closedarrow(annot, p1, p2, lr, fill_color):
    """Make stream commands for closed arrow line end symbol.

    "lr" denotes left (False) or right point. The arrow is a triangle
    whose tip lies d/2 beyond the line end, with d = 2.5 * max(1, w).
    """
    _, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    d = 2.5 * max(1, w)
    if lr:
        tip = R + (d/2., 0)
        wing_a = tip + (-2*d, -d)
        wing_b = tip + (-2*d, d)
    else:
        tip = L - (d/2., 0)
        wing_a = tip + (2*d, -d)
        wing_b = tip + (2*d, d)
    # Map back from the horizontalized system.
    wing_a *= im
    tip *= im
    wing_b *= im
    return "".join((
            "\nq\n%s%f %f m\n" % (opacity, wing_a.x, wing_a.y),
            "%f %f l\n" % (tip.x, tip.y),
            "%f %f l\n" % (wing_b.x, wing_b.y),
            _format_g(w) + " w\n",
            scol + fcol + "b\nQ\n",
            ))
@staticmethod
def _le_diamond(annot, p1, p2, lr, fill_color):
    """Make stream commands for diamond line end symbol.

    "lr" denotes left (False) or right point. The diamond connects the
    four edge midpoints of a square with half edge length
    d = 2.5 * max(1, w).
    """
    _, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    d = 2.5 * max(1, w)
    center = R - (d/2., 0) if lr else L + (d/2., 0)
    box = Rect(center, center) + (-d, -d, d, d)  # the square
    # Each pair names the two corners of one square edge, in drawing order.
    edges = (
            (box.tl, box.bl),
            (box.tl, box.tr),
            (box.tr, box.br),
            (box.br, box.bl),
            )
    pieces = ["q\n" + opacity]
    for idx, (start, end) in enumerate(edges):
        mid = (start + (end - start) * 0.5) * im
        operator = "l" if idx else "m"  # first point starts the path
        pieces.append("%f %f %s\n" % (mid.x, mid.y, operator))
    pieces.append(_format_g(w) + " w\n")
    pieces.append(scol + fcol + "b\nQ\n")
    return "".join(pieces)
@staticmethod
def _le_openarrow(annot, p1, p2, lr, fill_color):
    """Make stream commands for open arrow line end symbol.

    "lr" denotes left (False) or right point. Same geometry as the
    closed arrow, but the path is only stroked ("S"), not closed/filled.
    """
    _, im, L, R, w, scol, _, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    d = 2.5 * max(1, w)
    if lr:
        tip = R + (d/2., 0)
        wing_a = tip + (-2*d, -d)
        wing_b = tip + (-2*d, d)
    else:
        tip = L - (d/2., 0)
        wing_a = tip + (2*d, -d)
        wing_b = tip + (2*d, d)
    # Map back from the horizontalized system.
    wing_a *= im
    tip *= im
    wing_b *= im
    return "".join((
            "\nq\n%s%f %f m\n" % (opacity, wing_a.x, wing_a.y),
            "%f %f l\n" % (tip.x, tip.y),
            "%f %f l\n" % (wing_b.x, wing_b.y),
            _format_g(w) + " w\n",
            scol + "S\nQ\n",
            ))
@staticmethod
def _le_rclosedarrow(annot, p1, p2, lr, fill_color):
    """Make stream commands for right closed arrow line end symbol.

    "lr" denotes left (False) or right point. The tip is placed 2*d
    inside the line end and the wings at the line end, with
    d = 2.5 * max(1, w).
    """
    _, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    d = 2.5 * max(1, w)
    if lr:
        tip = R - (2*d, 0)
        wing_a = tip + (2*d, -d)
        wing_b = tip + (2*d, d)
    else:
        tip = L + (2*d, 0)
        wing_a = tip + (-2*d, -d)
        wing_b = tip + (-2*d, d)
    # Map back from the horizontalized system.
    wing_a *= im
    tip *= im
    wing_b *= im
    return "".join((
            "\nq\n%s%f %f m\n" % (opacity, wing_a.x, wing_a.y),
            "%f %f l\n" % (tip.x, tip.y),
            "%f %f l\n" % (wing_b.x, wing_b.y),
            _format_g(w) + " w\n",
            scol + fcol + "b\nQ\n",
            ))
@staticmethod
def _le_ropenarrow(annot, p1, p2, lr, fill_color):
    """Make stream commands for right open arrow line end symbol.

    "lr" denotes left (False) or right point. The tip is placed d/3
    inside the line end, with d = 2.5 * max(1, w). The fill color
    command is emitted, but the path is only stroked ("S").
    """
    _, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    d = 2.5 * max(1, w)
    if lr:
        tip = R - (d/3., 0)
        wing_a = tip + (2*d, -d)
        wing_b = tip + (2*d, d)
    else:
        tip = L + (d/3., 0)
        wing_a = tip + (-2*d, -d)
        wing_b = tip + (-2*d, d)
    # Map back from the horizontalized system.
    wing_a *= im
    tip *= im
    wing_b *= im
    return "".join((
            "\nq\n%s%f %f m\n" % (opacity, wing_a.x, wing_a.y),
            "%f %f l\n" % (tip.x, tip.y),
            "%f %f l\n" % (wing_b.x, wing_b.y),
            _format_g(w) + " w\n",
            scol + fcol + "S\nQ\n",
            ))
@staticmethod
def _le_slash(annot, p1, p2, lr, fill_color):
    """Make stream commands for slash line end symbol.

    "lr" denotes left (False) or right point. The slash is the diagonal
    (top-left to bottom-right) of a rectangle centered on the end point.
    """
    _, im, L, R, w, scol, _, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    rw = 1.1547 * max(1, w) * 1.0  # makes rect diagonal a 30 deg inclination
    anchor = R if lr else L
    box = Rect(anchor.x - rw, anchor.y - 2 * w, anchor.x + rw, anchor.y + 2 * w)
    start = box.tl * im
    end = box.br * im
    return "".join((
            "\nq\n%s%f %f m\n" % (opacity, start.x, start.y),
            "%f %f l\n" % (end.x, end.y),
            _format_g(w) + " w\n",
            scol + "s\nQ\n",
            ))
@staticmethod
def _le_square(annot, p1, p2, lr, fill_color):
    """Make stream commands for square line end symbol.

    "lr" denotes left (False) or right point. The square has half edge
    length d = 2.5 * max(1, w) and is drawn corner by corner:
    top-left, top-right, bottom-right, bottom-left.
    """
    _, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
    d = 2.5 * max(1, w)
    center = R - (d/2., 0) if lr else L + (d/2., 0)
    box = Rect(center, center) + (-d, -d, d, d)  # the square
    pieces = ["q\n" + opacity]
    for idx, corner in enumerate((box.tl, box.tr, box.br, box.bl)):
        mapped = corner * im
        operator = "l" if idx else "m"  # first corner starts the path
        pieces.append("%f %f %s\n" % (mapped.x, mapped.y, operator))
    pieces.append(_format_g(w) + " w\n")
    pieces.append(scol + fcol + "b\nQ\n")
    return "".join(pieces)
@staticmethod
def _oval_string(p1, p2, p3, p4):
"""Return /AP string defining an oval within a 4-polygon provided as points
"""
def bezier(p, q, r):
f = "%f %f %f %f %f %f c\n"
return f % (p.x, p.y, q.x, q.y, r.x, r.y)
kappa = 0.55228474983 # magic number
ml = p1 + (p4 - p1) * 0.5 # middle points ...
mo = p1 + (p2 - p1) * 0.5 # for each ...
mr = p2 + (p3 - p2) * 0.5 # polygon ...
mu = p4 + (p3 - p4) * 0.5 # side
ol1 = ml + (p1 - ml) * kappa # the 8 bezier
ol2 = mo + (p1 - mo) * kappa # helper points
or1 = mo + (p2 - mo) * kappa
or2 = mr + (p2 - mr) * kappa
ur1 = mr + (p3 - mr) * kappa
ur2 = mu + (p3 - mu) * kappa
ul1 = mu + (p4 - mu) * kappa
ul2 = ml + (p4 - ml) * kappa
# now draw, starting from middle point of left side
ap = "%f %f m\n" % (ml.x, ml.y)
ap += bezier(ol1, ol2, mo)
ap += bezier(or1, or2, mr)
ap += bezier(ur1, ur2, mu)
ap += bezier(ul1, ul2, ml)
return ap
@staticmethod
def _parse_da(annot):
    """Parse the default appearance (/DA) string of an annotation.

    The /DA string is taken from the annotation (inheritable) or, if
    missing there, from the /AcroForm dictionary of the PDF catalog.

    Returns:
        A tuple (color, fontname, fontsize). If no /DA string could be
        determined, ((0,), "", 0) is returned. Defaults for items not
        present in the string are (0, 0, 0), "Helv" and 12. When a color
        operator is found, the color is a list of floats with 1 ("g"),
        3 ("rg") or 4 ("k") components.

    Raises:
        ValueError: if an operand of a recognised operator is not numeric.
    """
    if g_use_extra:
        val = extra.Tools_parse_da( annot.this)
    else:
        def Tools__parse_da(annot):
            this_annot = annot.this
            assert isinstance(this_annot, mupdf.PdfAnnot)
            this_annot_obj = mupdf.pdf_annot_obj( this_annot)
            pdf = mupdf.pdf_get_bound_document( this_annot_obj)
            try:
                da = mupdf.pdf_dict_get_inheritable( this_annot_obj, PDF_NAME('DA'))
                if not da.m_internal:
                    # Fall back to the document-wide default.
                    trailer = mupdf.pdf_trailer(pdf)
                    da = mupdf.pdf_dict_getl(trailer,
                            PDF_NAME('Root'),
                            PDF_NAME('AcroForm'),
                            PDF_NAME('DA'),
                            )
                da_str = mupdf.pdf_to_text_string(da)
            except Exception:
                if g_exceptions_verbose: exception_info()
                return
            return da_str
        val = Tools__parse_da(annot)

    if not val:
        return ((0,), "", 0)
    font = "Helv"
    fsize = 12
    col = (0, 0, 0)
    dat = val.split()  # split on any whitespace
    # Operators follow their operands. An operator lacking enough preceding
    # operands is skipped; otherwise negative indices would silently wrap
    # around to the end of the token list.
    for i, item in enumerate(dat):
        if item == "Tf":
            if i >= 2:
                font = dat[i - 2][1:]  # drop the leading "/"
                fsize = float(dat[i - 1])
        elif item == "g":  # unicolor text
            if i >= 1:
                col = [float(dat[i - 1])]
        elif item == "rg":  # RGB colored text
            if i >= 3:
                col = [float(f) for f in dat[i - 3:i]]
        elif item == "k":  # CMYK colored text
            if i >= 4:
                col = [float(f) for f in dat[i - 4:i]]
    return (col, font, fsize)
@staticmethod
def _reset_widget(annot):
    """Reset a form field via mupdf.pdf_field_reset().

    `annot` is passed directly to mupdf.pdf_annot_obj(); the document is
    the one bound to the resulting PDF object.
    """
    field_obj = mupdf.pdf_annot_obj(annot)
    document = mupdf.pdf_get_bound_document(field_obj)
    mupdf.pdf_field_reset(document, field_obj)
@staticmethod
def _rotate_matrix(page):
    """Return the rotation matrix of a page as a Python object.

    If the page has no underlying PDF page, a default-constructed
    mupdf.FzMatrix is converted instead.
    """
    pdfpage = page._pdf_page()
    if pdfpage.m_internal:
        matrix = JM_rotate_page_matrix(pdfpage)
    else:
        matrix = mupdf.FzMatrix()
    return JM_py_from_matrix(matrix)
@staticmethod
def _save_widget(annot, widget):
    """Apply the properties of `widget` to `annot` via JM_set_widget_properties()."""
    JM_set_widget_properties(annot, widget)
@staticmethod
def _update_da(annot, da_str):
    """Store `da_str` as the /DA entry of an annotation.

    The /DS and /RC entries are deleted because they are not supported.
    Errors in the pure-Python path are swallowed (and reported only if
    g_exceptions_verbose is set).

    Declared as a static method like its siblings, so that it behaves the
    same whether it is called on the class or on an instance.
    """
    if g_use_extra:
        extra.Tools_update_da( annot.this, da_str)
        return
    try:
        this_annot = annot.this
        assert isinstance(this_annot, mupdf.PdfAnnot)
        annot_obj = mupdf.pdf_annot_obj(this_annot)
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('DA'), da_str)
        mupdf.pdf_dict_del(annot_obj, PDF_NAME('DS'))  # not supported
        mupdf.pdf_dict_del(annot_obj, PDF_NAME('RC'))  # not supported
    except Exception:
        # Deliberate best-effort: failures must not propagate to the caller.
        if g_exceptions_verbose: exception_info()
@staticmethod
def gen_id():
    """Increment the global counter TOOLS_JM_UNIQUE_ID and return its new value."""
    global TOOLS_JM_UNIQUE_ID
    TOOLS_JM_UNIQUE_ID = TOOLS_JM_UNIQUE_ID + 1
    return TOOLS_JM_UNIQUE_ID
@staticmethod
def glyph_cache_empty():
    """Empty the glyph cache by calling mupdf.fz_purge_glyph_cache()."""
    mupdf.fz_purge_glyph_cache()
@staticmethod
def image_profile(stream, keep_image=0):
    """Return metadata of an image binary stream.

    Both arguments are passed through unchanged to JM_image_profile().
    """
    profile = JM_image_profile(stream, keep_image)
    return profile
@staticmethod
def mupdf_display_errors(on=None):
'''
Set MuPDF error display to True or False.
'''
global JM_mupdf_show_errors
if on is not None:
JM_mupdf_show_errors = bool(on)
return JM_mupdf_show_errors
@staticmethod
def mupdf_display_warnings(on=None):
'''
Set MuPDF warnings display to True or False.
'''
global JM_mupdf_show_warnings
if on is not None:
JM_mupdf_show_warnings = bool(on)
return JM_mupdf_show_warnings
@staticmethod
def mupdf_version():
    """Get version of MuPDF binary build (the value of mupdf.FZ_VERSION)."""
    version = mupdf.FZ_VERSION
    return version
@staticmethod
def mupdf_warnings(reset=1):
    """Get the MuPDF warnings/errors with optional reset (default).

    Returns the stored messages joined by newlines. If `reset` is truthy,
    the store is emptied afterwards.
    """
    # Get any trailing `... repeated <N> times...` message.
    mupdf.fz_flush_warnings()
    collected = '\n'.join( JM_mupdf_warnings_store)
    if not reset:
        return collected
    TOOLS.reset_mupdf_warnings()
    return collected
@staticmethod
def reset_mupdf_warnings():
global JM_mupdf_warnings_store
JM_mupdf_warnings_store = list()
@staticmethod
def set_aa_level(level):
    """Set anti-aliasing level by passing `level` to mupdf.fz_set_aa_level()."""
    mupdf.fz_set_aa_level(level)
@staticmethod
def set_annot_stem( stem=None):
global JM_annot_id_stem
if stem is None:
return JM_annot_id_stem
len_ = len(stem) + 1
if len_ > 50:
len_ = 50
JM_annot_id_stem = stem[:50]
return JM_annot_id_stem
@staticmethod
def set_font_width(doc, xref, width):
    """Put a /W array [0 65535 width] into each descendant font of a font.

    Args:
        doc: document; must be convertible by _as_pdf_document().
        xref: xref of the font object.
        width: integer stored as third item of the /W array.

    Returns:
        False if `doc` is not a PDF, else True (also when the font has no
        /DescendantFonts array and nothing was changed).
    """
    pdf = _as_pdf_document(doc)
    if not pdf:
        return False
    font = mupdf.pdf_load_object(pdf, xref)
    descendants = mupdf.pdf_dict_get(font, PDF_NAME('DescendantFonts'))
    if not mupdf.pdf_is_array(descendants):
        return True
    for index in range(mupdf.pdf_array_len(descendants)):
        descendant = mupdf.pdf_array_get(descendants, index)
        widths = mupdf.pdf_new_array(pdf, 3)
        for value in (0, 65535, width):
            mupdf.pdf_array_push(widths, mupdf.pdf_new_int(value))
        mupdf.pdf_dict_put(descendant, PDF_NAME('W'), widths)
    return True
@staticmethod
def set_graphics_min_line_width(min_line_width):
    """Set the graphics minimum line width.

    The value is passed to mupdf.fz_set_graphics_min_line_width().
    """
    mupdf.fz_set_graphics_min_line_width(min_line_width)
@staticmethod
def set_icc( on=0):
    """Set ICC color handling on or off.

    Switching on without ICC support in MuPDF (mupdf.FZ_ENABLE_ICC falsy)
    is reported via RAISEPY(); switching off is then a no-op.
    """
    if not on:
        if mupdf.FZ_ENABLE_ICC:
            mupdf.fz_disable_icc()
        return
    if mupdf.FZ_ENABLE_ICC:
        mupdf.fz_enable_icc()
    else:
        RAISEPY( "MuPDF built w/o ICC support",PyExc_ValueError)
@staticmethod
def set_low_memory( on=None):
    """Set / unset MuPDF device caching.

    With `on=None` the setting is left untouched. Returns the current
    value of _globals.no_device_caching.
    """
    if on is None:
        return _globals.no_device_caching
    _globals.no_device_caching = bool(on)
    return _globals.no_device_caching
@staticmethod
def set_small_glyph_heights(on=None):
    """Set / unset small glyph heights.

    With `on=None` the setting is left untouched. A changed value is
    also forwarded to the `extra` module when g_use_extra is set.
    Returns the current value of _globals.small_glyph_heights.
    """
    if on is None:
        return _globals.small_glyph_heights
    _globals.small_glyph_heights = bool(on)
    if g_use_extra:
        extra.set_small_glyph_heights(_globals.small_glyph_heights)
    return _globals.small_glyph_heights
@staticmethod
def set_subset_fontnames(on=None):
    """Set / unset returning fontnames with their subset prefix.

    With `on=None` the setting is left untouched. Returns the current
    value of _globals.subset_fontnames.
    """
    if on is None:
        return _globals.subset_fontnames
    _globals.subset_fontnames = bool(on)
    return _globals.subset_fontnames
@staticmethod
def show_aa_level():
    """Show anti-aliasing values.

    Returns a dict with keys "graphics", "text" and
    "graphics_min_line_width", as reported by MuPDF.
    """
    return {
            'graphics': mupdf.fz_graphics_aa_level(),
            'text': mupdf.fz_text_aa_level(),
            'graphics_min_line_width': mupdf.fz_graphics_min_line_width(),
            }
@staticmethod
def store_maxsize():
'''
MuPDF store size limit.
'''
# fixme: return gctx->store->max.
return None
@staticmethod
def store_shrink(percent):
    """Free 'percent' of current store size.

    A value of 100 or more empties the store and returns 0. For
    0 < percent < 100, mupdf.fz_shrink_store() is called with
    100 - percent. In all cases other than emptying, None is returned.
    """
    if percent >= 100:
        mupdf.fz_empty_store()
        return 0
    if percent > 0:
        remaining = 100 - percent
        mupdf.fz_shrink_store( remaining)
    # fixme: return gctx->store->size.
    return None
@staticmethod
def store_size():
'''
MuPDF current store size.
'''
# fixme: return gctx->store->size.
return None
@staticmethod
def unset_quad_corrections(on=None):
    """Set ascender / descender corrections on or off.

    With `on=None` the setting is left untouched. Returns the current
    value of _globals.skip_quad_corrections.
    """
    if on is None:
        return _globals.skip_quad_corrections
    _globals.skip_quad_corrections = bool(on)
    return _globals.skip_quad_corrections
# fixme: also defined at top-level.
# Initial stem value 'fitz'. NOTE(review): set_annot_stem() reads and writes
# a *global* of this name - confirm which of the two definitions is intended.
JM_annot_id_stem = 'fitz'
# Result of JM_fitz_config(), evaluated once when this statement runs.
fitz_config = JM_fitz_config()
# We cannot import utils earlier because it imports this .py file itself and
# uses some pymupdf.* types in function typing.
#
from . import utils
# Color name -> (r, g, b) with each component scaled from 0..255 to 0..1,
# built from the entries of utils.getColorInfoDict().
pdfcolor = {
        name: (red / 255, green / 255, blue / 255)
        for name, (red, green, blue) in utils.getColorInfoDict().items()
        }
# Callbacks not yet supported with cppyy.
# Otherwise register the Python handlers JM_mupdf_warning() and
# JM_mupdf_error() as MuPDF's warning and error callbacks.
if not mupdf_cppyy:
    mupdf.fz_set_warning_callback(JM_mupdf_warning)
    mupdf.fz_set_error_callback(JM_mupdf_error)
# If there are pending warnings when we exit, we end up in this sequence:
#
# atexit()
# -> mupdf::internal_thread_state::~internal_thread_state()
# -> fz_drop_context()
# -> fz_flush_warnings()
# -> SWIG Director code
# -> Python calling JM_mupdf_warning().
#
# Unfortunately this causes a SEGV, seemingly because the SWIG Director code has
# already been torn down.
#
# So we use a Python atexit handler to explicitly call fz_flush_warnings();
# this appears to happen early enough for the Director machinery to still
# work. So in the sequence above, fz_flush_warnings() will find that there are
# no pending warnings and will not attempt to call JM_mupdf_warning().
#
def _atexit():
    """Flush pending MuPDF warnings and unregister the Python callbacks.

    Registered as an atexit handler so that the flush happens while the
    callback machinery is still usable.
    """
    #log( 'PyMuPDF/src/__init__.py:_atexit() called')
    mupdf.fz_flush_warnings()
    for unregister in (mupdf.fz_set_warning_callback, mupdf.fz_set_error_callback):
        unregister(None)
    #log( '_atexit() returning')


atexit.register( _atexit)
# Use utils.*() fns for some class methods.
#
# The functions are implemented in the `utils` module and attached here as
# module-level names or as attributes of the classes defined in this file.

# Module-level re-exports.
recover_bbox_quad = utils.recover_bbox_quad
recover_char_quad = utils.recover_char_quad
recover_line_quad = utils.recover_line_quad
recover_quad = utils.recover_quad
recover_span_quad = utils.recover_span_quad

# Annot methods.
Annot.get_text = utils.get_text
Annot.get_textbox = utils.get_textbox

# Document methods.
Document._do_links = utils.do_links
Document.del_toc_item = utils.del_toc_item
Document.get_char_widths = utils.get_char_widths
Document.get_oc = utils.get_oc
Document.get_ocmd = utils.get_ocmd
Document.get_page_labels = utils.get_page_labels
Document.get_page_numbers = utils.get_page_numbers
Document.get_page_pixmap = utils.get_page_pixmap
Document.get_page_text = utils.get_page_text
Document.get_toc = utils.get_toc
Document.has_annots = utils.has_annots
Document.has_links = utils.has_links
Document.insert_page = utils.insert_page
Document.new_page = utils.new_page
Document.scrub = utils.scrub
Document.search_page_for = utils.search_page_for
Document.set_metadata = utils.set_metadata
Document.set_oc = utils.set_oc
Document.set_ocmd = utils.set_ocmd
Document.set_page_labels = utils.set_page_labels
Document.set_toc = utils.set_toc
Document.set_toc_item = utils.set_toc_item
Document.subset_fonts = utils.subset_fonts
# `tobytes` is a second name for the existing Document.write.
Document.tobytes = Document.write
Document.xref_copy = utils.xref_copy

# IRect methods.
IRect.get_area = utils.get_area

# Page methods.
Page.apply_redactions = utils.apply_redactions
Page.delete_image = utils.delete_image
Page.delete_widget = utils.delete_widget
Page.draw_bezier = utils.draw_bezier
Page.draw_circle = utils.draw_circle
Page.draw_curve = utils.draw_curve
Page.draw_line = utils.draw_line
Page.draw_oval = utils.draw_oval
Page.draw_polyline = utils.draw_polyline
Page.draw_quad = utils.draw_quad
Page.draw_rect = utils.draw_rect
Page.draw_sector = utils.draw_sector
Page.draw_squiggle = utils.draw_squiggle
Page.draw_zigzag = utils.draw_zigzag
Page.get_image_info = utils.get_image_info
Page.get_image_rects = utils.get_image_rects
Page.get_label = utils.get_label
Page.get_links = utils.get_links
Page.get_pixmap = utils.get_pixmap
Page.get_text = utils.get_text
Page.get_text_blocks = utils.get_text_blocks
Page.get_text_selection = utils.get_text_selection
Page.get_text_words = utils.get_text_words
Page.get_textbox = utils.get_textbox
Page.get_textpage_ocr = utils.get_textpage_ocr
Page.insert_image = utils.insert_image
Page.insert_link = utils.insert_link
Page.insert_text = utils.insert_text
Page.insert_textbox = utils.insert_textbox
Page.insert_htmlbox = utils.insert_htmlbox
# Creates a utils.Shape for the page it is called on.
Page.new_shape = lambda x: utils.Shape(x)
Page.replace_image = utils.replace_image
Page.search_for = utils.search_for
Page.show_pdf_page = utils.show_pdf_page
Page.update_link = utils.update_link
Page.write_text = utils.write_text
# find_tables comes from the sibling `table` module.
from .table import find_tables
Page.find_tables = find_tables

# Rect and TextWriter methods.
Rect.get_area = utils.get_area
TextWriter.fill_textbox = utils.fill_textbox
class FitzDeprecation(DeprecationWarning):
    """Warning category for deprecated legacy names."""
def restore_aliases():
    '''
    Create the legacy (camelCase) names as aliases of the current
    snake_case names.

    Side effects: adds a "once" filter for FitzDeprecation warnings,
    replaces warnings.showwarning by a function that writes via log(), and
    sets one legacy attribute per `_alias()` call below on the respective
    class or on this module.
    '''
    warnings.filterwarnings( "once", category=FitzDeprecation)

    def showthis(msg, cat, filename, lineno, file=None, line=None):
        # Replacement for warnings.showwarning: output goes to log().
        text = warnings.formatwarning(msg, cat, filename, lineno, line=line)
        s = text.find("FitzDeprecation")
        if s < 0:
            # Not one of our deprecation warnings: log the full text.
            log(text)
            return
        # Keep only the first line, starting at "FitzDeprecation", and drop
        # its first four characters ("Fitz").
        text = text[s:].splitlines()[0][4:]
        log(text)
    warnings.showwarning = showthis

    def _alias(class_, new_name, legacy_name=None):
        '''
        Adds an alias for a class_ or module item called <class_>.<new_name>.
        class_:
            Class/module to modify; use None for the current module.
        new_name:
            String name of existing item, e.g. name of method.
        legacy_name:
            Name of legacy object to create in <class_>. If None, we generate
            from <new_name> by removing underscores and capitalising the next
            letter.
        '''
        if class_ is None:
            class_ = sys.modules[__name__]
        if not legacy_name:
            # Derive camelCase from snake_case, e.g. 'get_pixmap' -> 'getPixmap'.
            legacy_name = ''
            capitalise_next = False
            for c in new_name:
                if c == '_':
                    capitalise_next = True
                elif capitalise_next:
                    legacy_name += c.upper()
                    capitalise_next = False
                else:
                    legacy_name += c
        new_object = getattr( class_, new_name)
        # Refuse to overwrite an existing (truthy) attribute.
        assert not getattr( class_, legacy_name, None), f'class {class_} already has {legacy_name}'
        if callable( new_object):
            # Callables are wrapped so that each use can emit a warning.
            def deprecated_function( *args, **kwargs):
                if not VersionBind.startswith('1.18'):
                    warnings.warn(
                            f'"{legacy_name}" removed from {class_} after v1.19.0 - use "{new_name}".',
                            category=FitzDeprecation,
                            )
                return new_object( *args, **kwargs)
            setattr( class_, legacy_name, deprecated_function)
            deprecated_function.__doc__ = (
                    f'*** Deprecated and removed in version following 1.19.0 - use "{new_name}". ***\n'
                    f'{new_object.__doc__}'
                    )
        else:
            # Non-callables are aliased directly.
            setattr( class_, legacy_name, new_object)

    # Annot
    _alias( Annot, 'get_file', 'fileGet')
    _alias( Annot, 'get_pixmap')
    _alias( Annot, 'get_sound', 'soundGet')
    _alias( Annot, 'get_text')
    _alias( Annot, 'get_textbox')
    _alias( Annot, 'get_textpage', 'getTextPage')
    _alias( Annot, 'line_ends')
    _alias( Annot, 'set_blendmode', 'setBlendMode')
    _alias( Annot, 'set_border')
    _alias( Annot, 'set_colors')
    _alias( Annot, 'set_flags')
    _alias( Annot, 'set_info')
    _alias( Annot, 'set_line_ends')
    _alias( Annot, 'set_name')
    _alias( Annot, 'set_oc', 'setOC')
    _alias( Annot, 'set_opacity')
    _alias( Annot, 'set_rect')
    _alias( Annot, 'update_file', 'fileUpd')
    # DisplayList
    _alias( DisplayList, 'get_pixmap')
    _alias( DisplayList, 'get_textpage', 'getTextPage')
    # Document
    _alias( Document, 'chapter_count')
    _alias( Document, 'chapter_page_count')
    _alias( Document, 'convert_to_pdf', 'convertToPDF')
    _alias( Document, 'copy_page')
    _alias( Document, 'delete_page')
    _alias( Document, 'delete_pages', 'deletePageRange')
    _alias( Document, 'embfile_add', 'embeddedFileAdd')
    _alias( Document, 'embfile_count', 'embeddedFileCount')
    _alias( Document, 'embfile_del', 'embeddedFileDel')
    _alias( Document, 'embfile_get', 'embeddedFileGet')
    _alias( Document, 'embfile_info', 'embeddedFileInfo')
    _alias( Document, 'embfile_names', 'embeddedFileNames')
    _alias( Document, 'embfile_upd', 'embeddedFileUpd')
    _alias( Document, 'extract_font')
    _alias( Document, 'extract_image')
    _alias( Document, 'find_bookmark')
    _alias( Document, 'fullcopy_page')
    _alias( Document, 'get_char_widths')
    _alias( Document, 'get_ocgs', 'getOCGs')
    _alias( Document, 'get_page_fonts', 'getPageFontList')
    _alias( Document, 'get_page_images', 'getPageImageList')
    _alias( Document, 'get_page_pixmap')
    _alias( Document, 'get_page_text')
    _alias( Document, 'get_page_xobjects', 'getPageXObjectList')
    _alias( Document, 'get_sigflags', 'getSigFlags')
    _alias( Document, 'get_toc', 'getToC')
    _alias( Document, 'get_xml_metadata')
    _alias( Document, 'insert_page')
    _alias( Document, 'insert_pdf', 'insertPDF')
    _alias( Document, 'is_dirty')
    _alias( Document, 'is_form_pdf', 'isFormPDF')
    _alias( Document, 'is_pdf', 'isPDF')
    _alias( Document, 'is_reflowable')
    _alias( Document, 'is_repaired')
    _alias( Document, 'last_location')
    _alias( Document, 'load_page')
    _alias( Document, 'make_bookmark')
    _alias( Document, 'move_page')
    _alias( Document, 'needs_pass')
    _alias( Document, 'new_page')
    _alias( Document, 'next_location')
    _alias( Document, 'page_count')
    _alias( Document, 'page_cropbox', 'pageCropBox')
    _alias( Document, 'page_xref')
    _alias( Document, 'pdf_catalog', 'PDFCatalog')
    _alias( Document, 'pdf_trailer', 'PDFTrailer')
    _alias( Document, 'prev_location', 'previousLocation')
    _alias( Document, 'resolve_link')
    _alias( Document, 'search_page_for')
    _alias( Document, 'set_language')
    _alias( Document, 'set_metadata')
    _alias( Document, 'set_toc', 'setToC')
    _alias( Document, 'set_xml_metadata')
    _alias( Document, 'update_object')
    _alias( Document, 'update_stream')
    _alias( Document, 'xref_is_stream', 'isStream')
    _alias( Document, 'xref_length')
    _alias( Document, 'xref_object')
    _alias( Document, 'xref_stream')
    _alias( Document, 'xref_stream_raw')
    _alias( Document, 'xref_xml_metadata', 'metadataXML')
    # IRect
    _alias( IRect, 'get_area')
    _alias( IRect, 'get_area', 'getRectArea')
    _alias( IRect, 'include_point')
    _alias( IRect, 'include_rect')
    _alias( IRect, 'is_empty')
    _alias( IRect, 'is_infinite')
    # Link
    _alias( Link, 'is_external')
    _alias( Link, 'set_border')
    _alias( Link, 'set_colors')
    # Matrix
    _alias( Matrix, 'is_rectilinear')
    _alias( Matrix, 'prerotate', 'preRotate')
    _alias( Matrix, 'prescale', 'preScale')
    _alias( Matrix, 'preshear', 'preShear')
    _alias( Matrix, 'pretranslate', 'preTranslate')
    # Module-level items (class_ is None).
    _alias( None, 'get_pdf_now', 'getPDFnow')
    _alias( None, 'get_pdf_str', 'getPDFstr')
    _alias( None, 'get_text_length')
    _alias( None, 'get_text_length', 'getTextlength')
    _alias( None, 'image_profile', 'ImageProperties')
    _alias( None, 'paper_rect', 'PaperRect')
    _alias( None, 'paper_size', 'PaperSize')
    _alias( None, 'paper_sizes')
    _alias( None, 'planish_line')
    # Outline
    _alias( Outline, 'is_external')
    _alias( Outline, 'is_open')
    # Page
    _alias( Page, 'add_caret_annot')
    _alias( Page, 'add_circle_annot')
    _alias( Page, 'add_file_annot')
    _alias( Page, 'add_freetext_annot')
    _alias( Page, 'add_highlight_annot')
    _alias( Page, 'add_ink_annot')
    _alias( Page, 'add_line_annot')
    _alias( Page, 'add_polygon_annot')
    _alias( Page, 'add_polyline_annot')
    _alias( Page, 'add_rect_annot')
    _alias( Page, 'add_redact_annot')
    _alias( Page, 'add_squiggly_annot')
    _alias( Page, 'add_stamp_annot')
    _alias( Page, 'add_strikeout_annot')
    _alias( Page, 'add_text_annot')
    _alias( Page, 'add_underline_annot')
    _alias( Page, 'add_widget')
    _alias( Page, 'clean_contents')
    _alias( Page, 'cropbox', 'CropBox')
    _alias( Page, 'cropbox_position', 'CropBoxPosition')
    _alias( Page, 'delete_annot')
    _alias( Page, 'delete_link')
    _alias( Page, 'delete_widget')
    _alias( Page, 'derotation_matrix')
    _alias( Page, 'draw_bezier')
    _alias( Page, 'draw_circle')
    _alias( Page, 'draw_curve')
    _alias( Page, 'draw_line')
    _alias( Page, 'draw_oval')
    _alias( Page, 'draw_polyline')
    _alias( Page, 'draw_quad')
    _alias( Page, 'draw_rect')
    _alias( Page, 'draw_sector')
    _alias( Page, 'draw_squiggle')
    _alias( Page, 'draw_zigzag')
    _alias( Page, 'first_annot')
    _alias( Page, 'first_link')
    _alias( Page, 'first_widget')
    _alias( Page, 'get_contents')
    _alias( Page, 'get_displaylist', 'getDisplayList')
    _alias( Page, 'get_drawings')
    _alias( Page, 'get_fonts', 'getFontList')
    _alias( Page, 'get_image_bbox')
    _alias( Page, 'get_images', 'getImageList')
    _alias( Page, 'get_links')
    _alias( Page, 'get_pixmap')
    _alias( Page, 'get_svg_image', 'getSVGimage')
    _alias( Page, 'get_text')
    _alias( Page, 'get_text_blocks')
    _alias( Page, 'get_text_words')
    _alias( Page, 'get_textbox')
    _alias( Page, 'get_textpage', 'getTextPage')
    _alias( Page, 'insert_font')
    _alias( Page, 'insert_image')
    _alias( Page, 'insert_link')
    _alias( Page, 'insert_text')
    _alias( Page, 'insert_textbox')
    _alias( Page, 'is_wrapped', '_isWrapped')
    _alias( Page, 'load_annot')
    _alias( Page, 'load_links')
    _alias( Page, 'mediabox', 'MediaBox')
    _alias( Page, 'mediabox_size', 'MediaBoxSize')
    _alias( Page, 'new_shape')
    _alias( Page, 'read_contents')
    _alias( Page, 'rotation_matrix')
    _alias( Page, 'search_for')
    _alias( Page, 'set_cropbox', 'setCropBox')
    _alias( Page, 'set_mediabox', 'setMediaBox')
    _alias( Page, 'set_rotation')
    _alias( Page, 'show_pdf_page', 'showPDFpage')
    _alias( Page, 'transformation_matrix')
    _alias( Page, 'update_link')
    _alias( Page, 'wrap_contents')
    _alias( Page, 'write_text')
    # Pixmap
    _alias( Pixmap, 'clear_with')
    _alias( Pixmap, 'copy', 'copyPixmap')
    _alias( Pixmap, 'gamma_with')
    _alias( Pixmap, 'invert_irect', 'invertIRect')
    _alias( Pixmap, 'pil_save', 'pillowWrite')
    _alias( Pixmap, 'pil_tobytes', 'pillowData')
    _alias( Pixmap, 'save', 'writeImage')
    _alias( Pixmap, 'save', 'writePNG')
    _alias( Pixmap, 'set_alpha')
    _alias( Pixmap, 'set_dpi', 'setResolution')
    _alias( Pixmap, 'set_origin')
    _alias( Pixmap, 'set_pixel')
    _alias( Pixmap, 'set_rect')
    _alias( Pixmap, 'tint_with')
    _alias( Pixmap, 'tobytes', 'getImageData')
    _alias( Pixmap, 'tobytes', 'getPNGData')
    _alias( Pixmap, 'tobytes', 'getPNGdata')
    # Quad
    _alias( Quad, 'is_convex')
    _alias( Quad, 'is_empty')
    _alias( Quad, 'is_rectangular')
    # Rect
    _alias( Rect, 'get_area')
    _alias( Rect, 'get_area', 'getRectArea')
    _alias( Rect, 'include_point')
    _alias( Rect, 'include_rect')
    _alias( Rect, 'is_empty')
    _alias( Rect, 'is_infinite')
    # TextWriter
    _alias( TextWriter, 'fill_textbox')
    _alias( TextWriter, 'write_text')
    # utils.Shape
    _alias( utils.Shape, 'draw_bezier')
    _alias( utils.Shape, 'draw_circle')
    _alias( utils.Shape, 'draw_curve')
    _alias( utils.Shape, 'draw_line')
    _alias( utils.Shape, 'draw_oval')
    _alias( utils.Shape, 'draw_polyline')
    _alias( utils.Shape, 'draw_quad')
    _alias( utils.Shape, 'draw_rect')
    _alias( utils.Shape, 'draw_sector')
    _alias( utils.Shape, 'draw_squiggle')
    _alias( utils.Shape, 'draw_zigzag')
    _alias( utils.Shape, 'insert_text')
    _alias( utils.Shape, 'insert_textbox')
# Disabled on purpose: the condition is constant false, so restore_aliases()
# is never called here.
if 0:
    restore_aliases()

# The package version is the version of the bindings.
__version__ = VersionBind

# Module docstring, built at import time: PyMuPDF and MuPDF versions, the
# running Python major.minor version, the platform, and 64/32-bit as derived
# from sys.maxsize.
__doc__ = (
        f'PyMuPDF {VersionBind}: Python bindings for the MuPDF {VersionFitz} library (rebased implementation).\n'
        f'Python {sys.version_info[0]}.{sys.version_info[1]} running on {sys.platform} ({64 if sys.maxsize > 2**32 else 32}-bit).\n'
        )