-
Notifications
You must be signed in to change notification settings - Fork 0
/
fixActionsIntoDest.py
140 lines (121 loc) · 4.99 KB
/
fixActionsIntoDest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
from PyPDF2 import PdfWriter
from PyPDF2._writer import _pdf_objectify
from PyPDF2.generic import AnnotationBuilder, Fit, NameObject
from PyPDF2.generic._data_structures import ArrayObject, DictionaryObject, NumberObject, Destination
from operator import itemgetter
from collections.abc import Callable
from typing import Any
from . import utils
def generateVisitorFor(annot: dict) -> Callable[[Any, Any, Any, Any, Any], None]:
    """Build a text visitor that looks for ``annot['label']`` in a page's text.

    The returned callable has the five-argument shape used for the
    ``visitor_text`` hook of ``extract_text``. It is fed the page's text
    fragments one by one and keeps its matching state inside ``annot``:

    * ``annot['runLabel']`` -- the part of the label that still has to be
      matched; the empty string means the whole label was found.
    * ``annot['runRec']`` -- a copy of the text matrix of the fragment where
      the current match started, with element 5 increased by
      ``fontSize - 1``.

    Any state left over from a previous search is removed before the
    visitor is returned, so every call starts a fresh search.

    Args:
        annot: Mutable mapping with at least a ``'label'`` string. It is
            modified in place.

    Returns:
        The visitor callable; it always returns ``None``.
    """
    annot.pop('runLabel', None)
    annot.pop('runRec', None)

    def visit(text, transformMatrix, textMatrix, fontDict, fontSize):
        text = text.strip()
        if not text:
            return

        remaining = annot.get('runLabel')
        if remaining is not None:
            # A match is in progress (or already finished).
            if remaining == '':
                return  # already found
            if remaining.startswith(text):
                # The fragment is just the next part of the label.
                annot['runLabel'] = remaining[len(text):].strip()
                return
            if text.startswith(remaining):
                # The fragment holds the rest of the label and more.
                annot['runLabel'] = ''
                return
            # Mismatch: drop the partial match, then fall through so this
            # very fragment can start a new match instead of being lost.
            annot.pop('runLabel', None)
            annot.pop('runRec', None)

        label = annot['label']
        if label.startswith(text):
            # Strip only the matched prefix. str.replace() would also delete
            # later occurrences of the fragment inside the label.
            annot['runLabel'] = label[len(text):].strip()
        elif label and text.startswith(label):
            # The whole label sits at the start of a single longer fragment.
            annot['runLabel'] = ''
        else:
            return

        # Work on a copy so the matrix object handed in by the extractor is
        # never modified.
        anchor = list(textMatrix)
        anchor[5] += fontSize - 1
        annot['runRec'] = anchor

    return visit
def concatStrings(annotations : list):
    """Assemble the ``'text'`` entry of every annotation from its fragments.

    For each annotation, the fragments in ``annot['strings']`` are ordered by
    their ``"x"`` entry and then their ``"y"`` entry, and their ``'text'``
    values are concatenated without a separator. The result is stored under
    ``annot['text']``; the annotations are modified in place and nothing is
    returned.
    """
    byPosition = itemgetter("x", "y")
    fragmentText = itemgetter('text')
    for entry in annotations:
        ordered = sorted(entry['strings'], key=byPosition)
        entry['text'] = "".join(map(fragmentText, ordered))
def fixWordActionsIntoDest(writer: PdfWriter, save: bool):
    """Replace action-based link annotations with internal page links.

    The function works in three steps:

    1. On every page, annotations that have no ``utils.DESTINATION`` entry
       but carry a ``utils.ACTION`` whose ``/Type`` equals
       ``utils.ACTION_TYPE`` are taken out of the page's annotation array
       and collected; the text for each of them is gathered through
       ``utils.checktext`` and ``concatStrings``.
    2. For each collected annotation, the label (its text up to and
       including the first ``':'``) is searched on the other pages with
       ``generateVisitorFor``.
    3. When the label is found, a link built by ``utils.buildInternalLink``
       is added to the annotation's source page via ``utils.add_annotation``.

    Args:
        writer: The PDF writer whose pages are modified in place.
        save: When true, the result is written to ``result-an.pdf`` in the
            current working directory.
    """
    annotations = []

    # load annotations
    for pageIndex, page in enumerate(writer.pages):
        if utils.ANNOTATIONS not in page:
            continue
        # extra array for visitor
        pageActions = []
        annos = []
        for annot in page[utils.ANNOTATIONS]:
            obj = annot.get_object()
            if (utils.DESTINATION not in obj
                    and utils.ACTION in obj
                    and obj[utils.ACTION]['/Type'] == utils.ACTION_TYPE):
                formatted = utils.formatObj(obj)
                formatted['page'] = pageIndex
                pageActions.append(formatted)
            else:
                annos.append(annot)
        # Keep only the annotations that were not collected above.
        page[NameObject(utils.ANNOTATIONS)] = ArrayObject(annos)
        if not pageActions:
            continue
        annotations.extend(pageActions)
        page.extract_text(visitor_text=utils.checktext(pageActions))

    # putting text together
    concatStrings(annotations)

    # No need to search earlier pages again: e.g. table 3 will be on a later
    # page than table 2. The start index is reset whenever the source page
    # of the annotation changes.
    searchStart = 0
    lastPage = 0
    pageCount = len(writer.pages)
    for annot in annotations:
        # find label
        if 'text' not in annot:
            continue
        text = annot['text']
        head, colon, _ = text.partition(':')
        if not colon:
            # Without a ':' there is no label to look for; skip this
            # annotation instead of aborting the whole run.
            print(f"No ':' found in {text!r}; skipping")
            continue
        annot['label'] = (head + colon).replace('\r', '').replace('\n', '')
        print(annot['label'])
        if lastPage != annot['page']:
            searchStart = 0

        # find correct destination
        for index in range(searchStart, pageCount):
            if index == annot['page']:
                continue
            search = writer.pages[index]
            search.extract_text(visitor_text=generateVisitorFor(annot))
            # The visitor sets 'runRec' as soon as a match starts and sets
            # 'runLabel' to '' only once the whole label was seen, so a
            # partial match left over at the end of the page is not a hit.
            if 'runRec' in annot and annot.get('runLabel') == '':
                # found label on page 'search'
                print(index + 1)
                rec = annot['runRec']
                annotation = utils.buildInternalLink(
                    rect=annot['obj'][utils.RECTANGLE],
                    target_page_index=index,
                    left=rec[4], top=rec[5], zoom=0)  # fit_args
                # save for later if multiple were found
                annot.setdefault('builded', {})[index] = annotation
                # The search stops at the first hit, so at most one
                # candidate is collected per annotation here.
                break
        lastPage = annot['page']

        builded = annot.get('builded')
        if not builded:
            # nothing found
            continue
        if len(builded) == 1:
            # write single found
            page, link = builded.popitem()
        else:
            # wait for valid input
            # NOTE(review): the keys are zero-based page indices while the
            # diagnostic print above shows index + 1 -- confirm which one the
            # user is expected to type.
            while True:
                print("Choose page: ")
                try:
                    choice = int(input())
                except ValueError:
                    print('Invalid')
                    continue
                if choice in builded:
                    break
                print('Invalid')
            page, link = choice, builded[choice]
        utils.add_annotation(writer, page_number=annot['page'], annotation=link)
        searchStart = page

    if save:
        with open("result-an.pdf", "wb") as out:
            writer.write(out)