-
Notifications
You must be signed in to change notification settings - Fork 0
/
client.py
32 lines (23 loc) · 884 Bytes
/
client.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import asyncio
import time
import fitz
import websockets
from loguru import logger
async def ocr_pdf_page_by_page(
    uri: str = "ws://localhost:8765",
    file_path: str = "your-pdf.pdf",
) -> None:
    """Send a PDF to a WebSocket server one page at a time and log each reply.

    Each page of the PDF is copied into its own single-page document, sent
    as one binary message, and the next message received from the server is
    logged as that page's content before the following page is sent. The
    total elapsed time is logged at the end.

    Args:
        uri: WebSocket endpoint to connect to.
        file_path: Path of the PDF file to split and send.
    """
    async with websockets.connect(uri) as websocket:
        start_time = time.perf_counter()
        # Context managers ensure the PyMuPDF documents are released even if
        # the connection fails part-way through the file.
        with fitz.open(file_path) as doc:
            for page_num in range(doc.page_count):
                # Build a standalone one-page PDF for this page.
                with fitz.open() as tmp_doc:
                    tmp_doc.insert_pdf(
                        doc, from_page=page_num, to_page=page_num
                    )
                    payload = tmp_doc.tobytes()
                await websocket.send(payload)
                # loguru uses str.format-style "{}" placeholders; "%s" would
                # be emitted literally and the arguments silently dropped.
                logger.info("Page: {} sent", page_num)
                text = await websocket.recv()
                logger.info("Page <{}> content: \n {}", page_num, text)
        # Closed explicitly so the measured time below includes the close;
        # NOTE(review): the enclosing ``async with`` also closes on exit.
        await websocket.close()
        end_time = time.perf_counter()
        logger.info("Elapsed: {}\n", end_time - start_time)
if __name__ == "__main__":
    # Script entry point: build the client coroutine and run it to
    # completion with asyncio.run.
    client_coro = ocr_pdf_page_by_page()
    asyncio.run(client_coro)