summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKeuin <[email protected]>2023-06-01 20:14:00 +0800
committerKeuin <[email protected]>2023-06-01 20:14:00 +0800
commit77c5ef83402aa28fc6d3a9d2982ccb03fea70d2f (patch)
treee86ca5b3191060a9966cf92e300a6c6679129e45
parent093c815e9e3ee8aecf92cb1de7b51337ca28c16e (diff)
texgen (WIP)
-rw-r--r--texgen.py65
-rw-r--r--web.py38
2 files changed, 94 insertions, 9 deletions
diff --git a/texgen.py b/texgen.py
new file mode 100644
index 0000000..851df83
--- /dev/null
+++ b/texgen.py
@@ -0,0 +1,65 @@
+import asyncio
+import contextlib
+import hashlib
+import os
+import shutil
+import subprocess
+import tempfile
+import typing
+
+
+@contextlib.contextmanager
+def temp_dir(path: str, keep: bool = False) -> typing.Iterator[str]:
+    """Create directory ``path`` and hand it to a ``with`` block.
+
+    Without the ``contextmanager`` decorator this generator function cannot be
+    used in a ``with`` statement at all, which is how it is meant to be called.
+
+    Args:
+        path: Directory to create. It must not exist yet; ``os.mkdir`` raises
+            ``FileExistsError`` otherwise.
+        keep: Leave the directory in place on exit (useful when debugging).
+            By default the directory and its contents are removed.
+
+    Yields:
+        ``path`` unchanged.
+    """
+    os.mkdir(path)
+    try:
+        yield path
+    finally:
+        if not keep:
+            # Best-effort cleanup: a failure to delete scratch files must not
+            # mask an exception raised inside the ``with`` body.
+            shutil.rmtree(path, ignore_errors=True)
+
+
+class TexGenerationError(Exception):
+    """Raised when compiling TeX source with xelatex fails."""
+
+
+class TexGenerator:
+    """Compile TeX source to PDF with ``xelatex``, caching results by content.
+
+    Finished PDFs are stored in ``storage_path`` under the name
+    ``<sha256 of the UTF-8 encoded source>.pdf``. Every compilation runs in
+    its own scratch directory below ``temp_path``.
+    """
+
+    def __init__(self, storage_path: str, temp_path: str, task_timeout):
+        """Store the configuration and make sure both directories exist.
+
+        Args:
+            storage_path: Directory that holds the cached PDF files.
+            temp_path: Directory below which scratch directories are created.
+            task_timeout: Timeout, in seconds, for one xelatex run.
+        """
+        self._storage_path = os.path.abspath(storage_path)
+        self._temp_path = os.path.abspath(temp_path)
+        self._task_timeout = task_timeout
+        for p in (self._temp_path, self._storage_path):
+            # Unlike os.mkdir this also creates missing parent directories.
+            os.makedirs(p, exist_ok=True)
+
+    async def xelatex(self, tex_source: str) -> str:
+        """Return the path of the PDF generated from ``tex_source``.
+
+        A PDF already cached for identical source is returned without
+        running xelatex again.
+
+        Raises:
+            TexGenerationError: xelatex could not be started, timed out,
+                exited with a non-zero code, or its output could not be stored.
+        """
+        tex_hash = hashlib.sha256(tex_source.encode('utf-8')).hexdigest()
+        cache_file_path = os.path.join(self._storage_path, f'{tex_hash}.pdf')
+        if os.path.exists(cache_file_path):
+            return cache_file_path
+        # A compilation can take many seconds. Running it in a worker thread
+        # keeps the event loop free to serve other requests meanwhile.
+        loop = asyncio.get_running_loop()
+        await loop.run_in_executor(None, self._compile, tex_source, cache_file_path)
+        return cache_file_path
+
+    def _compile(self, tex_source: str, cache_file_path: str) -> None:
+        """Run xelatex on ``tex_source`` (blocking) and move the PDF to ``cache_file_path``."""
+        job_name = 'texput'
+        # The scratch directory is removed again when the block exits.
+        with tempfile.TemporaryDirectory(dir=self._temp_path) as workdir:
+            try:
+                # No shell=True: combined with an argument list, POSIX hands
+                # only 'xelatex' to the shell and silently drops every flag,
+                # which is why -jobname appeared to have no effect.
+                proc = subprocess.run(
+                    [
+                        'xelatex',
+                        '-interaction=nonstopmode',
+                        '-halt-on-error',
+                        f'-jobname={job_name}',
+                    ],
+                    cwd=workdir,
+                    input=tex_source,
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.STDOUT,
+                    text=True,
+                    # Same encoding as the one used for the cache key.
+                    encoding='utf-8',
+                    errors='replace',
+                    # subprocess.run kills the child when the timeout expires.
+                    timeout=self._task_timeout,
+                )
+            except subprocess.TimeoutExpired as e:
+                raise TexGenerationError(
+                    f'xelatex process timed out after {self._task_timeout} seconds') from e
+            except OSError as e:
+                raise TexGenerationError(f'failed to start xelatex: {e}') from e
+            # stderr is merged into stdout above, so there is one stream only.
+            print('STDOUT', proc.stdout)
+            if proc.returncode != 0:
+                raise TexGenerationError(f'xelatex process exited with non-zero code {proc.returncode}')
+            try:
+                # os.replace overwrites atomically, so a concurrent request
+                # that cached the same source first does not cause a failure.
+                os.replace(os.path.join(workdir, f'{job_name}.pdf'), cache_file_path)
+            except OSError as e:
+                raise TexGenerationError(f'failed to store generated PDF: {e}') from e
diff --git a/web.py b/web.py
index 7aadac4..007e93d 100644
--- a/web.py
+++ b/web.py
@@ -2,26 +2,46 @@ import re
import aiohttp
from fastapi import FastAPI, Response
+from fastapi.responses import FileResponse
import main
+import texgen
app = FastAPI()
utaten_pattern = re.compile(r'[a-z0-9]+')
+# Shared PDF generator: arguments are the storage directory ('pdf_cache'),
+# the scratch directory ('temp') and the task timeout (20).
+tex_generator = texgen.TexGenerator('pdf_cache', 'temp', 20)
-@app.get("/utaten/{item_id}/pdf")
-async def get_utaten_lyric_pdf(item_id: str):
- raise NotImplementedError
+class TexSourceGenerationError(Exception):
+    """Raised when the TeX source for a lyric page cannot be produced."""
-@app.get("/utaten/{item_id}.tex")
-async def get_utaten_lyric_pdf(item_id: str, resp: Response):
+# Download the utaten.com lyric page for `item_id` and convert its HTML to TeX
+# with main.html_to_tex. Raises TexSourceGenerationError when the HTTP
+# response is not OK.
+# NOTE(review): item_id is interpolated into the URL as-is; utaten_pattern is
+# defined in this module but not applied in the lines visible here. Confirm
+# whether the id is validated elsewhere.
+async def _get_utaten_tex_source(item_id: str) -> str:
async with aiohttp.ClientSession() as ses:
async with ses.get(f'https://utaten.com/lyric/{item_id}/') as r:
if not r.ok:
- resp.status_code = 503
- return
+ raise TexSourceGenerationError('HTTP request failed when reading page source')
html = await r.text()
- tex = main.html_to_tex(html)
- return Response(content=tex, media_type='application/x-tex')
+ return main.html_to_tex(html)
+
+
+@app.get("/utaten/{item_id}.pdf")
+async def get_utaten_lyric_pdf(item_id: str):
+    """Serve the lyric page `item_id` rendered as a PDF.
+
+    Responds with 503 when the TeX source cannot be produced and with 502
+    when PDF generation fails.
+    """
+    try:
+        print('_get_utaten_tex_source')
+        tex = await _get_utaten_tex_source(item_id)
+        print('xelatex')
+        pdf_path = await tex_generator.xelatex(tex)
+    except TexSourceGenerationError as e:
+        # Without this handler a failed page fetch surfaces as an unhandled
+        # exception; report it the same way the .tex route does.
+        return Response(content=str(e), status_code=503)
+    except texgen.TexGenerationError as e:
+        return Response(content=f'Failed to generate tex file: {e}', status_code=502)
+    return FileResponse(pdf_path, media_type='application/pdf')
+
+
+@app.get("/utaten/{item_id}.tex")
+async def get_utaten_lyric_tex(item_id: str):
+    """Serve the TeX source generated from the lyric page `item_id`.
+
+    Responds with 503 when the TeX source cannot be produced.
+    """
+    try:
+        tex = await _get_utaten_tex_source(item_id)
+    except TexSourceGenerationError as e:
+        return Response(content=str(e), status_code=503)
+    return Response(content=tex, media_type='application/x-tex')