diff options
author | Keuin <[email protected]> | 2023-06-01 20:14:00 +0800 |
---|---|---|
committer | Keuin <[email protected]> | 2023-06-01 20:14:00 +0800 |
commit | 77c5ef83402aa28fc6d3a9d2982ccb03fea70d2f (patch) | |
tree | e86ca5b3191060a9966cf92e300a6c6679129e45 | |
parent | 093c815e9e3ee8aecf92cb1de7b51337ca28c16e (diff) |
texgen (WIP)
-rw-r--r-- | texgen.py | 65 | ||||
-rw-r--r-- | web.py | 38 |
2 files changed, 94 insertions, 9 deletions
"""Compile TeX sources to PDF with XeLaTeX and cache the results on disk."""
import asyncio
import contextlib
import hashlib
import logging
import os
import shutil
import typing
import subprocess

logger = logging.getLogger(__name__)


@contextlib.contextmanager
def temp_dir(path: str):
    """Create `path` as a scratch directory and delete it on exit.

    Yields:
        The same `path`, once the directory exists.

    Raises:
        FileExistsError: if `path` already exists (from `os.mkdir`).
    """
    os.mkdir(path)
    try:
        yield path
    finally:
        # Best-effort cleanup: a failure to delete scratch files must not
        # mask the real outcome of the work done inside the `with` body.
        shutil.rmtree(path, ignore_errors=True)


class TexGenerationError(Exception):
    """Raised when XeLaTeX cannot turn a TeX source into a PDF."""


class TexGenerator:
    """Render TeX sources to PDF files, cached by the SHA-256 of the source."""

    def __init__(self, storage_path: str, temp_path: str, task_timeout):
        """Prepare the cache and scratch directories.

        Args:
            storage_path: directory holding the cached `<sha256>.pdf` files.
            temp_path: directory under which per-job scratch dirs are created.
            task_timeout: seconds a single XeLaTeX run may take
                (passed to `subprocess.run`; `None` disables the limit).
        """
        self._storage_path = os.path.abspath(storage_path)
        self._temp_path = os.path.abspath(temp_path)
        self._task_timeout = task_timeout
        for p in (self._temp_path, self._storage_path):
            os.makedirs(p, exist_ok=True)

    async def xelatex(self, tex_source: str) -> str:
        """Return the path of the PDF rendered from `tex_source`.

        A cached PDF is returned immediately when one exists for the same
        source text; otherwise XeLaTeX is run in a worker thread so the
        event loop is not blocked while the compiler runs.

        Raises:
            TexGenerationError: XeLaTeX could not be started, timed out,
                exited with a non-zero code, or produced no PDF.
        """
        tex_hash = hashlib.sha256(tex_source.encode('utf-8')).hexdigest()
        cache_file_path = os.path.join(self._storage_path, f'{tex_hash}.pdf')
        if os.path.exists(cache_file_path):
            return cache_file_path
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, self._compile_to, tex_source, cache_file_path)
        return cache_file_path

    def _compile_to(self, tex_source: str, cache_file_path: str) -> None:
        """Run XeLaTeX on `tex_source` (blocking) and store the PDF at `cache_file_path`."""
        job_name = 'texput'
        workdir = os.path.join(self._temp_path, os.urandom(24).hex())
        with temp_dir(workdir):
            try:
                # An argument list must be used with shell=False: with
                # shell=True on POSIX only the first item is the command and
                # the remaining items are handed to the shell itself, so the
                # xelatex flags would be silently dropped.
                result = subprocess.run(
                    [
                        'xelatex',
                        '-interaction=nonstopmode',
                        '-halt-on-error',
                        f'-jobname={job_name}',
                    ],
                    cwd=workdir,
                    input=tex_source,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,  # stderr is merged into stdout
                    encoding='utf-8',
                    errors='replace',
                    timeout=self._task_timeout,
                )
            except subprocess.TimeoutExpired as e:
                # subprocess.run has already killed and reaped the child.
                raise TexGenerationError(
                    f'xelatex did not finish within {self._task_timeout} seconds'
                ) from e
            except OSError as e:
                raise TexGenerationError(f'failed to run xelatex: {e}') from e

            logger.debug('xelatex output:\n%s', result.stdout)
            if result.returncode != 0:
                raise TexGenerationError(
                    f'xelatex process exited with non-zero code {result.returncode}'
                )

            # Stage the file inside the storage directory first, so the final
            # step is an atomic same-directory replace even when the scratch
            # and storage directories live on different filesystems.
            staging_path = f'{cache_file_path}.{os.path.basename(workdir)}.tmp'
            try:
                shutil.move(os.path.join(workdir, f'{job_name}.pdf'), staging_path)
                os.replace(staging_path, cache_file_path)
            except OSError as e:
                with contextlib.suppress(OSError):
                    os.remove(staging_path)
                raise TexGenerationError(f'failed to store generated PDF: {e}') from e


# NOTE: in the original diff, this line continues with unchanged context from
# web.py: its imports (re, aiohttp, fastapi's FastAPI/Response/FileResponse,
# main, texgen), `app = FastAPI()` and
# `utaten_pattern = re.compile(r'[a-z0-9]+')`. Those statements belong to
# web.py, not to this module.
tex_generator = texgen.TexGenerator('pdf_cache', 'temp', 20)


class TexSourceGenerationError(Exception):
    """Raised when the TeX source for a lyric page cannot be produced."""


def _is_valid_item_id(item_id: str) -> bool:
    """Return True when `item_id` consists solely of characters allowed by `utaten_pattern`."""
    return utaten_pattern.fullmatch(item_id) is not None


async def _get_utaten_tex_source(item_id: str) -> str:
    """Download the utaten.com lyric page for `item_id` and convert it to TeX.

    Raises:
        TexSourceGenerationError: the id is malformed, the request could not
            be completed, or the site answered with an error status.
    """
    # The id is interpolated into the upstream URL, so reject anything that
    # could alter the request path or query (e.g. "..", "?", "#").
    if not _is_valid_item_id(item_id):
        raise TexSourceGenerationError('Invalid item id')
    try:
        async with aiohttp.ClientSession() as ses:
            async with ses.get(f'https://utaten.com/lyric/{item_id}/') as r:
                if not r.ok:
                    raise TexSourceGenerationError('HTTP request failed when reading page source')
                html = await r.text()
    except aiohttp.ClientError as e:
        raise TexSourceGenerationError('HTTP request failed when reading page source') from e
    return main.html_to_tex(html)


@app.get("/utaten/{item_id}.pdf")
async def get_utaten_lyric_pdf(item_id: str):
    """Serve the lyric of `item_id` as a PDF.

    Responds 400 for a malformed id, 503 when the lyric page cannot be
    fetched, and 502 when the PDF cannot be generated.
    """
    if not _is_valid_item_id(item_id):
        return Response(content='Invalid item id', status_code=400)
    try:
        tex = await _get_utaten_tex_source(item_id)
        pdf_path = await tex_generator.xelatex(tex)
    except TexSourceGenerationError as e:
        # Same status as the .tex route uses for an upstream fetch failure.
        return Response(content=str(e), status_code=503)
    except texgen.TexGenerationError as e:
        return Response(content=f'Failed to generate tex file: {e}', status_code=502)
    return FileResponse(pdf_path, media_type='application/pdf')


@app.get("/utaten/{item_id}.tex")
async def get_utaten_lyric_tex(item_id: str):
    """Serve the lyric of `item_id` as TeX source.

    Responds 400 for a malformed id and 503 when the lyric page cannot be
    fetched.
    """
    if not _is_valid_item_id(item_id):
        return Response(content='Invalid item id', status_code=400)
    try:
        tex = await _get_utaten_tex_source(item_id)
    except TexSourceGenerationError as e:
        return Response(content=str(e), status_code=503)
    return Response(content=tex, media_type='application/x-tex')