From 77c5ef83402aa28fc6d3a9d2982ccb03fea70d2f Mon Sep 17 00:00:00 2001
From: Keuin
Date: Thu, 1 Jun 2023 20:14:00 +0800
Subject: texgen (WIP)

---
Review note: hunks below were revised during review (line breaks restored,
defects fixed); the blob ids on the "index" lines are those of the original
commit and no longer match the post-image.

 texgen.py | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 web.py    | 42 ++++++++++++++++++++++++------
 2 files changed, 124 insertions(+), 9 deletions(-)
 create mode 100644 texgen.py

diff --git a/texgen.py b/texgen.py
new file mode 100644
index 0000000..851df83
--- /dev/null
+++ b/texgen.py
@@ -0,0 +1,91 @@
+import asyncio
+import contextlib
+import hashlib
+import logging
+import os
+import shutil
+import typing
+import subprocess
+
+logger = logging.getLogger(__name__)
+
+
+@contextlib.contextmanager
+def temp_dir(path: str):
+    """Create directory `path`, yield it, and delete it with its contents on exit."""
+    os.mkdir(path)
+    try:
+        yield path
+    finally:
+        # The finished PDF is moved out before this runs; only scratch files remain.
+        shutil.rmtree(path, ignore_errors=True)
+
+
+class TexGenerationError(Exception):
+    """Raised when xelatex fails, times out, or produces no PDF."""
+
+
+class TexGenerator:
+    """Compile TeX source to PDF with xelatex, caching results by source hash."""
+
+    def __init__(self, storage_path: str, temp_path: str, task_timeout):
+        """
+        :param storage_path: directory for cached PDFs; created if missing
+        :param temp_path: parent of the per-job scratch directories; created if missing
+        :param task_timeout: seconds a single xelatex run may take (None disables the limit)
+        """
+        self._storage_path = os.path.abspath(storage_path)
+        self._temp_path = os.path.abspath(temp_path)
+        self._task_timeout = task_timeout
+        for p in (self._temp_path, self._storage_path):
+            os.makedirs(p, exist_ok=True)
+
+    def _run_xelatex(self, tex_source: str, workdir: str, job_name: str) -> None:
+        """Run xelatex in `workdir` with `tex_source` on stdin; blocks until it exits."""
+        try:
+            proc = subprocess.run(
+                # Deliberately no shell=True: with an argv list the options would be
+                # handed to the shell instead of xelatex, so -jobname was ignored.
+                [
+                    'xelatex',
+                    '-interaction=nonstopmode',
+                    '-halt-on-error',
+                    f'-jobname={job_name}',
+                ],
+                cwd=workdir,
+                input=tex_source,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                # XeTeX reads UTF-8 input; do not depend on the locale encoding.
+                encoding='utf-8',
+                errors='replace',
+                timeout=self._task_timeout,
+            )
+        except subprocess.TimeoutExpired as e:
+            # subprocess.run() has already killed and reaped the child here.
+            raise TexGenerationError(f'xelatex timed out after {self._task_timeout} seconds') from e
+        logger.debug('xelatex output:\n%s', proc.stdout)
+        if proc.returncode != 0:
+            raise TexGenerationError(f'xelatex process exited with non-zero code {proc.returncode}')
+
+    async def xelatex(self, tex_source: str) -> str:
+        """
+        Compile `tex_source` and return the path of the resulting PDF in the cache.
+
+        :raises TexGenerationError: if xelatex fails, times out, or writes no PDF
+        """
+        tex_hash = hashlib.sha256(tex_source.encode('utf-8')).hexdigest()
+        cache_file_path = os.path.join(self._storage_path, f'{tex_hash}.pdf')
+        if os.path.exists(cache_file_path):
+            return cache_file_path
+        with temp_dir(os.path.join(self._temp_path, os.urandom(24).hex())) as workdir:
+            job_name = 'texput'
+            # xelatex can run for seconds; keep the event loop free meanwhile.
+            await asyncio.get_running_loop().run_in_executor(
+                None, self._run_xelatex, tex_source, workdir, job_name)
+            pdf_path = os.path.join(workdir, f'{job_name}.pdf')
+            if not os.path.isfile(pdf_path):
+                raise TexGenerationError('xelatex exited successfully but produced no PDF')
+            # Atomic and overwriting, so concurrent requests for the same source cannot clash.
+            os.replace(pdf_path, cache_file_path)
+        return cache_file_path
diff --git a/web.py b/web.py
index 7aadac4..007e93d 100644
--- a/web.py
+++ b/web.py
@@ -2,26 +2,50 @@ import re
 
 import aiohttp
 from fastapi import FastAPI, Response
+from fastapi.responses import FileResponse
 
 import main
+import texgen
 
 app = FastAPI()
 
 utaten_pattern = re.compile(r'[a-z0-9]+')
+tex_generator = texgen.TexGenerator('pdf_cache', 'temp', 20)
 
 
-@app.get("/utaten/{item_id}/pdf")
-async def get_utaten_lyric_pdf(item_id: str):
-    raise NotImplementedError
+class TexSourceGenerationError(Exception):
+    """Raised when the lyric page cannot be fetched from utaten.com."""
 
 
-@app.get("/utaten/{item_id}.tex")
-async def get_utaten_lyric_pdf(item_id: str, resp: Response):
+async def _get_utaten_tex_source(item_id: str) -> str:
+    """Download the utaten.com lyric page for `item_id` and convert it to TeX source."""
     async with aiohttp.ClientSession() as ses:
         async with ses.get(f'https://utaten.com/lyric/{item_id}/') as r:
             if not r.ok:
-                resp.status_code = 503
-                return
+                raise TexSourceGenerationError('HTTP request failed when reading page source')
             html = await r.text()
-    tex = main.html_to_tex(html)
-    return Response(content=tex, media_type='application/x-tex')
+    return main.html_to_tex(html)
+
+
+@app.get("/utaten/{item_id}.pdf")
+async def get_utaten_lyric_pdf(item_id: str):
+    """Serve the lyric as PDF: 503 if the page fetch fails, 502 if xelatex fails."""
+    try:
+        tex = await _get_utaten_tex_source(item_id)
+        pdf_path = await tex_generator.xelatex(tex)
+        return FileResponse(pdf_path, media_type='application/pdf')
+    except TexSourceGenerationError as e:
+        # Same status as the .tex route: the upstream page could not be read.
+        return Response(content=str(e), status_code=503)
+    except texgen.TexGenerationError as e:
+        return Response(content=f'Failed to generate tex file: {e}', status_code=502)
+
+
+@app.get("/utaten/{item_id}.tex")
+async def get_utaten_lyric_tex(item_id: str):
+    """Serve the lyric as TeX source, or 503 if the page fetch fails."""
+    try:
+        tex = await _get_utaten_tex_source(item_id)
+        return Response(content=tex, media_type='application/x-tex')
+    except TexSourceGenerationError as e:
+        return Response(content=str(e), status_code=503)
-- 
cgit v1.2.3