From ed540d68256f3f05e86b7880858249a1e8a17c42 Mon Sep 17 00:00:00 2001
From: Keuin
Date: Fri, 2 Jun 2023 00:15:30 +0800
Subject: Cache HTML source on disk

---
Review notes (text in this area is not part of the commit message):
- htmlcache.py: create the cache directory before writing; previously every
  cache write failed when the directory was missing, so nothing was cached.
- htmlcache.py: aiohttp.ClientError is now reported as
  TexSourceGenerationError, matching the bad-status path.
- htmlcache.py: item ids that resolve outside the cache directory are
  rejected before any file or network access.
- The blob id on the htmlcache.py index line was not regenerated.

 htmlcache.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 web.py       | 23 +++++------------------
 2 files changed, 58 insertions(+), 18 deletions(-)
 create mode 100644 htmlcache.py

diff --git a/htmlcache.py b/htmlcache.py
new file mode 100644
index 0000000..2f727a7
--- /dev/null
+++ b/htmlcache.py
@@ -0,0 +1,53 @@
+"""Disk-backed cache for utaten.com lyric page HTML."""
+import os
+
+import aiohttp
+
+import main
+
+
+class TexSourceGenerationError(Exception):
+    """Raised when the TeX source for a lyric page cannot be produced."""
+
+
+class HtmlCache:
+    """Cache downloaded lyric pages as ``<item_id>.html`` files on disk."""
+
+    def __init__(self, cache_path: str):
+        self._cache_path = os.path.abspath(cache_path)
+
+    async def get_utaten_tex_source(self, item_id: str) -> str:
+        """Return the TeX source for lyric page ``item_id``.
+
+        The page HTML is read from the cache file when one exists;
+        otherwise it is downloaded and written to the cache.
+
+        Raises:
+            TexSourceGenerationError: if ``item_id`` does not name a file
+                directly inside the cache directory, or the download fails.
+        """
+        cache_file_path = os.path.join(self._cache_path, f'{item_id}.html')
+        # item_id is caller-supplied: never read or write outside the cache.
+        if os.path.dirname(os.path.abspath(cache_file_path)) != self._cache_path:
+            raise TexSourceGenerationError('Invalid item id')
+        if os.path.isfile(cache_file_path):
+            with open(cache_file_path, 'r', encoding='utf-8') as f:
+                html = f.read()
+        else:
+            try:
+                async with aiohttp.ClientSession() as ses:
+                    async with ses.get(f'https://utaten.com/lyric/{item_id}/') as r:
+                        if not r.ok:
+                            raise TexSourceGenerationError('HTTP request failed when reading page source')
+                        html = await r.text()
+            except aiohttp.ClientError as e:
+                # Connection-level failures get the same error type as a bad status.
+                raise TexSourceGenerationError('HTTP request failed when reading page source') from e
+            try:
+                # The cache directory is not created anywhere else in this module.
+                os.makedirs(self._cache_path, exist_ok=True)
+                with open(cache_file_path, 'w', encoding='utf-8') as f:
+                    f.write(html)
+            except IOError as e:
+                print(f'Failed to update cache for song `{item_id}`: {e}')
+        return main.html_to_tex(html)
diff --git a/web.py b/web.py
index 007e93d..e2d7fdf 100644
--- a/web.py
+++ b/web.py
@@ -1,36 +1,23 @@
 import re
 
-import aiohttp
 from fastapi import FastAPI, Response
 from fastapi.responses import FileResponse
 
-import main
+import htmlcache
 import texgen
 
 app = FastAPI()
 
 utaten_pattern = re.compile(r'[a-z0-9]+')
 tex_generator = texgen.TexGenerator('pdf_cache', 'temp', 20)
-
-
-class TexSourceGenerationError(Exception):
-    pass
-
-
-async def _get_utaten_tex_source(item_id: str) -> str:
-    async with aiohttp.ClientSession() as ses:
-        async with ses.get(f'https://utaten.com/lyric/{item_id}/') as r:
-            if not r.ok:
-                raise TexSourceGenerationError('HTTP request failed when reading page source')
-            html = await r.text()
-    return main.html_to_tex(html)
+html_cache = htmlcache.HtmlCache('html_cache')
 
 
 @app.get("/utaten/{item_id}.pdf")
 async def get_utaten_lyric_pdf(item_id: str):
     try:
         print('_get_utaten_tex_source')
-        tex = await _get_utaten_tex_source(item_id)
+        tex = await html_cache.get_utaten_tex_source(item_id)
         print('xelatex')
         pdf_path = await tex_generator.xelatex(tex)
         return FileResponse(pdf_path, media_type='application/pdf')
@@ -41,7 +28,7 @@ async def get_utaten_lyric_pdf(item_id: str):
 @app.get("/utaten/{item_id}.tex")
 async def get_utaten_lyric_tex(item_id: str):
     try:
-        tex = await _get_utaten_tex_source(item_id)
+        tex = await html_cache.get_utaten_tex_source(item_id)
         return Response(content=tex, media_type='application/x-tex')
-    except TexSourceGenerationError as e:
+    except htmlcache.TexSourceGenerationError as e:
         return Response(content=str(e), status_code=503)
-- 
cgit v1.2.3