diff options
author | Keuin <[email protected]> | 2023-06-02 00:15:30 +0800 |
---|---|---|
committer | Keuin <[email protected]> | 2023-06-02 00:15:30 +0800 |
commit | ed540d68256f3f05e86b7880858249a1e8a17c42 (patch) | |
tree | e991424128a0a39578634f00f68e8e3bf5dd4f8b | |
parent | 77c5ef83402aa28fc6d3a9d2982ccb03fea70d2f (diff) |
Cache HTML source on disk
-rw-r--r-- | htmlcache.py | 33 |
-rw-r--r-- | web.py | 23 |
2 files changed, 38 insertions, 18 deletions
```diff
diff --git a/htmlcache.py b/htmlcache.py
new file mode 100644
index 0000000..2f727a7
--- /dev/null
+++ b/htmlcache.py
@@ -0,0 +1,33 @@
+import os
+
+import aiohttp
+
+import main
+
+
+class TexSourceGenerationError(Exception):
+    pass
+
+
+class HtmlCache:
+
+    def __init__(self, cache_path: str):
+        self._cache_path = os.path.abspath(cache_path)
+
+    async def get_utaten_tex_source(self, item_id: str) -> str:
+        cache_file_path = os.path.join(self._cache_path, f'{item_id}.html')
+        if os.path.isfile(cache_file_path):
+            with open(cache_file_path, 'r', encoding='utf-8') as f:
+                html = f.read()
+        else:
+            async with aiohttp.ClientSession() as ses:
+                async with ses.get(f'https://utaten.com/lyric/{item_id}/') as r:
+                    if not r.ok:
+                        raise TexSourceGenerationError('HTTP request failed when reading page source')
+                    html = await r.text()
+            try:
+                with open(cache_file_path, 'w', encoding='utf-8') as f:
+                    f.write(html)
+            except IOError as e:
+                print(f'Failed to update cache for song `{item_id}`: {e}')
+        return main.html_to_tex(html)
diff --git a/web.py b/web.py
--- a/web.py
+++ b/web.py
@@ -1,36 +1,23 @@
 import re
 
-import aiohttp
 from fastapi import FastAPI, Response
 from fastapi.responses import FileResponse
 
-import main
+import htmlcache
 import texgen
 
 app = FastAPI()
 
 utaten_pattern = re.compile(r'[a-z0-9]+')
 tex_generator = texgen.TexGenerator('pdf_cache', 'temp', 20)
-
-
-class TexSourceGenerationError(Exception):
-    pass
-
-
-async def _get_utaten_tex_source(item_id: str) -> str:
-    async with aiohttp.ClientSession() as ses:
-        async with ses.get(f'https://utaten.com/lyric/{item_id}/') as r:
-            if not r.ok:
-                raise TexSourceGenerationError('HTTP request failed when reading page source')
-            html = await r.text()
-    return main.html_to_tex(html)
+html_cache = htmlcache.HtmlCache('html_cache')
 
 
 @app.get("/utaten/{item_id}.pdf")
 async def get_utaten_lyric_pdf(item_id: str):
     try:
         print('_get_utaten_tex_source')
-        tex = await _get_utaten_tex_source(item_id)
+        tex = await html_cache.get_utaten_tex_source(item_id)
         print('xelatex')
         pdf_path = await tex_generator.xelatex(tex)
         return FileResponse(pdf_path, media_type='application/pdf')
@@ -41,7 +28,7 @@ async def get_utaten_lyric_pdf(item_id: str):
 @app.get("/utaten/{item_id}.tex")
 async def get_utaten_lyric_tex(item_id: str):
     try:
-        tex = await _get_utaten_tex_source(item_id)
+        tex = await html_cache.get_utaten_tex_source(item_id)
         return Response(content=tex, media_type='application/x-tex')
-    except TexSourceGenerationError as e:
+    except htmlcache.TexSourceGenerationError as e:
         return Response(content=str(e), status_code=503)
```