summary refs log tree commit diff
diff options
context:
space:
mode:
author Keuin <[email protected]> 2023-06-02 00:15:30 +0800
committer Keuin <[email protected]> 2023-06-02 00:15:30 +0800
commit ed540d68256f3f05e86b7880858249a1e8a17c42 (patch)
tree e991424128a0a39578634f00f68e8e3bf5dd4f8b
parent 77c5ef83402aa28fc6d3a9d2982ccb03fea70d2f (diff)
Cache HTML source on disk
-rw-r--r-- htmlcache.py 33
-rw-r--r-- web.py 23
2 files changed, 38 insertions, 18 deletions
diff --git a/htmlcache.py b/htmlcache.py
new file mode 100644
index 0000000..2f727a7
--- /dev/null
+++ b/htmlcache.py
@@ -0,0 +1,33 @@
+import os
+
+import aiohttp
+
+import main
+
+
class TexSourceGenerationError(Exception):
    """Signals that the TeX source for a lyric page could not be produced."""
+
+
class HtmlCache:
    """Disk-backed cache of utaten.com lyric page HTML.

    Each page is stored as ``<item_id>.html`` inside the cache directory.
    A lookup reads the cached copy when one is available; otherwise the
    page is downloaded and a copy is saved for later requests.
    """

    def __init__(self, cache_path: str):
        """Remember the cache directory as an absolute path.

        Args:
            cache_path: Directory holding the cached HTML files. It is
                created on the first cache write if it does not exist.
        """
        self._cache_path = os.path.abspath(cache_path)

    async def get_utaten_tex_source(self, item_id: str) -> str:
        """Return the TeX source for the utaten.com lyric page ``item_id``.

        Args:
            item_id: Lyric identifier; it is used both in the page URL and
                as the cache file name.

        Returns:
            The result of ``main.html_to_tex`` applied to the page HTML.

        Raises:
            TexSourceGenerationError: If ``item_id`` contains a path
                separator, or the page request returns a non-OK status.
        """
        # The id becomes part of a file name, so it must not be able to
        # address anything outside the cache directory.
        if os.path.basename(item_id) != item_id:
            raise TexSourceGenerationError(f'Invalid item id: {item_id!r}')

        cache_file_path = os.path.join(self._cache_path, f'{item_id}.html')
        html = self._read_cached(cache_file_path)
        if html is None:
            html = await self._download(item_id)
            self._write_cached(cache_file_path, item_id, html)
        return main.html_to_tex(html)

    @staticmethod
    def _read_cached(cache_file_path: str):
        """Return the cached HTML text, or ``None`` when it cannot be read."""
        # Open directly instead of checking existence first: the file may
        # appear or vanish between a check and the open.
        try:
            with open(cache_file_path, 'r', encoding='utf-8') as f:
                return f.read()
        except FileNotFoundError:
            return None
        except OSError as e:
            # An unreadable entry is treated as a miss so the page can
            # still be served from the network.
            print(f'Failed to read cache file `{cache_file_path}`: {e}')
            return None

    @staticmethod
    async def _download(item_id: str) -> str:
        """Fetch the lyric page HTML from utaten.com."""
        async with aiohttp.ClientSession() as ses:
            async with ses.get(f'https://utaten.com/lyric/{item_id}/') as r:
                if not r.ok:
                    raise TexSourceGenerationError('HTTP request failed when reading page source')
                return await r.text()

    def _write_cached(self, cache_file_path: str, item_id: str, html: str) -> None:
        """Store ``html`` in the cache; failures are reported, never raised."""
        # Write to a per-process temporary name and rename it into place so
        # a reader never observes a partially written cache file.
        tmp_path = f'{cache_file_path}.{os.getpid()}.tmp'
        try:
            # Without the directory every write would fail and the cache
            # would never be populated.
            os.makedirs(self._cache_path, exist_ok=True)
            with open(tmp_path, 'w', encoding='utf-8') as f:
                f.write(html)
            os.replace(tmp_path, cache_file_path)
        except OSError as e:
            print(f'Failed to update cache for song `{item_id}`: {e}')
            try:
                os.remove(tmp_path)
            except OSError:
                # Best-effort cleanup: the temporary file may never have
                # been created in the first place.
                pass
diff --git a/web.py b/web.py
index 007e93d..e2d7fdf 100644
--- a/web.py
+++ b/web.py
@@ -1,36 +1,23 @@
import re
-import aiohttp
from fastapi import FastAPI, Response
from fastapi.responses import FileResponse
-import main
+import htmlcache
import texgen
app = FastAPI()
utaten_pattern = re.compile(r'[a-z0-9]+')
tex_generator = texgen.TexGenerator('pdf_cache', 'temp', 20)
-
-
-class TexSourceGenerationError(Exception):
- pass
-
-
-async def _get_utaten_tex_source(item_id: str) -> str:
- async with aiohttp.ClientSession() as ses:
- async with ses.get(f'https://utaten.com/lyric/{item_id}/') as r:
- if not r.ok:
- raise TexSourceGenerationError('HTTP request failed when reading page source')
- html = await r.text()
- return main.html_to_tex(html)
+html_cache = htmlcache.HtmlCache('html_cache')
@app.get("/utaten/{item_id}.pdf")
async def get_utaten_lyric_pdf(item_id: str):
try:
print('_get_utaten_tex_source')
- tex = await _get_utaten_tex_source(item_id)
+ tex = await html_cache.get_utaten_tex_source(item_id)
print('xelatex')
pdf_path = await tex_generator.xelatex(tex)
return FileResponse(pdf_path, media_type='application/pdf')
@@ -41,7 +28,7 @@ async def get_utaten_lyric_pdf(item_id: str):
@app.get("/utaten/{item_id}.tex")
async def get_utaten_lyric_tex(item_id: str):
try:
- tex = await _get_utaten_tex_source(item_id)
+ tex = await html_cache.get_utaten_tex_source(item_id)
return Response(content=tex, media_type='application/x-tex')
- except TexSourceGenerationError as e:
+ except htmlcache.TexSourceGenerationError as e:
return Response(content=str(e), status_code=503)