From 55bb519e964faf42fd480962877909f9e8ce37e3 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 27 Jun 2022 19:31:07 +0700 Subject: [PATCH] refactor to use cache locking instead of schedule --- README.md | 20 ++++-- tilefy/requirements.txt | 2 +- tilefy/src/cache.py | 47 ++++++++++++++ tilefy/src/config_parser.py | 109 ++++++++++++++++++++++++++++++++ tilefy/src/scheduler.py | 105 ------------------------------ tilefy/src/scheduler_rebuild.py | 10 --- tilefy/src/template.py | 23 ++++--- tilefy/src/tilefy_redis.py | 24 ++----- tilefy/src/watcher.py | 7 +- tilefy/views.py | 32 ++++++---- tiles.example.yml | 4 +- 11 files changed, 218 insertions(+), 165 deletions(-) create mode 100644 tilefy/src/cache.py create mode 100644 tilefy/src/config_parser.py delete mode 100644 tilefy/src/scheduler.py delete mode 100644 tilefy/src/scheduler_rebuild.py diff --git a/README.md b/README.md index 78ad8f3..04ac335 100644 --- a/README.md +++ b/README.md @@ -38,11 +38,11 @@ Main Python application to create and serve your tiles, built with Flask. - Set your timezone with the `TZ` environment variable to configure the scheduler, defaults to *UTC*. ### Redis JSON -Functions as a cache and holds the scheduler data storage and history. +Functions as a cache, holds your configurations. - Needs a volume at **/data** to store your configurations permanently. ## Configuration -Create a yml config file where you have mounted your `/data/tiles.yml` folder. Take a look at the provided `tiles.example.yml` for the basic syntax. *tiles* is the top level key, list your tiles below. The main key of the tile is your slug and will become your url, so use no spaces or special characters. +Create a yml config file at `/data/tiles.yml`. Take a look at the provided `tiles.example.yml` for the basic syntax. *tiles* is the top level key, list your tiles below. The main key of the tile is your slug and will become your url, so use no spaces or special characters. 
### tile_name Give your tile a unique human readable name. @@ -76,10 +76,18 @@ Provide your custom font by adding them to `/data/fonts`, in TTF format only and Defaults to `true` for all numbers. Shorten long numbers in to a more human readable string, like *14502* to *14.5K*. ### recreate: optional -Recreate tiles periodically, provide your custom schedule as a cron tab or use `on_demand` to recreate the tile for every request. Defaults to `0 0 * * *` aka every day at midnight. Be aware of any rate limiting and API quotas you might face with a too frequent schedule. -Note: -- There is automatically a random jitter for cron tab of 15 secs to avoid parallel requests for a lot of tiles. -- There is a failsafe in place to block recreating tiles faster than every 60 seconds. +Set the lifetime of your tiles and define when the tile will be recreated if requested. Defaults to *1d*, i.e. recreate once a day. + +Valid options: +- *120*: A plain number is the number of seconds until the tile expires +- *10min*: Minutes until the tile expires +- *2h*: Hours until the tile expires +- *1d*: Days until the tile expires +- *on_demand*: Will recreate for every request. + +Note: +- Be aware of any rate limiting and API quotas you might face with an expiration that is too short. +- There is a failsafe in place to block recreating tiles faster than every 60 seconds. ## API requests Get values from a public API by providing the url and key_map. 
diff --git a/tilefy/requirements.txt b/tilefy/requirements.txt index 44a1478..af74dcf 100644 --- a/tilefy/requirements.txt +++ b/tilefy/requirements.txt @@ -3,6 +3,6 @@ beautifulsoup4==4.11.1 flask==2.1.2 Pillow==9.1.1 PyYAML==6.0 -redis==4.3.3 +redis==4.3.4 requests==2.28.0 uwsgi==2.0.20 diff --git a/tilefy/src/cache.py b/tilefy/src/cache.py new file mode 100644 index 0000000..057fc17 --- /dev/null +++ b/tilefy/src/cache.py @@ -0,0 +1,47 @@ +"""validate cached tiles and clear rebuild locks""" + +from redis.connection import ResponseError +from src.template import create_single_tile +from src.tilefy_redis import TilefyRedis + + +class CacheManager: + """handle rebuild cache for tiles""" + + SEC_MAP = { + "min": 60, + "h": 60 * 60, + "d": 60 * 60 * 24, + } + + def __init__(self, tilename): + self.tilename = tilename + self.tile_config = self.get_tile_config() + + def get_tile_config(self): + """get tile conf from redis, False on ResponseError""" + path = f"tiles.{self.tilename}" + try: + tile_config = TilefyRedis().get_message("config", path=path) + except ResponseError: + tile_config = False + + return tile_config + + def validate(self): + """use cached tile if lock key exists, else recreate tile""" + key = f"lock:{self.tilename}" + use_cached = TilefyRedis().get_message(key) + if use_cached: + print(f"{self.tilename}: use cached tile") + return + + create_single_tile(self.tilename, self.tile_config) + + +def clear_locks(): + """clear all locks from redis""" + _redis = TilefyRedis() + all_locks = _redis.get_keys("lock") + for lock in all_locks: + _redis.del_message(lock) diff --git a/tilefy/src/config_parser.py b/tilefy/src/config_parser.py new file mode 100644 index 0000000..54308bc --- /dev/null +++ b/tilefy/src/config_parser.py @@ -0,0 +1,109 @@ +"""parse and load yml""" + +import os +import re + +import yaml +from src.cache import clear_locks +from src.tilefy_redis import TilefyRedis + + +class ConfigFile: + """represent tiles.yml file""" + + TILES_CONFIG = "/data/tiles.yml" + VALID_KEYS = [ + "background_color", + "font_color", + "font", + "height", 
+ "humanize", + "key_map", + "logos", + "plugin", + "recreate", + "tile_name", + "url", + "width", + ] + SEC_MAP = { + "min": 60, + "h": 60 * 60, + "d": 60 * 60 * 24, + } + MIN_EXPIRE = 60 + + def __init__(self): + self.exists = os.path.exists(self.TILES_CONFIG) + self.config_raw = False + self.config = False + + def load_yml(self): + """read, validate and save yml in redis, then clear all locks""" + if not self.exists: + print("missing tiles.yml") + return + + self.get_conf() + self.validate_conf() + self.add_expire() + self.save_config() + clear_locks() + + def get_conf(self): + """read config file into config_raw""" + with open(self.TILES_CONFIG, "r", encoding="utf-8") as yml_file: + file_content = yml_file.read() + self.config_raw = yaml.load(file_content, Loader=yaml.CLoader) + + def validate_conf(self): + """raise ValueError on missing tiles key or unexpected tile key""" + print(f"{self.TILES_CONFIG}: validate") + all_tiles = self.config_raw.get("tiles") + if not all_tiles: + raise ValueError("missing tiles key") + + for tile_name, tile_conf in all_tiles.items(): + for tile_conf_key in tile_conf: + if tile_conf_key not in self.VALID_KEYS: + message = f"{tile_name}: unexpected key {tile_conf_key}" + raise ValueError(message) + + self.config = self.config_raw.copy() + + def add_expire(self): + """add recreate_sec to every tile_conf""" + all_tiles = self.config.get("tiles") + for tile_conf in all_tiles.values(): + expire = self._build_expire(tile_conf) + tile_conf.update({"recreate_sec": expire}) + + def _build_expire(self, tile_config): + """parse recreate into secs, default one day, on_demand is MIN_EXPIRE""" + recreate = tile_config.get("recreate", False) + if not recreate: + return self.SEC_MAP["d"] + + if isinstance(recreate, int): + if recreate < self.MIN_EXPIRE: + return self.MIN_EXPIRE + + return recreate + + if recreate == "on_demand": + return self.MIN_EXPIRE + + try: + value, unit = re.findall(r"[a-z]+|\d+", recreate.lower()) + except ValueError as err: + print(f"failed to extract value and unit of {recreate}") + raise err + + if unit not in self.SEC_MAP: + raise 
ValueError(f"unit not in {self.SEC_MAP.keys()}") + + return int(value) * self.SEC_MAP.get(unit) + + def save_config(self): + """save config in redis""" + TilefyRedis().set_message("config", self.config) diff --git a/tilefy/src/scheduler.py b/tilefy/src/scheduler.py deleted file mode 100644 index b82c239..0000000 --- a/tilefy/src/scheduler.py +++ /dev/null @@ -1,105 +0,0 @@ -"""configure scheduled jobs""" - -from os import environ - -from apscheduler.schedulers.background import BackgroundScheduler -from apscheduler.triggers.cron import CronTrigger -from src.template import create_single_tile -from src.tilefy_redis import TilefyRedis -from src.watcher import watch_yml - - -class TilefyScheduler: - """interact with scheduler""" - - CRON_DEFAULT = "0 0 * * *" - - def __init__(self): - self.scheduler = BackgroundScheduler(timezone=environ.get("TZ", "UTC")) - self.add_job_store() - self.tiles = self.get_tiles() - - def get_tiles(self): - """get all tiles set in config""" - config = TilefyRedis().get_message("config") - if not config: - print("no tiles defined in tiles.yml") - return False - - return config["tiles"] - - def setup_schedule(self): - """startup""" - if not self.tiles: - print("no tiles defined in tiles.yml") - return - - jobs = self.build_jobs() - self.add_jobs(jobs) - self.add_watcher() - - if not self.scheduler.running: - self.scheduler.start() - - def add_job_store(self): - """add jobstore to scheudler""" - self.scheduler.add_jobstore( - "redis", - jobs_key="tl:jobs", - run_times_key="tl:run_times", - host=environ.get("REDIS_HOST"), - port=environ.get("REDIS_PORT"), - ) - - def clear_old(self): - """remove old jobs before recreating""" - if not self.scheduler.running: - self.scheduler.start() - - all_jobs = self.scheduler.get_jobs() - for job in all_jobs: - if job.id == "watcher": - continue - self.scheduler.remove_job(job.id) - - def build_jobs(self): - """build list of expected jobs""" - jobs = [] - for idx, (tile_slug, tile_conf) in 
enumerate(self.tiles.items()): - job = { - "job_id": str(idx), - "job_name": tile_slug, - "tile_conf": tile_conf, - } - jobs.append(job) - - return jobs - - def add_jobs(self, jobs): - """add jobs to scheduler""" - for job in jobs: - cron_tab = job["tile_conf"].get("recreate", self.CRON_DEFAULT) - if cron_tab == "on_demand": - continue - - job_name = job["job_name"] - self.scheduler.add_job( - create_single_tile, - CronTrigger.from_crontab(cron_tab), - id=job["job_id"], - name=job_name, - args=[job_name, job["tile_conf"]], - jitter=15, - replace_existing=True, - ) - print(f"{job_name}: Add job {cron_tab}") - - def add_watcher(self): - """add watcher to jobs""" - self.scheduler.add_job( - watch_yml, - "interval", - seconds=5, - id="watcher", - replace_existing=True, - ) diff --git a/tilefy/src/scheduler_rebuild.py b/tilefy/src/scheduler_rebuild.py deleted file mode 100644 index ef922e3..0000000 --- a/tilefy/src/scheduler_rebuild.py +++ /dev/null @@ -1,10 +0,0 @@ -"""rebuild jobs in scheduler""" - -from src import scheduler - - -def rebuild(): - """rebuild""" - handler = scheduler.TilefyScheduler() - handler.clear_old() - handler.setup_schedule() diff --git a/tilefy/src/template.py b/tilefy/src/template.py index 6edd6a1..313457d 100644 --- a/tilefy/src/template.py +++ b/tilefy/src/template.py @@ -115,12 +115,19 @@ def create_all_tiles(): def create_single_tile(tile_slug, tile_config): """create a single tile""" - key = f"lock:{tile_slug}" - locked = TilefyRedis().get_message(key) - if locked: - print(f"{tile_slug}: skip rebuild within 60secs") - return - TileImage(tile_slug, tile_config).build_tile() - message = {"recreate": int(datetime.now().strftime("%s"))} - TilefyRedis().set_message(key, message, expire=60) + + now = datetime.now() + date_format = "%Y-%m-%d %H:%M:%S" + expire_sec = tile_config["recreate_sec"] + expire_epoch = int(now.strftime("%s")) + expire_sec + expire_str = datetime.fromtimestamp(expire_epoch).strftime(date_format) + + message = { + 
"recreated": int(now.strftime("%s")), + "recreated_str": now.strftime(date_format), + "expire": expire_epoch, + "expire_str": expire_str, + } + + TilefyRedis().set_message(f"lock:{tile_slug}", message, expire=expire_sec) diff --git a/tilefy/src/tilefy_redis.py b/tilefy/src/tilefy_redis.py index 471cc58..c1d4bc4 100644 --- a/tilefy/src/tilefy_redis.py +++ b/tilefy/src/tilefy_redis.py @@ -4,9 +4,6 @@ import json import os import redis -import yaml - -TILES_CONFIG = "/data/tiles.yml" class RedisBase: @@ -41,20 +38,13 @@ class TilefyRedis(RedisBase): return False + def get_keys(self, key): + """get list of all key matches""" + command = f"{self.NAME_SPACE}{key}:*" + all_keys = self.conn.execute_command("KEYS", command) + + return [i.decode().split(self.NAME_SPACE)[1] for i in all_keys] + def del_message(self, key): """delete message from redis""" self.conn.execute_command("JSON.DEL", self.NAME_SPACE + key) - - -def load_yml(): - """read yml file""" - - if not os.path.exists(TILES_CONFIG): - print("missing tiles.yml") - return - - with open(TILES_CONFIG, "r", encoding="utf-8") as yml_file: - file_content = yml_file.read() - config_raw = yaml.load(file_content, Loader=yaml.CLoader) - - TilefyRedis().set_message("config", config_raw) diff --git a/tilefy/src/watcher.py b/tilefy/src/watcher.py index 4853b67..d138aa5 100644 --- a/tilefy/src/watcher.py +++ b/tilefy/src/watcher.py @@ -3,9 +3,9 @@ import hashlib import os -from src.scheduler_rebuild import rebuild +from src.config_parser import ConfigFile from src.template import create_all_tiles -from src.tilefy_redis import TilefyRedis, load_yml +from src.tilefy_redis import TilefyRedis class Watcher: @@ -22,10 +22,9 @@ class Watcher: modified = self.is_changed() if modified: print(f"{self.FILE_PATH}: modified") - load_yml() + ConfigFile().load_yml() create_all_tiles() self._store_last() - rebuild() def is_changed(self): """check if file has changed""" diff --git a/tilefy/views.py b/tilefy/views.py index ec7726c..e5b26ad 
100644 --- a/tilefy/views.py +++ b/tilefy/views.py @@ -2,17 +2,22 @@ import os -from flask import Flask, render_template, send_from_directory -from src.scheduler import TilefyScheduler -from src.template import create_all_tiles, create_single_tile -from src.tilefy_redis import TilefyRedis, load_yml +from apscheduler.schedulers.background import BackgroundScheduler +from flask import Flask, Response, render_template, send_from_directory +from src.cache import CacheManager +from src.config_parser import ConfigFile +from src.template import create_all_tiles +from src.tilefy_redis import TilefyRedis +from src.watcher import watch_yml app = Flask(__name__) - -load_yml() -TilefyScheduler().setup_schedule() +ConfigFile().load_yml() create_all_tiles() +scheduler = BackgroundScheduler(timezone=os.environ.get("TZ", "UTC")) +scheduler.add_job(watch_yml, "interval", seconds=5) +scheduler.start() + @app.route("/") def home(): @@ -33,13 +38,16 @@ def home(): def get_tile(tile_path): """return tile as image""" tilename = os.path.splitext(tile_path)[0] - tile_config = TilefyRedis().get_message("config", path=f"tiles.{tilename}") - recreate = tile_config.get("recreate") - if recreate == "on_demand": - create_single_tile(tilename, tile_config) + + cache_handler = CacheManager(tilename) + if not cache_handler.tile_config: + print(f"tile not found: {tilename}") + return Response("tile not found", status=404) + + cache_handler.validate() return send_from_directory( directory="/data/tiles", path=tile_path, - cache_timeout=100, + cache_timeout=60, ) diff --git a/tiles.example.yml b/tiles.example.yml index 7df621c..2622970 100644 --- a/tiles.example.yml +++ b/tiles.example.yml @@ -13,7 +13,7 @@ tiles: url: https://hub.docker.com/v2/repositories/bbilly1/tubearchivist/ key_map: - pull_count - recreate: "0 * * * *" + recreate: "1d" tubearchivist-github-star: tile_name: Tube Archivist GitHub Stars background_color: "#00202f" @@ -28,7 +28,7 @@ tiles: - stargazers_count humanize: false font: 
ttf-bitstream-vera/VeraMono.ttf - recreate: "1 * * * *" + recreate: "12h" tubearchivist-firefox: tile_name: TA Companion Firefox users background_color: "#00202f"