Revert to using timestamp instead of expire in cache - cache cleaning cutoff configurable instead.
This commit is contained in:
parent 46634789a3
commit c6d1a9ce6e
@@ -13,6 +13,13 @@ from .config import DATA_DIR, CONFIG

 logger = logging.getLogger(__name__)

+connection = sqlite3.connect(DATA_DIR.joinpath("cache.sqlite"))
+connection.row_factory = sqlite3.Row  # allow accessing rows by index and case-insensitively by name
+connection.text_factory = bytes  # do not try to decode bytes as utf-8 strings
+
+DEFAULT_CACHE_TIME = timedelta(seconds=CONFIG["web"].getint("cache_time"))
+logger.info("Default cache time is %s", DEFAULT_CACHE_TIME)
+

 class Response:
     def __init__(self, bytes: bytes = None) -> None:
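The connection and default cache time now live at module import. With sqlite3.Row as the row factory, columns can be read by index or by name, case-insensitively, exactly as the inline comment says; a minimal standalone check (in-memory database, illustrative only, not part of the diff):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.row_factory = sqlite3.Row

conn.execute("CREATE TABLE requests (url TEXT, timestamp INTEGER);")
conn.execute("INSERT INTO requests VALUES ('https://example.com', 0);")

row = conn.execute("SELECT * FROM requests;").fetchone()
print(row[0])            # access by index
print(row["TIMESTAMP"])  # access by name, case-insensitive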
@@ -24,33 +31,23 @@ class Response:
         return json.loads(self.bytes)


-connection = sqlite3.connect(DATA_DIR.joinpath("cache.sqlite"))
-connection.row_factory = sqlite3.Row  # allow accessing rows by index and case-insensitively by name
-connection.text_factory = bytes  # do not try to decode bytes as utf-8 strings
-
-CACHE_TIME = timedelta(seconds=CONFIG["web"].getint("cache_time"))
-logger.info("Default cache time is %s", CACHE_TIME)
-
-connection.execute(
-    """
+def setup():
+    connection.execute("""
         CREATE TABLE IF NOT EXISTS
         requests (id INTEGER PRIMARY KEY,
                   url TEXT UNIQUE NOT NULL,
                   response BLOB NOT NULL,
-                  expire INTEGER NOT NULL);
-    """
-)
+                  timestamp INTEGER NOT NULL);
+    """)


-def clean():
-    connection.execute(
-        """
+def clean(older_than=timedelta(days=3)):
+    connection.execute("""
         DELETE FROM requests
-        WHERE expire < :expire;
+        WHERE timestamp < :cutoff;
     """, {
-        "expire": datetime.utcnow().timestamp(),
-    }
-    )
+        "cutoff": (datetime.utcnow() - older_than).timestamp(),
+    })
     connection.execute("VACUUM;")
     connection.commit()

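Since the cutoff is now an argument rather than being baked into each row, callers decide how much history to keep; a hypothetical invocation of the module's clean() (the seven-day window is illustrative):

from datetime import timedelta

# drop everything older than a week instead of the three-day default
clean(older_than=timedelta(days=7))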
@@ -58,15 +55,11 @@ def clean():
 last_request = defaultdict(float)


-def get(url: str, params: Mapping = None, cache_time: timedelta = CACHE_TIME,
+def get(url: str, params: Mapping = None, cache_time: timedelta = DEFAULT_CACHE_TIME,
         ratelimit: Optional[float] = 1, *args, **kwargs) -> Response:
     """
     Sends a GET request, caching the result for cache_time. If 'ratelimit' is supplied, requests are rate limited at the
     host level to this number of requests per second.
-
-    We're saving requests' expire instead of the timestamp it was received to allow for varying cache times; if we were
-    saving the timestamp, clean() wouldn't know when to delete unless the cache time was always the same. This, however,
-    also means that the first call determines for how long subsequent calls will consider a request fresh.
     """
     if params is not None:
         url += "?" + urllib.parse.urlencode(params)
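The removed docstring paragraph states the trade-off being reverted: with the timestamp stored instead of an expiry, freshness is decided per read, so different callers can apply different windows to the same cached row. A sketch of that behavior (URL and durations are illustrative, assuming the module's get()):

from datetime import timedelta

url = "https://example.com/api"
get(url, cache_time=timedelta(hours=1))    # first call fetches and caches
get(url, cache_time=timedelta(hours=1))    # within the hour: served from cache
get(url, cache_time=timedelta(seconds=0))  # zero window: always refetches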
@@ -75,17 +68,15 @@ def get(url: str, params: Mapping = None, cache_time: timedelta = CACHE_TIME,

     #logger.debug("Get %s", url)

-    row = connection.execute(
-        """
-        SELECT response, expire
+    row = connection.execute("""
+        SELECT response, timestamp
         FROM requests
         WHERE url = :url;
     """, {
         "url": url
-    }
-    ).fetchone()
+    }).fetchone()

-    if row is not None and datetime.fromtimestamp(row["expire"]) > datetime.utcnow():
+    if row is not None and datetime.fromtimestamp(row["timestamp"]) > datetime.utcnow() - cache_time:
         #logger.debug("Cache hit: %s", url)
         return Response(row["response"])
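The new condition reads as "the stored time is later than now minus cache_time", which is the same as "the entry is younger than cache_time"; a standalone restatement of the test, mirroring the module's naive-UTC timestamp convention (illustrative helper, not part of the diff):

from datetime import datetime, timedelta

def is_fresh(timestamp: float, cache_time: timedelta) -> bool:
    # equivalent to: datetime.fromtimestamp(timestamp) > datetime.utcnow() - cache_time
    return datetime.utcnow() - datetime.fromtimestamp(timestamp) < cache_time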
@@ -100,15 +91,16 @@ def get(url: str, params: Mapping = None, cache_time: timedelta = CACHE_TIME,

     response = Response(urlopen(request).read())
     last_request[request.host] = time.time()
-    connection.execute(
-        """
-        INSERT OR REPLACE INTO requests(url, response, expire)
-        VALUES (:url, :response, :expire);
+    connection.execute("""
+        INSERT OR REPLACE INTO requests(url, response, timestamp)
+        VALUES (:url, :response, :timestamp);
     """, {
         "url": url,
         "response": response.bytes,
-        "expire": (datetime.utcnow() + cache_time).timestamp()
-    }
-    )
+        "timestamp": datetime.utcnow().timestamp()
+    })
     connection.commit()
     return response
+
+
+setup()
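Because url is UNIQUE and the write uses INSERT OR REPLACE, a refetch overwrites the old row and resets its timestamp; a hypothetical end-to-end call (URL illustrative, reading the raw bytes held on the Response):

from datetime import timedelta

r = get("https://example.com/data.json", cache_time=timedelta(minutes=5))
print(len(r.bytes))  # the cached response body, stored as a BLOB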
@@ -11,8 +11,7 @@ logger = logging.getLogger(__name__)
 def web_search(query: str) -> List[str]:
     logger.debug("Searching Google for %s", query)
     try:
-        # disable rate-limiting since we have a proper API key (unlike the other APIs we are using)
-        r = cache.get("https://www.googleapis.com/customsearch/v1", ratelimit=None, params={
+        r = cache.get("https://www.googleapis.com/customsearch/v1", params={
             "key": CONFIG["google"]["key"],
             "cx": CONFIG["google"]["cx"],
             "q": query