
Revert to using timestamp instead of expire in cache; make the cache-cleaning cutoff configurable instead.

Casper V. Kristensen 2019-05-12 21:01:15 +02:00
parent 46634789a3
commit c6d1a9ce6e
Signed by: caspervk
GPG key ID: 289CA03790535054
2 changed files with 34 additions and 43 deletions
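
For orientation, a minimal sketch of the two freshness checks this commit switches between. The names (expire, timestamp, cache_time) follow the diff below; the helper functions themselves are hypothetical and only illustrate the logic.

from datetime import datetime, timedelta

# Before this commit: each cached row stores an absolute expiry, fixed when
# the row is written, so the first caller's cache time applies to everyone.
def is_fresh_expire(expire: float) -> bool:
    return datetime.fromtimestamp(expire) > datetime.utcnow()

# After this commit: each row stores the time it was written; the caller's
# cache_time decides freshness at read time, so different callers can apply
# different cache times to the same row.
def is_fresh_timestamp(timestamp: float, cache_time: timedelta) -> bool:
    return datetime.fromtimestamp(timestamp) > datetime.utcnow() - cache_time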

View file

@@ -13,6 +13,13 @@ from .config import DATA_DIR, CONFIG
 
 logger = logging.getLogger(__name__)
 
+connection = sqlite3.connect(DATA_DIR.joinpath("cache.sqlite"))
+connection.row_factory = sqlite3.Row  # allow accessing rows by index and case-insensitively by name
+connection.text_factory = bytes  # do not try to decode bytes as utf-8 strings
+
+DEFAULT_CACHE_TIME = timedelta(seconds=CONFIG["web"].getint("cache_time"))
+logger.info("Default cache time is %s", DEFAULT_CACHE_TIME)
+
 
 class Response:
     def __init__(self, bytes: bytes = None) -> None:
@@ -24,33 +31,23 @@ class Response:
         return json.loads(self.bytes)
 
 
-connection = sqlite3.connect(DATA_DIR.joinpath("cache.sqlite"))
-connection.row_factory = sqlite3.Row  # allow accessing rows by index and case-insensitively by name
-connection.text_factory = bytes  # do not try to decode bytes as utf-8 strings
-CACHE_TIME = timedelta(seconds=CONFIG["web"].getint("cache_time"))
-logger.info("Default cache time is %s", CACHE_TIME)
-
-connection.execute(
-    """
+def setup():
+    connection.execute("""
         CREATE TABLE IF NOT EXISTS
           requests (id INTEGER PRIMARY KEY,
                     url TEXT UNIQUE NOT NULL,
                     response BLOB NOT NULL,
-                    expire INTEGER NOT NULL);
-    """
-)
+                    timestamp INTEGER NOT NULL);
+    """)
 
 
-def clean():
-    connection.execute(
-        """
+def clean(older_than=timedelta(days=3)):
+    connection.execute("""
         DELETE FROM requests
-        WHERE expire < :expire;
+        WHERE timestamp < :cutoff;
     """, {
-        "expire": datetime.utcnow().timestamp(),
-    }
-    )
+        "cutoff": (datetime.utcnow() - older_than).timestamp(),
+    })
     connection.execute("VACUUM;")
     connection.commit()
@@ -58,15 +55,11 @@ def clean():
 last_request = defaultdict(float)
 
 
-def get(url: str, params: Mapping = None, cache_time: timedelta = CACHE_TIME,
+def get(url: str, params: Mapping = None, cache_time: timedelta = DEFAULT_CACHE_TIME,
         ratelimit: Optional[float] = 1, *args, **kwargs) -> Response:
     """
     Sends a GET request, caching the result for cache_time. If 'ratelimit' is supplied, requests are rate limited at the
     host-level to this number of requests per second.
-
-    We're saving requests' expire instead of the timestamp it was received to allow for varying cache times; if we were
-    saving the timestamp, clean() wouldn't know when to delete unless the cache time was always the same. This, however,
-    also means that the first call determines for how long subsequent calls will consider a request fresh.
     """
     if params is not None:
         url += "?" + urllib.parse.urlencode(params)
@@ -75,17 +68,15 @@ def get(url: str, params: Mapping = None, cache_time: timedelta = CACHE_TIME,
 
     #logger.debug("Get %s", url)
 
-    row = connection.execute(
-        """
-        SELECT response, expire
+    row = connection.execute("""
+        SELECT response, timestamp
         FROM requests
         WHERE url = :url;
     """, {
         "url": url
-    }
-    ).fetchone()
+    }).fetchone()
 
-    if row is not None and datetime.fromtimestamp(row["expire"]) > datetime.utcnow():
+    if row is not None and datetime.fromtimestamp(row["timestamp"]) > datetime.utcnow() - cache_time:
         #logger.debug("Cache hit: %s", url)
         return Response(row["response"])
@@ -100,15 +91,16 @@ def get(url: str, params: Mapping = None, cache_time: timedelta = CACHE_TIME,
     response = Response(urlopen(request).read())
     last_request[request.host] = time.time()
 
-    connection.execute(
-        """
-        INSERT OR REPLACE INTO requests(url, response, expire)
-        VALUES (:url, :response, :expire);
+    connection.execute("""
+        INSERT OR REPLACE INTO requests(url, response, timestamp)
+        VALUES (:url, :response, :timestamp);
     """, {
         "url": url,
         "response": response.bytes,
-        "expire": (datetime.utcnow() + cache_time).timestamp()
-    }
-    )
+        "timestamp": datetime.utcnow().timestamp()
+    })
     connection.commit()
 
     return response
+
+
+setup()
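
A rough usage sketch of the reworked module, assuming it is importable as cache (matching the cache.get(...) call in the file below); the URLs and values are illustrative, not from the repository.

from datetime import timedelta

import cache  # assumed module name, matching "cache.get(...)" in the file below

# Uses the configured default cache time (CONFIG["web"]["cache_time"]).
r = cache.get("https://api.example.com/items", params={"page": 1})
print(r.json())

# A caller-specific cache time; freshness is now decided at read time from the
# stored timestamp, so callers no longer inherit whatever expiry was written first.
r = cache.get("https://api.example.com/items", cache_time=timedelta(hours=1))

# Delete rows older than the (now configurable) cutoff, then VACUUM and commit.
cache.clean(older_than=timedelta(days=7))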

View file

@@ -11,8 +11,7 @@ logger = logging.getLogger(__name__)
 def web_search(query: str) -> List[str]:
     logger.debug("Searching Google for %s", query)
     try:
-        # disable rate-limiting since we have a proper API-key (unlike the other APIs we are using)
-        r = cache.get("https://www.googleapis.com/customsearch/v1", ratelimit=None, params={
+        r = cache.get("https://www.googleapis.com/customsearch/v1", params={
             "key": CONFIG["google"]["key"],
             "cx": CONFIG["google"]["cx"],
             "q": query