
Revert to using timestamp instead of expire in cache; make the cache-cleaning cutoff configurable instead.

Casper V. Kristensen 2019-05-12 21:01:15 +02:00
parent 46634789a3
commit c6d1a9ce6e
Signed by: caspervk
GPG key ID: 289CA03790535054
2 changed files with 34 additions and 43 deletions
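
For orientation, a minimal sketch of the two freshness checks this commit switches between. The names (expire, timestamp, cache_time) follow the diff below; the helper functions themselves are hypothetical and only illustrate the logic.

from datetime import datetime, timedelta

# Before this commit: each cached row stores an absolute expiry, fixed when
# the row is written, so the first caller's cache time applies to everyone.
def is_fresh_expire(expire: float) -> bool:
    return datetime.fromtimestamp(expire) > datetime.utcnow()

# After this commit: each row stores the time it was written; the caller's
# cache_time decides freshness at read time, so different callers can apply
# different cache times to the same row.
def is_fresh_timestamp(timestamp: float, cache_time: timedelta) -> bool:
    return datetime.fromtimestamp(timestamp) > datetime.utcnow() - cache_time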

View file

@@ -13,6 +13,13 @@ from .config import DATA_DIR, CONFIG
 
 logger = logging.getLogger(__name__)
 
+connection = sqlite3.connect(DATA_DIR.joinpath("cache.sqlite"))
+connection.row_factory = sqlite3.Row  # allow accessing rows by index and case-insensitively by name
+connection.text_factory = bytes  # do not try to decode bytes as utf-8 strings
+
+DEFAULT_CACHE_TIME = timedelta(seconds=CONFIG["web"].getint("cache_time"))
+logger.info("Default cache time is %s", DEFAULT_CACHE_TIME)
+
 
 class Response:
     def __init__(self, bytes: bytes = None) -> None:
@@ -24,33 +31,23 @@ class Response:
         return json.loads(self.bytes)
 
 
-connection = sqlite3.connect(DATA_DIR.joinpath("cache.sqlite"))
-connection.row_factory = sqlite3.Row  # allow accessing rows by index and case-insensitively by name
-connection.text_factory = bytes  # do not try to decode bytes as utf-8 strings
-CACHE_TIME = timedelta(seconds=CONFIG["web"].getint("cache_time"))
-logger.info("Default cache time is %s", CACHE_TIME)
-
-connection.execute(
-    """
+def setup():
+    connection.execute("""
         CREATE TABLE IF NOT EXISTS
           requests (id INTEGER PRIMARY KEY,
                     url TEXT UNIQUE NOT NULL,
                     response BLOB NOT NULL,
-                    expire INTEGER NOT NULL);
-    """
-)
+                    timestamp INTEGER NOT NULL);
+    """)
 
 
-def clean():
-    connection.execute(
-        """
+def clean(older_than=timedelta(days=3)):
+    connection.execute("""
         DELETE FROM requests
-        WHERE expire < :expire;
+        WHERE timestamp < :cutoff;
     """, {
-        "expire": datetime.utcnow().timestamp(),
-    }
-    )
+        "cutoff": (datetime.utcnow() - older_than).timestamp(),
+    })
     connection.execute("VACUUM;")
     connection.commit()
@@ -58,15 +55,11 @@ def clean():
 last_request = defaultdict(float)
 
 
-def get(url: str, params: Mapping = None, cache_time: timedelta = CACHE_TIME,
+def get(url: str, params: Mapping = None, cache_time: timedelta = DEFAULT_CACHE_TIME,
         ratelimit: Optional[float] = 1, *args, **kwargs) -> Response:
     """
     Sends a GET request, caching the result for cache_time. If 'ratelimit' is supplied, requests are rate limited at the
     host-level to this number of requests per second.
-
-    We're saving requests' expire instead of the timestamp it was received to allow for varying cache times; if we were
-    saving the timestamp, clean() wouldn't know when to delete unless the cache time was always the same. This, however,
-    also means that the first call determines for how long subsequent calls will consider a request fresh.
     """
     if params is not None:
         url += "?" + urllib.parse.urlencode(params)
@@ -75,17 +68,15 @@ def get(url: str, params: Mapping = None, cache_time: timedelta = CACHE_TIME,
 
     #logger.debug("Get %s", url)
 
-    row = connection.execute(
-        """
-        SELECT response, expire
+    row = connection.execute("""
+        SELECT response, timestamp
         FROM requests
         WHERE url = :url;
     """, {
         "url": url
-    }
-    ).fetchone()
+    }).fetchone()
 
-    if row is not None and datetime.fromtimestamp(row["expire"]) > datetime.utcnow():
+    if row is not None and datetime.fromtimestamp(row["timestamp"]) > datetime.utcnow() - cache_time:
         #logger.debug("Cache hit: %s", url)
         return Response(row["response"])
@@ -100,15 +91,16 @@ def get(url: str, params: Mapping = None, cache_time: timedelta = CACHE_TIME,
     response = Response(urlopen(request).read())
     last_request[request.host] = time.time()
 
-    connection.execute(
-        """
-        INSERT OR REPLACE INTO requests(url, response, expire)
-        VALUES (:url, :response, :expire);
+    connection.execute("""
+        INSERT OR REPLACE INTO requests(url, response, timestamp)
+        VALUES (:url, :response, :timestamp);
     """, {
         "url": url,
         "response": response.bytes,
-        "expire": (datetime.utcnow() + cache_time).timestamp()
-    }
-    )
+        "timestamp": datetime.utcnow().timestamp()
+    })
     connection.commit()
 
     return response
+
+
+setup()
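
A rough usage sketch of the reworked module, assuming it is importable as cache (matching the cache.get(...) call in the file below); the URLs and values are illustrative, not from the repository.

from datetime import timedelta

import cache  # assumed module name, matching "cache.get(...)" in the file below

# Uses the configured default cache time (CONFIG["web"]["cache_time"]).
r = cache.get("https://api.example.com/items", params={"page": 1})
print(r.json())

# A caller-specific cache time; freshness is now decided at read time from the
# stored timestamp, so callers no longer inherit whatever expiry was written first.
r = cache.get("https://api.example.com/items", cache_time=timedelta(hours=1))

# Delete rows older than the (now configurable) cutoff, then VACUUM and commit.
cache.clean(older_than=timedelta(days=7))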

View file

@@ -11,8 +11,7 @@ logger = logging.getLogger(__name__)
 def web_search(query: str) -> List[str]:
     logger.debug("Searching Google for %s", query)
     try:
-        # disable rate-limiting since we have a proper API-key (unlike the other APIs we are using)
-        r = cache.get("https://www.googleapis.com/customsearch/v1", ratelimit=None, params={
+        r = cache.get("https://www.googleapis.com/customsearch/v1", params={
             "key": CONFIG["google"]["key"],
             "cx": CONFIG["google"]["cx"],
             "q": query