1
0
Fork 0

Revert to using timestamp instead of expire in cache - cache cleaning cutoff configurable instead.

This commit is contained in:
Casper V. Kristensen 2019-05-12 21:01:15 +02:00
parent 46634789a3
commit c6d1a9ce6e
Signed by: caspervk
GPG key ID: 289CA03790535054
2 changed files with 34 additions and 43 deletions

View file

@@ -13,6 +13,13 @@ from .config import DATA_DIR, CONFIG
logger = logging.getLogger(__name__)
connection = sqlite3.connect(DATA_DIR.joinpath("cache.sqlite"))
connection.row_factory = sqlite3.Row # allow accessing rows by index and case-insensitively by name
connection.text_factory = bytes # do not try to decode bytes as utf-8 strings
DEFAULT_CACHE_TIME = timedelta(seconds=CONFIG["web"].getint("cache_time"))
logger.info("Default cache time is %s", DEFAULT_CACHE_TIME)
class Response:
def __init__(self, bytes: bytes = None) -> None:
@@ -24,33 +31,23 @@ class Response:
return json.loads(self.bytes)
connection = sqlite3.connect(DATA_DIR.joinpath("cache.sqlite"))
connection.row_factory = sqlite3.Row # allow accessing rows by index and case-insensitively by name
connection.text_factory = bytes # do not try to decode bytes as utf-8 strings
CACHE_TIME = timedelta(seconds=CONFIG["web"].getint("cache_time"))
logger.info("Default cache time is %s", CACHE_TIME)
connection.execute(
"""
def setup():
connection.execute("""
CREATE TABLE IF NOT EXISTS
requests (id INTEGER PRIMARY KEY,
url TEXT UNIQUE NOT NULL,
response BLOB NOT NULL,
expire INTEGER NOT NULL);
"""
)
timestamp INTEGER NOT NULL);
""")
def clean():
connection.execute(
"""
def clean(older_than=timedelta(days=3)):
connection.execute("""
DELETE FROM requests
WHERE expire < :expire;
WHERE timestamp < :cutoff;
""", {
"expire": datetime.utcnow().timestamp(),
}
)
"cutoff": (datetime.utcnow() - older_than).timestamp(),
})
connection.execute("VACUUM;")
connection.commit()
@@ -58,15 +55,11 @@ def clean():
last_request = defaultdict(float)
def get(url: str, params: Mapping = None, cache_time: timedelta = CACHE_TIME,
def get(url: str, params: Mapping = None, cache_time: timedelta = DEFAULT_CACHE_TIME,
ratelimit: Optional[float] = 1, *args, **kwargs) -> Response:
"""
Sends a GET request, caching the result for cache_time. If 'ratelimit' is supplied, requests are rate limited at the
host-level to this number of requests per second.
We're saving requests' expire instead of the timestamp it was received to allow for varying cache times; if we were
saving the timestamp, clean() wouldn't know when to delete unless the cache time was always the same. This, however,
also means that the first call determines for how long subsequent calls will consider a request fresh.
"""
if params is not None:
url += "?" + urllib.parse.urlencode(params)
@@ -75,17 +68,15 @@ def get(url: str, params: Mapping = None, cache_time: timedelta = CACHE_TIME,
#logger.debug("Get %s", url)
row = connection.execute(
"""
SELECT response, expire
row = connection.execute("""
SELECT response, timestamp
FROM requests
WHERE url = :url;
""", {
"url": url
}
).fetchone()
}).fetchone()
if row is not None and datetime.fromtimestamp(row["expire"]) > datetime.utcnow():
if row is not None and datetime.fromtimestamp(row["timestamp"]) > datetime.utcnow() - cache_time:
#logger.debug("Cache hit: %s", url)
return Response(row["response"])
@@ -100,15 +91,16 @@ def get(url: str, params: Mapping = None, cache_time: timedelta = CACHE_TIME,
response = Response(urlopen(request).read())
last_request[request.host] = time.time()
connection.execute(
"""
INSERT OR REPLACE INTO requests(url, response, expire)
VALUES (:url, :response, :expire);
connection.execute("""
INSERT OR REPLACE INTO requests(url, response, timestamp)
VALUES (:url, :response, :timestamp);
""", {
"url": url,
"response": response.bytes,
"expire": (datetime.utcnow() + cache_time).timestamp()
}
)
"timestamp": datetime.utcnow().timestamp()
})
connection.commit()
return response
setup()

View file

@@ -11,8 +11,7 @@ logger = logging.getLogger(__name__)
def web_search(query: str) -> List[str]:
logger.debug("Searching Google for %s", query)
try:
# disable rate-limiting since we have a proper API-key (unlike the other APIs we are using)
r = cache.get("https://www.googleapis.com/customsearch/v1", ratelimit=None, params={
r = cache.get("https://www.googleapis.com/customsearch/v1", params={
"key": CONFIG["google"]["key"],
"cx": CONFIG["google"]["cx"],
"q": query