Revert to storing the request timestamp instead of the expiry time in the cache; make the cache-cleaning cutoff configurable instead.
This commit is contained in:
parent
46634789a3
commit
c6d1a9ce6e
|
@ -13,6 +13,13 @@ from .config import DATA_DIR, CONFIG
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
connection = sqlite3.connect(DATA_DIR.joinpath("cache.sqlite"))
|
||||||
|
connection.row_factory = sqlite3.Row # allow accessing rows by index and case-insensitively by name
|
||||||
|
connection.text_factory = bytes # do not try to decode bytes as utf-8 strings
|
||||||
|
|
||||||
|
DEFAULT_CACHE_TIME = timedelta(seconds=CONFIG["web"].getint("cache_time"))
|
||||||
|
logger.info("Default cache time is %s", DEFAULT_CACHE_TIME)
|
||||||
|
|
||||||
|
|
||||||
class Response:
|
class Response:
|
||||||
def __init__(self, bytes: bytes = None) -> None:
|
def __init__(self, bytes: bytes = None) -> None:
|
||||||
|
@ -24,33 +31,23 @@ class Response:
|
||||||
return json.loads(self.bytes)
|
return json.loads(self.bytes)
|
||||||
|
|
||||||
|
|
||||||
connection = sqlite3.connect(DATA_DIR.joinpath("cache.sqlite"))
|
def setup():
|
||||||
connection.row_factory = sqlite3.Row # allow accessing rows by index and case-insensitively by name
|
connection.execute("""
|
||||||
connection.text_factory = bytes # do not try to decode bytes as utf-8 strings
|
CREATE TABLE IF NOT EXISTS
|
||||||
|
requests (id INTEGER PRIMARY KEY,
|
||||||
CACHE_TIME = timedelta(seconds=CONFIG["web"].getint("cache_time"))
|
url TEXT UNIQUE NOT NULL,
|
||||||
logger.info("Default cache time is %s", CACHE_TIME)
|
response BLOB NOT NULL,
|
||||||
|
timestamp INTEGER NOT NULL);
|
||||||
connection.execute(
|
""")
|
||||||
"""
|
|
||||||
CREATE TABLE IF NOT EXISTS
|
|
||||||
requests (id INTEGER PRIMARY KEY,
|
|
||||||
url TEXT UNIQUE NOT NULL,
|
|
||||||
response BLOB NOT NULL,
|
|
||||||
expire INTEGER NOT NULL);
|
|
||||||
"""
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def clean():
|
def clean(older_than=timedelta(days=3)):
|
||||||
connection.execute(
|
connection.execute("""
|
||||||
"""
|
|
||||||
DELETE FROM requests
|
DELETE FROM requests
|
||||||
WHERE expire < :expire;
|
WHERE timestamp < :cutoff;
|
||||||
""", {
|
""", {
|
||||||
"expire": datetime.utcnow().timestamp(),
|
"cutoff": (datetime.utcnow() - older_than).timestamp(),
|
||||||
}
|
})
|
||||||
)
|
|
||||||
connection.execute("VACUUM;")
|
connection.execute("VACUUM;")
|
||||||
connection.commit()
|
connection.commit()
|
||||||
|
|
||||||
|
@ -58,15 +55,11 @@ def clean():
|
||||||
last_request = defaultdict(float)
|
last_request = defaultdict(float)
|
||||||
|
|
||||||
|
|
||||||
def get(url: str, params: Mapping = None, cache_time: timedelta = CACHE_TIME,
|
def get(url: str, params: Mapping = None, cache_time: timedelta = DEFAULT_CACHE_TIME,
|
||||||
ratelimit: Optional[float] = 1, *args, **kwargs) -> Response:
|
ratelimit: Optional[float] = 1, *args, **kwargs) -> Response:
|
||||||
"""
|
"""
|
||||||
Sends a GET request, caching the result for cache_time. If 'ratelimit' is supplied, requests are rate limited at the
|
Sends a GET request, caching the result for cache_time. If 'ratelimit' is supplied, requests are rate limited at the
|
||||||
host-level to this number of requests per second.
|
host-level to this number of requests per second.
|
||||||
|
|
||||||
We're saving requests' expire instead of the timestamp it was received to allow for varying cache times; if we were
|
|
||||||
saving the timestamp, clean() wouldn't know when to delete unless the cache time was always the same. This, however,
|
|
||||||
also means that the first call determines for how long subsequent calls will consider a request fresh.
|
|
||||||
"""
|
"""
|
||||||
if params is not None:
|
if params is not None:
|
||||||
url += "?" + urllib.parse.urlencode(params)
|
url += "?" + urllib.parse.urlencode(params)
|
||||||
|
@ -75,17 +68,15 @@ def get(url: str, params: Mapping = None, cache_time: timedelta = CACHE_TIME,
|
||||||
|
|
||||||
#logger.debug("Get %s", url)
|
#logger.debug("Get %s", url)
|
||||||
|
|
||||||
row = connection.execute(
|
row = connection.execute("""
|
||||||
"""
|
SELECT response, timestamp
|
||||||
SELECT response, expire
|
|
||||||
FROM requests
|
FROM requests
|
||||||
WHERE url = :url;
|
WHERE url = :url;
|
||||||
""", {
|
""", {
|
||||||
"url": url
|
"url": url
|
||||||
}
|
}).fetchone()
|
||||||
).fetchone()
|
|
||||||
|
|
||||||
if row is not None and datetime.fromtimestamp(row["expire"]) > datetime.utcnow():
|
if row is not None and datetime.fromtimestamp(row["timestamp"]) > datetime.utcnow() - cache_time:
|
||||||
#logger.debug("Cache hit: %s", url)
|
#logger.debug("Cache hit: %s", url)
|
||||||
return Response(row["response"])
|
return Response(row["response"])
|
||||||
|
|
||||||
|
@ -100,15 +91,16 @@ def get(url: str, params: Mapping = None, cache_time: timedelta = CACHE_TIME,
|
||||||
|
|
||||||
response = Response(urlopen(request).read())
|
response = Response(urlopen(request).read())
|
||||||
last_request[request.host] = time.time()
|
last_request[request.host] = time.time()
|
||||||
connection.execute(
|
connection.execute("""
|
||||||
"""
|
INSERT OR REPLACE INTO requests(url, response, timestamp)
|
||||||
INSERT OR REPLACE INTO requests(url, response, expire)
|
VALUES (:url, :response, :timestamp);
|
||||||
VALUES (:url, :response, :expire);
|
|
||||||
""", {
|
""", {
|
||||||
"url": url,
|
"url": url,
|
||||||
"response": response.bytes,
|
"response": response.bytes,
|
||||||
"expire": (datetime.utcnow() + cache_time).timestamp()
|
"timestamp": datetime.utcnow().timestamp()
|
||||||
}
|
})
|
||||||
)
|
|
||||||
connection.commit()
|
connection.commit()
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
setup()
|
||||||
|
|
|
@ -11,8 +11,7 @@ logger = logging.getLogger(__name__)
|
||||||
def web_search(query: str) -> List[str]:
|
def web_search(query: str) -> List[str]:
|
||||||
logger.debug("Searching Google for %s", query)
|
logger.debug("Searching Google for %s", query)
|
||||||
try:
|
try:
|
||||||
# disable rate-limiting since we have a proper API-key (unlike the other APIs we are using)
|
r = cache.get("https://www.googleapis.com/customsearch/v1", params={
|
||||||
r = cache.get("https://www.googleapis.com/customsearch/v1", ratelimit=None, params={
|
|
||||||
"key": CONFIG["google"]["key"],
|
"key": CONFIG["google"]["key"],
|
||||||
"cx": CONFIG["google"]["cx"],
|
"cx": CONFIG["google"]["cx"],
|
||||||
"q": query
|
"q": query
|
||||||
|
|
Reference in a new issue