Revert to using timestamp instead of expire in cache - cache cleaning cutoff configurable instead.
This commit is contained in:
parent 46634789a3
commit c6d1a9ce6e
@@ -13,6 +13,13 @@ from .config import DATA_DIR, CONFIG

 logger = logging.getLogger(__name__)

+connection = sqlite3.connect(DATA_DIR.joinpath("cache.sqlite"))
+connection.row_factory = sqlite3.Row  # allow accessing rows by index and case-insensitively by name
+connection.text_factory = bytes  # do not try to decode bytes as utf-8 strings
+
+DEFAULT_CACHE_TIME = timedelta(seconds=CONFIG["web"].getint("cache_time"))
+logger.info("Default cache time is %s", DEFAULT_CACHE_TIME)
+

 class Response:
     def __init__(self, bytes: bytes = None) -> None:
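The connection and default cache time now live at module import. With sqlite3.Row as the row factory, columns can be read by index or by name, case-insensitively, exactly as the inline comment says; a minimal standalone check (in-memory database, illustrative only, not part of the diff):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.row_factory = sqlite3.Row

conn.execute("CREATE TABLE requests (url TEXT, timestamp INTEGER);")
conn.execute("INSERT INTO requests VALUES ('https://example.com', 0);")

row = conn.execute("SELECT * FROM requests;").fetchone()
print(row[0])            # access by index
print(row["TIMESTAMP"])  # access by name, case-insensitive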
@@ -24,33 +31,23 @@ class Response:
         return json.loads(self.bytes)


-connection = sqlite3.connect(DATA_DIR.joinpath("cache.sqlite"))
-connection.row_factory = sqlite3.Row  # allow accessing rows by index and case-insensitively by name
-connection.text_factory = bytes  # do not try to decode bytes as utf-8 strings
-
-CACHE_TIME = timedelta(seconds=CONFIG["web"].getint("cache_time"))
-logger.info("Default cache time is %s", CACHE_TIME)
-
-connection.execute(
-    """
+def setup():
+    connection.execute("""
         CREATE TABLE IF NOT EXISTS
         requests (id INTEGER PRIMARY KEY,
                   url TEXT UNIQUE NOT NULL,
                   response BLOB NOT NULL,
-                  expire INTEGER NOT NULL);
-    """
-)
+                  timestamp INTEGER NOT NULL);
+    """)


-def clean():
-    connection.execute(
-        """
+def clean(older_than=timedelta(days=3)):
+    connection.execute("""
         DELETE FROM requests
-        WHERE expire < :expire;
+        WHERE timestamp < :cutoff;
     """, {
-        "expire": datetime.utcnow().timestamp(),
-    }
-    )
+        "cutoff": (datetime.utcnow() - older_than).timestamp(),
+    })
     connection.execute("VACUUM;")
     connection.commit()

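Since the cutoff is now an argument rather than being baked into each row, callers decide how much history to keep; a hypothetical invocation of the module's clean() (the seven-day window is illustrative):

from datetime import timedelta

# drop everything older than a week instead of the three-day default
clean(older_than=timedelta(days=7))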
@@ -58,15 +55,11 @@ def clean():
 last_request = defaultdict(float)


-def get(url: str, params: Mapping = None, cache_time: timedelta = CACHE_TIME,
+def get(url: str, params: Mapping = None, cache_time: timedelta = DEFAULT_CACHE_TIME,
         ratelimit: Optional[float] = 1, *args, **kwargs) -> Response:
     """
     Sends a GET request, caching the result for cache_time. If 'ratelimit' is supplied, requests are rate limited at the
     host level to this number of requests per second.
-
-    We're saving requests' expire instead of the timestamp it was received to allow for varying cache times; if we were
-    saving the timestamp, clean() wouldn't know when to delete unless the cache time was always the same. This, however,
-    also means that the first call determines for how long subsequent calls will consider a request fresh.
     """
     if params is not None:
         url += "?" + urllib.parse.urlencode(params)
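The removed docstring paragraph states the trade-off being reverted: with the timestamp stored instead of an expiry, freshness is decided per read, so different callers can apply different windows to the same cached row. A sketch of that behavior (URL and durations are illustrative, assuming the module's get()):

from datetime import timedelta

url = "https://example.com/api"
get(url, cache_time=timedelta(hours=1))    # first call fetches and caches
get(url, cache_time=timedelta(hours=1))    # within the hour: served from cache
get(url, cache_time=timedelta(seconds=0))  # zero window: always refetches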
@@ -75,17 +68,15 @@ def get(url: str, params: Mapping = None, cache_time: timedelta = CACHE_TIME,

     #logger.debug("Get %s", url)

-    row = connection.execute(
-        """
-        SELECT response, expire
+    row = connection.execute("""
+        SELECT response, timestamp
         FROM requests
         WHERE url = :url;
     """, {
         "url": url
-    }
-    ).fetchone()
+    }).fetchone()

-    if row is not None and datetime.fromtimestamp(row["expire"]) > datetime.utcnow():
+    if row is not None and datetime.fromtimestamp(row["timestamp"]) > datetime.utcnow() - cache_time:
         #logger.debug("Cache hit: %s", url)
         return Response(row["response"])
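The new condition reads as "the stored time is later than now minus cache_time", which is the same as "the entry is younger than cache_time"; a standalone restatement of the test, mirroring the module's naive-UTC timestamp convention (illustrative helper, not part of the diff):

from datetime import datetime, timedelta

def is_fresh(timestamp: float, cache_time: timedelta) -> bool:
    # equivalent to: datetime.fromtimestamp(timestamp) > datetime.utcnow() - cache_time
    return datetime.utcnow() - datetime.fromtimestamp(timestamp) < cache_time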
@@ -100,15 +91,16 @@ def get(url: str, params: Mapping = None, cache_time: timedelta = CACHE_TIME,

     response = Response(urlopen(request).read())
     last_request[request.host] = time.time()
-    connection.execute(
-        """
-        INSERT OR REPLACE INTO requests(url, response, expire)
-        VALUES (:url, :response, :expire);
+    connection.execute("""
+        INSERT OR REPLACE INTO requests(url, response, timestamp)
+        VALUES (:url, :response, :timestamp);
     """, {
         "url": url,
         "response": response.bytes,
-        "expire": (datetime.utcnow() + cache_time).timestamp()
-    }
-    )
+        "timestamp": datetime.utcnow().timestamp()
+    })
     connection.commit()
     return response
+
+
+setup()
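Because url is UNIQUE and the write uses INSERT OR REPLACE, a refetch overwrites the old row and resets its timestamp; a hypothetical end-to-end call (URL illustrative, reading the raw bytes held on the Response):

from datetime import timedelta

r = get("https://example.com/data.json", cache_time=timedelta(minutes=5))
print(len(r.bytes))  # the cached response body, stored as a BLOB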
@@ -11,8 +11,7 @@ logger = logging.getLogger(__name__)
 def web_search(query: str) -> List[str]:
     logger.debug("Searching Google for %s", query)
     try:
-        # disable rate-limiting since we have a proper API key (unlike the other APIs we are using)
-        r = cache.get("https://www.googleapis.com/customsearch/v1", ratelimit=None, params={
+        r = cache.get("https://www.googleapis.com/customsearch/v1", params={
             "key": CONFIG["google"]["key"],
             "cx": CONFIG["google"]["cx"],
             "q": query