This commit is contained in:
Casper V. Kristensen 2024-09-11 00:01:14 +02:00
parent fb54dd01ec
commit 10f28310e9
4 changed files with 292 additions and 47 deletions

205
autosurfer/ct.py Normal file
View file

@ -0,0 +1,205 @@
#!/bin/env python
from datetime import UTC, datetime, timedelta
import logging
import random
from functools import wraps
from json import JSONDecodeError
import asyncio
import base64
from cryptography import x509
import httpx
import structlog
logger = structlog.stdlib.get_logger()
client = httpx.AsyncClient()
def decode_cert(leaf: bytes) -> x509.Certificate:
    """Extract the x509 certificate from an RFC 6962 MerkleTreeLeaf.

    Args:
        leaf: Raw (already base64-decoded) ``leaf_input`` bytes.

    Returns:
        The certificate parsed from the DER bytes embedded in the leaf.

    Raises:
        ValueError: If the leaf is truncated, or carries an unknown version
            or leaf type.
        TypeError: If the timestamped entry is not an x509 entry (e.g. it is
            a precert). Callers use this to skip such entries.
    """
    # MerkleTreeLeaf for timestamped entry containing an x509 certificate:
    #
    # +------+-----------------------+
    # | Byte |                       |
    # +------+-----------------------+
    # |  0   | Version               |
    # +------+-----------------------+
    # |  1   | Leaf type             |
    # +------+-----------------------+
    # | 2-9  | Timestamp             |
    # +------+-----------------------+
    # | 10   | Entry type            |
    # | 11   |                       |
    # +------+-----------------------+
    # | 12   |                       |
    # | 13   | Cert length (n)       |
    # | 14   |                       |
    # +------+-----------------------+
    # | 15   |                       |
    # | ..   | x509 DER cert         |
    # | 14+n |                       |
    # +------+-----------------------+
    # | 15+n | CT extensions         |
    # | ..   |                       |
    # +------+-----------------------+
    #
    # https://www.rfc-editor.org/rfc/rfc6962.html#section-3.4
    # https://www.rfc-editor.org/rfc/rfc5246.html#section-4
    header_length = 15

    # Without this check an empty leaf raises IndexError, and a short leaf
    # fails the entry-type comparison below and is misreported as a precert.
    if len(leaf) < header_length:
        raise ValueError(f"Truncated leaf: {len(leaf)} bytes")

    # RFC 6962 only defines version 1 (0x00) of the merkle tree leaf and
    # a single leaf type: timestamped entry (0x00).
    if (version := leaf[0]) != 0:
        raise ValueError(f"Unknown version {version}")
    if (leaf_type := leaf[1]) != 0:
        raise ValueError(f"Unknown leaf type {leaf_type}")

    if leaf[10:12] != b"\x00\x00":
        # Timestamped entry type 0x0000 designates a x509 certificate. Type
        # 0x0001 is a precert, which we can not use, and therefore ignore.
        raise TypeError("Not x509 entry")

    cert_length = int.from_bytes(leaf[12:15], "big")
    cert_bytes = leaf[header_length : header_length + cert_length]
    # Slicing never fails, so a declared length past the end of the buffer
    # has to be detected explicitly.
    if len(cert_bytes) != cert_length:
        raise ValueError(
            f"Truncated certificate: expected {cert_length} bytes, "
            f"got {len(cert_bytes)}"
        )
    return x509.load_der_x509_certificate(cert_bytes)
def _log_is_current(log: dict, now: datetime) -> bool:
    """Return whether *log* is marked usable and its interval contains *now*."""
    if "usable" not in log.get("state", {}):
        return False
    interval = log.get("temporal_interval")
    if interval is None:
        # NOTE(review): the log list schema appears to make the interval
        # optional; such logs are treated as always current - confirm.
        return True
    if datetime.fromisoformat(interval["start_inclusive"]) > now:
        return False
    # The end bound is exclusive, so an interval ending exactly now is over.
    return datetime.fromisoformat(interval["end_exclusive"]) > now


async def get_log_urls() -> set[str]:
    """Fetch the URLs of the currently usable certificate transparency logs.

    Both log lists are queried with the module-level HTTP client. A list
    that cannot be fetched or parsed is skipped, so one unavailable list
    does not prevent the other from being used.

    Returns:
        The URLs of every log that is in the "usable" state and whose
        temporal interval (when present) contains the current time.

    Raises:
        ValueError: If no usable log could be found in any list.
    """
    # The format of these server lists are not part of the RFC, but both
    # Apple's and Google's list follow the same format.
    # https://certificate.transparency.dev/useragents/
    log_lists = {
        "https://www.gstatic.com/ct/log_list/v3/log_list.json",
        "https://valid.apple.com/ct/log_list/current_log_list.json",
    }
    now = datetime.now(tz=UTC)
    logs: set[str] = set()
    for log_list in log_lists:
        try:
            r = await client.get(log_list)
            if not r.is_success:
                continue
            operators = r.json()["operators"]
        except (httpx.HTTPError, JSONDecodeError):
            # Same best-effort policy as for an unsuccessful status code.
            logger.warning("log-list-unavailable", url=log_list, exc_info=True)
            continue
        for operator in operators:
            for log in operator["logs"]:
                if _log_is_current(log, now):
                    logs.add(log["url"])
    if not logs:
        raise ValueError("Failed to retrieve certificate log servers")
    return logs
def forever(f):
    """Decorate coroutine function *f* so that it is re-run indefinitely.

    Each time ``f`` returns it is called again immediately with the same
    arguments. If it raises an ``Exception`` the error is logged and the
    next attempt is delayed by 30 seconds.

    ``BaseException`` subclasses that are not ``Exception``
    (``asyncio.CancelledError``, ``KeyboardInterrupt``, ``SystemExit``) are
    deliberately not caught: they end the loop and propagate, so that a
    cancelled task is actually reported as cancelled.
    """

    @wraps(f)
    async def wrapper(*args, **kwargs):
        while True:
            try:
                await f(*args, **kwargs)
            except Exception:
                logger.exception("Retrying")
                await asyncio.sleep(30)

    return wrapper
class Watcher:
    """Sample random pages of one CT log and queue currently valid certificates.

    Must be instantiated while an event loop is running, because the
    constructor spawns the task that keeps ``tree_size`` up to date.
    """

    # Number of entries requested per get-entries call.
    page_size = 32

    def __init__(self, server: str, queue: asyncio.Queue) -> None:
        """Bind to *server* and start tracking its tree size.

        Args:
            server: Base URL of the log; request paths are appended directly,
                so it is expected to end with a slash.
            queue: Queue that decoded, currently valid certificates are put on.
        """
        self.server = server
        self.queue = queue
        self.log = logger.bind(server=server)
        # 0 until the first get-sth response has been processed.
        self.tree_size = 0
        self.tree_watcher = asyncio.create_task(self.watch_tree_size())
        # Lowest entry index that is still sampled; raised by watcher().
        self.start = 0
        self.end = 0

    @forever
    async def watch_tree_size(self) -> None:
        """Refresh ``tree_size`` from the log's signed tree head, then wait."""
        self.log.debug("get-sth")
        r = await client.get(f"{self.server}ct/v1/get-sth")
        # Fail with the HTTP error rather than a confusing JSON/Key error.
        r.raise_for_status()
        self.tree_size = r.json()["tree_size"]
        self.log.debug("sth", size=self.tree_size)
        await asyncio.sleep(600)

    @forever
    async def watcher(self) -> None:
        """Fetch one random page of entries and queue the valid certificates."""
        # Highest page start (exclusive) that keeps the page inside the tree.
        upper = self.tree_size - self.page_size + 1
        if upper <= self.start:
            # Tree size not known yet (or search space exhausted): randrange
            # would raise on the empty range, so wait for the next STH.
            await asyncio.sleep(1)
            return

        index = random.randrange(self.start, upper)
        self.log.debug("get-entries", index=index)
        r = await client.get(
            f"{self.server}ct/v1/get-entries",
            params={
                "start": index,
                # "end" is the index of the last entry, i.e. inclusive.
                "end": index + self.page_size - 1,
            },
        )
        r.raise_for_status()
        entries = r.json()["entries"]

        now = datetime.now(tz=UTC)
        for entry in entries:
            leaf = base64.b64decode(entry["leaf_input"])
            try:
                cert = decode_cert(leaf)
            except TypeError:
                # Ignore precerts
                continue

            # Move start of search space up if certificate was issued more than
            # 398 days ago; the maximum validity period of public certificates.
            # https://cabforum.org/working-groups/server/baseline-requirements/documents/CA-Browser-Forum-TLS-BR-2.0.7.pdf#3d
            if cert.not_valid_before_utc < now - timedelta(days=398):
                self.log.debug(
                    "move-start",
                    not_valid_before=str(cert.not_valid_before_utc),
                    old=self.start,
                    new=index,
                )
                self.start = index
                break

            if cert.not_valid_before_utc > now:
                continue
            if cert.not_valid_after_utc < now:
                continue
            await self.queue.put(cert)
# Bounded queue that hands certificates from the log watchers to the consumer.
q = asyncio.Queue(maxsize=128)


async def asd():
    """Consume ``q`` forever, printing each certificate taken from it."""
    while True:
        print(await q.get())
async def main():
    """Start the certificate consumer and a watcher for one CT log.

    Runs until one of the background tasks finishes, which they normally
    only do when cancelled.
    """
    consumer = asyncio.create_task(asd())

    urls = await get_log_urls()
    # NOTE(review): only a single, arbitrarily chosen log is watched - this
    # looks like a debugging limitation; confirm before watching all of them.
    url = next(iter(urls))

    # The constructor already starts the tree size watcher; starting a second
    # one here would only double the get-sth requests.
    w = Watcher(url, q)
    # Give the first get-sth request a moment to populate the tree size.
    await asyncio.sleep(3)
    fetcher = asyncio.create_task(w.watcher())

    # Awaiting the tasks keeps strong references to them (the event loop only
    # holds weak ones) and keeps the program alive without a finite sleep.
    await asyncio.gather(consumer, w.tree_watcher, fetcher)


if __name__ == "__main__":
    asyncio.run(main())
# TODO:
# - On HTTP 429 (Too Many Requests): back off, e.g. self.sleep += 1
# - If the queue stays empty: crash (something is definitely wrong!)

View file

@ -1,15 +1,11 @@
import asyncio
import json
import math
import os
import random
import websockets
from selenium import webdriver
from selenium.common.exceptions import InvalidSessionIdException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.remote.webelement import WebElement
service = webdriver.FirefoxService(
# Selenium only checks /usr/bin/geckodriver by default
@ -27,42 +23,6 @@ driver = webdriver.Firefox(service=service, options=options)
driver.set_page_load_timeout(3)
async def ct_stream(domains: asyncio.Queue) -> None:
    """Feed certstream messages to ``ct_handler``, reconnecting on errors.

    Returns when interrupted or cancelled; any other error is printed and
    the connection is opened again.
    """
    endpoint = "wss://certstream.calidog.io"
    while True:
        try:
            async with websockets.connect(endpoint) as stream:
                async for raw_message in stream:
                    ct_handler(raw_message, domains)
        except (KeyboardInterrupt, asyncio.CancelledError):
            return
        except Exception as error:
            print(error)
def ct_handler(data: websockets.Data, domains: asyncio.Queue) -> None:
    """Queue one non-wildcard domain from a certificate update message.

    Nothing is queued when ``domains`` is already full, when the message is
    not a certificate update, or when every domain is a wildcard.
    """
    # The transparency logs produce far more certificates than we can use,
    # so skip the JSON decoding entirely while the queue has no room.
    if domains.full():
        return

    message = json.loads(data)
    if message["message_type"] != "certificate_update":
        return

    # A certificate may cover several domains. Take the first one without a
    # wildcard, as a wildcard host cannot be opened in the browser.
    for name in message["data"]["leaf_cert"]["all_domains"]:
        if "*" not in name:
            domains.put_nowait(name)
            return
async def surf(url: str) -> None:
"""Surf around URL for a bit."""
for i in range(math.ceil(random.expovariate(0.5))):
@ -77,7 +37,9 @@ async def surf(url: str) -> None:
except InvalidSessionIdException:
# Browser closed: no way to recover
raise
except WebDriverException:
except WebDriverException as e:
print(e)
print(type(e))
# Timeout, network error, JavaScript failure etc.
break
try:
@ -92,11 +54,13 @@ async def surfer() -> None:
ct_stream_task = asyncio.create_task(ct_stream(domains))
while True:
try:
# TODO: asyncio.wait_for?
domain = await domains.get()
url = f"https://{domain}"
await surf(url)
except (KeyboardInterrupt, asyncio.CancelledError):
break
except (KeyboardInterrupt, asyncio.CancelledError) as e:
print(e)
raise
ct_stream_task.cancel()

71
autosurfer/test.py Normal file

File diff suppressed because one or more lines are too long

View file

@ -45,8 +45,10 @@
})
pkgs.geckodriver
(pkgs.python3.withPackages (ps: [
ps.cryptography
ps.httpx
ps.selenium
ps.websockets
ps.structlog
]))
# pkgs.bashInteractive
# pkgs.coreutils
@ -86,12 +88,15 @@
# required for Firefox to start.
"HOME=/"
];
Entrypoint = ["python" "/autosurfer/main.py"];
# Entrypoint = ["python" "/autosurfer/main.py"];
Entrypoint = ["python" "/autosurfer/ct.py"];
};
};
# `nix shell`
default = env;
};
apps.${system}.default = {
type = "app";
program = "${self.packages.${system}.default}/bin/python autosurfer/ct.py";
};
};
}