This commit is contained in:
Casper V. Kristensen 2024-09-05 01:53:19 +02:00
parent fb54dd01ec
commit 7f5b867c37
4 changed files with 254 additions and 42 deletions

177
autosurfer/ct.py Normal file
View file

@ -0,0 +1,177 @@
from datetime import UTC, datetime
import logging
import random
from functools import wraps
from json import JSONDecodeError
import asyncio
import base64
from cryptography import x509
import httpx
import structlog
# Module-wide structlog logger; Watcher derives a per-server logger from it
# with ``logger.bind(server=...)``.
logger = structlog.stdlib.get_logger()
# One shared async HTTP client, reused for every request made in this module.
client = httpx.AsyncClient()
async def get_servers() -> set[str]:
    """Return the URLs of the CT log servers that are usable right now.

    Each published log list is downloaded in turn and its logs are merged
    into one set. A log is included when ``"usable"`` appears in its
    ``state`` and the current time lies inside its ``temporal_interval``
    (``start_inclusive <= now < end_exclusive``).

    If fetching or parsing a list fails, the error is logged and the next
    list is tried, so one broken source does not stop the others.

    Returns:
        The ``url`` values of all matching logs.

    Raises:
        ValueError: If no server was collected from any list.
    """
    # The format of these server lists is not part of the RFC.
    # https://certificate.transparency.dev/useragents/
    server_lists = {
        "https://www.gstatic.com/ct/log_list/v3/log_list.json",
        "https://valid.apple.com/ct/log_list/current_log_list.json",
    }
    servers = set()
    now = datetime.now(tz=UTC)
    for server_list in server_lists:
        try:
            r = await client.get(server_list)
            r.raise_for_status()
            servers.update(
                log["url"]
                for operator in r.json()["operators"]
                for log in operator["logs"]
                if (
                    "usable" in log["state"]
                    and datetime.fromisoformat(log["temporal_interval"]["start_inclusive"]) <= now
                    and datetime.fromisoformat(log["temporal_interval"]["end_exclusive"]) > now
                )
            )
        except Exception:
            # Not a bare ``except``: task cancellation and KeyboardInterrupt
            # derive from BaseException and must propagate to the caller
            # instead of being logged as a broken server list.
            logger.exception("Error in log server list")
            continue
    if not servers:
        raise ValueError("All log server lists failed")
    return servers
def decode_cert(leaf: bytes) -> x509.Certificate:
    """Extract the x509 certificate from an RFC 6962 MerkleTreeLeaf.

    Args:
        leaf: The raw leaf bytes (already base64-decoded).

    Returns:
        The certificate parsed from the DER bytes embedded in the leaf.

    Raises:
        ValueError: If the leaf is truncated, or if its version or leaf type
            is not the single value defined by RFC 6962.
        TypeError: If the timestamped entry is not an x509 entry (e.g. a
            precert). Callers use this to skip such entries.
    """
    # MerkleTreeLeaf for timestamped entry containing an x509 certificate:
    #
    # +------+-----------------------+
    # | Byte |                       |
    # +------+-----------------------+
    # |    0 | Version               |
    # +------+-----------------------+
    # |    1 | Leaf type             |
    # +------+-----------------------+
    # |    2 |                       |
    # |    3 |                       |
    # |    4 |                       |
    # |    5 | Timestamp             |
    # |    6 |                       |
    # |    7 |                       |
    # |    8 |                       |
    # |    9 |                       |
    # +------+-----------------------+
    # |   10 | Entry type            |
    # |   11 |                       |
    # +------+-----------------------+
    # |   12 |                       |
    # |   13 | Cert length (n)       |
    # |   14 |                       |
    # +------+-----------------------+
    # |   15 |                       |
    # |   .. | x509 DER cert         |
    # |    n |                       |
    # +------+-----------------------+
    # |  n+1 | CT extensions         |
    # |   .. |                       |
    # +------+-----------------------+
    #
    # https://www.rfc-editor.org/rfc/rfc6962.html#section-3.4
    # https://www.rfc-editor.org/rfc/rfc5246.html#section-4

    # Bytes 0..11 (version, leaf type, timestamp, entry type) must all be
    # present; otherwise a truncated leaf would be misreported as a
    # non-x509 entry or fail with an IndexError.
    if len(leaf) < 12:
        raise ValueError(f"Truncated leaf: {len(leaf)} bytes")

    # RFC 6962 only defines version 1 (0x00) of the merkle tree leaf and
    # a single leaf type: timestamped entry (0x00).
    if (version := leaf[0]) != 0:
        raise ValueError(f"Unknown version {version}")
    if (leaf_type := leaf[1]) != 0:
        raise ValueError(f"Unknown leaf type {leaf_type}")

    if leaf[10:12] != b"\x00\x00":
        # Timestamped entry type 0x0000 designates a x509 certificate. Type
        # 0x0001 is a precert, which we can not use, and therefore ignore.
        raise TypeError("Not x509 entry")

    # The 3-byte big-endian certificate length occupies bytes 12..14.
    if len(leaf) < 15:
        raise ValueError("Truncated leaf: missing certificate length")
    cert_length = int.from_bytes(leaf[12:15], "big")

    # Slicing never raises, so verify that the whole certificate is present.
    cert_bytes = leaf[15 : 15 + cert_length]
    if len(cert_bytes) != cert_length:
        raise ValueError(
            f"Truncated certificate: expected {cert_length} bytes, got {len(cert_bytes)}"
        )

    return x509.load_der_x509_certificate(cert_bytes)
def forever(f):
    """Decorate coroutine function *f* so that it is run over and over.

    The returned coroutine function awaits ``f(*args, **kwargs)`` in an
    endless loop. When a call raises an ``Exception`` the traceback is logged
    and the loop pauses for 30 seconds before calling *f* again.

    Exceptions that do not derive from ``Exception`` (task cancellation,
    ``KeyboardInterrupt``, ``SystemExit``) are not intercepted: they end the
    loop and propagate, so a cancelled task is really reported as cancelled.
    """

    @wraps(f)
    async def wrapper(*args, **kwargs):
        while True:
            try:
                await f(*args, **kwargs)
            except Exception:
                logger.exception("Retrying")
                # Back off so a persistent failure does not become a busy loop.
                await asyncio.sleep(30)

    return wrapper
class Watcher:
    """Sample certificates from one CT log server and put them on a queue.

    A background task keeps ``tree_size`` up to date, and ``watcher``
    repeatedly fetches a page of entries at a random index, forwarding every
    currently valid x509 certificate to ``queue``.
    """

    # Number of entries requested per get-entries call.
    page_size = 100

    def __init__(self, server: str, queue: asyncio.Queue) -> None:
        """Store the configuration and start polling the log's tree size.

        Requires a running event loop, because the polling task is created
        here.

        Args:
            server: Base URL of the log; request paths are appended directly,
                so it is expected to end with ``/``.
            queue: Destination for decoded certificates.
        """
        self.server = server
        self.queue = queue
        self.log = logger.bind(server=server)
        # Stays 0 until the first get-sth response has been processed.
        self.tree_size = 0
        self.tree_watcher = asyncio.create_task(self.watch_tree_size())
        # Lowest index that is still sampled; raised when a page turns out
        # to contain nothing but expired certificates.
        self.start = 0
        # Not read anywhere in this class.
        self.end = 0

    @forever
    async def watch_tree_size(self) -> None:
        """Fetch the signed tree head, record its size, then wait 10 minutes."""
        r = await client.get(f"{self.server}ct/v1/get-sth")
        # Fail on HTTP errors here instead of on a confusing JSON/KeyError.
        r.raise_for_status()
        self.tree_size = r.json()["tree_size"]
        self.log.debug("Tree size", size=self.tree_size)
        await asyncio.sleep(600)

    @forever
    async def watcher(self) -> None:
        """Fetch one random page of entries and queue its valid certificates.

        While ``tree_size`` is smaller than one page (for instance before the
        first tree head arrived) ``random.randrange`` raises ``ValueError``,
        which the ``forever`` wrapper logs before retrying.
        """
        # get-entries takes an *inclusive* ``end`` index (RFC 6962 section
        # 4.6), so a page of ``page_size`` entries spans
        # [index, index + page_size - 1] and the last possible start index is
        # ``tree_size - page_size``.
        index = random.randrange(self.start, self.tree_size - self.page_size + 1)
        r = await client.get(
            f"{self.server}ct/v1/get-entries",
            params={"start": index, "end": index + self.page_size - 1},
        )
        r.raise_for_status()
        entries = r.json()["entries"]

        now = datetime.now(tz=UTC)
        expired = 0
        for entry in entries:
            leaf = base64.b64decode(entry["leaf_input"])
            try:
                cert = decode_cert(leaf)
            except TypeError:
                # Ignore precerts
                continue
            if cert.not_valid_before_utc > now:
                # Not valid yet.
                continue
            if cert.not_valid_after_utc < now:
                expired += 1
                continue
            await self.queue.put(cert)

        # All expired: move up
        if expired > 5 and expired == len(entries):
            self.start = index
# Bounded queue holding at most 100 items waiting to be consumed.
q = asyncio.Queue(maxsize=100)


async def asd():
    """Consume ``q`` forever, printing every item as soon as it arrives."""
    while True:
        # await asyncio.sleep(10)
        print(await q.get())


# NOTE(review): ``main`` is not defined in the visible part of this file, and
# nothing visible schedules ``asd`` — confirm where both are wired up.
asyncio.run(main(q))

View file

@ -1,15 +1,11 @@
import asyncio
import json
import math
import os
import random
import websockets
from selenium import webdriver
from selenium.common.exceptions import InvalidSessionIdException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.remote.webelement import WebElement
service = webdriver.FirefoxService(
# Selenium only checks /usr/bin/geckodriver by default
@ -27,42 +23,6 @@ driver = webdriver.Firefox(service=service, options=options)
driver.set_page_load_timeout(3)
async def ct_stream(domains: asyncio.Queue) -> None:
    """Stream Certificate Transparency (CT) updates into *domains*.

    Every message received from the certstream websocket is handed to
    ``ct_handler``. When the connection ends or an ordinary error occurs
    (the error is printed), a new connection is opened. ``KeyboardInterrupt``
    and task cancellation end the coroutine.
    """
    endpoint = "wss://certstream.calidog.io"
    while True:
        try:
            async with websockets.connect(endpoint) as stream:
                async for raw in stream:
                    ct_handler(raw, domains)
        except (KeyboardInterrupt, asyncio.CancelledError):
            # Shutdown was requested: stop reconnecting.
            return
        except Exception as exc:
            print(exc)
def ct_handler(data: websockets.Data, domains: asyncio.Queue) -> None:
    """Queue one domain from a certstream message, if there is room for it.

    Only ``certificate_update`` messages are considered, and at most one
    domain per message is queued.
    """
    # The transparency logs produce A LOT of certificates, so skip the JSON
    # decoding entirely while the queue has no free slot.
    if domains.full():
        return

    payload = json.loads(data)
    if payload["message_type"] != "certificate_update":
        return

    # A certificate may cover several domains. Take the first one that is not
    # a wildcard, since a wildcard host cannot be opened in the browser.
    for candidate in payload["data"]["leaf_cert"]["all_domains"]:
        if "*" not in candidate:
            domains.put_nowait(candidate)
            return
async def surf(url: str) -> None:
"""Surf around URL for a bit."""
for i in range(math.ceil(random.expovariate(0.5))):
@ -77,7 +37,9 @@ async def surf(url: str) -> None:
except InvalidSessionIdException:
# Browser closed: no way to recover
raise
except WebDriverException:
except WebDriverException as e:
print(e)
print(type(e))
# Timeout, network error, JavaScript failure etc.
break
try:

71
autosurfer/test.py Normal file

File diff suppressed because one or more lines are too long

View file

@ -45,8 +45,10 @@
})
pkgs.geckodriver
(pkgs.python3.withPackages (ps: [
ps.cryptography
ps.httpx
ps.selenium
ps.websockets
ps.structlog
]))
# pkgs.bashInteractive
# pkgs.coreutils