wip
This commit is contained in:
parent
fb54dd01ec
commit
10f28310e9
4 changed files with 292 additions and 47 deletions
205
autosurfer/ct.py
Normal file
205
autosurfer/ct.py
Normal file
|
@ -0,0 +1,205 @@
|
|||
#!/bin/env python
|
||||
from datetime import UTC, datetime, timedelta
|
||||
import logging
|
||||
import random
|
||||
from functools import wraps
|
||||
from json import JSONDecodeError
|
||||
import asyncio
|
||||
import base64
|
||||
|
||||
from cryptography import x509
|
||||
import httpx
|
||||
import structlog
|
||||
|
||||
|
||||
logger = structlog.stdlib.get_logger()
|
||||
client = httpx.AsyncClient()
|
||||
|
||||
|
||||
|
||||
def decode_cert(leaf: bytes) -> x509.Certificate:
    """Extract the X.509 certificate from an RFC 6962 ``MerkleTreeLeaf``.

    Args:
        leaf: Raw ``MerkleTreeLeaf`` bytes.

    Returns:
        The certificate parsed from the DER bytes embedded in the leaf.

    Raises:
        ValueError: If the leaf is truncated, or its version or leaf type is
            not the single value defined by RFC 6962.
        TypeError: If the timestamped entry is not an x509 entry (e.g. a
            precert).

    Errors raised by ``x509.load_der_x509_certificate`` propagate unchanged.
    """
    # MerkleTreeLeaf for timestamped entry containing an x509 certificate:
    #
    # +------+-----------------------+
    # | Byte |                       |
    # +------+-----------------------+
    # |   0  | Version               |
    # +------+-----------------------+
    # |   1  | Leaf type             |
    # +------+-----------------------+
    # | 2..9 | Timestamp             |
    # +------+-----------------------+
    # |  10  | Entry type            |
    # |  11  |                       |
    # +------+-----------------------+
    # |  12  |                       |
    # |  13  | Cert length (n)       |
    # |  14  |                       |
    # +------+-----------------------+
    # |  15  |                       |
    # |  ..  | x509 DER cert         |
    # |   n  |                       |
    # +------+-----------------------+
    # |  n+1 | CT extensions         |
    # |  ..  |                       |
    # +------+-----------------------+
    #
    # https://www.rfc-editor.org/rfc/rfc6962.html#section-3.4
    # https://www.rfc-editor.org/rfc/rfc5246.html#section-4
    header_length = 15

    # Slicing never fails on short input, so check the size up front instead
    # of parsing garbage (or raising IndexError on an empty leaf).
    if len(leaf) < header_length:
        raise ValueError(
            f"Truncated leaf: {len(leaf)} bytes, expected at least {header_length}"
        )

    # RFC 6962 only defines version 1 (0x00) of the merkle tree leaf and
    # a single leaf type: timestamped entry (0x00).
    if (version := leaf[0]) != 0:
        raise ValueError(f"Unknown version {version}")
    if (leaf_type := leaf[1]) != 0:
        raise ValueError(f"Unknown leaf type {leaf_type}")

    if leaf[10:12] != b"\x00\x00":
        # Timestamped entry type 0x0000 designates a x509 certificate. Type
        # 0x0001 is a precert, which we can not use, and therefore ignore.
        raise TypeError("Not x509 entry")

    cert_length = int.from_bytes(leaf[12:15], "big")
    cert_bytes = leaf[header_length : header_length + cert_length]
    if len(cert_bytes) != cert_length:
        raise ValueError(
            f"Truncated certificate: got {len(cert_bytes)} of {cert_length} bytes"
        )

    return x509.load_der_x509_certificate(cert_bytes)
|
||||
|
||||
|
||||
async def get_log_urls() -> set[str]:
    """Collect the URLs of currently usable certificate transparency logs.

    Downloads the log lists published by Google and Apple and keeps every
    log whose state contains ``usable`` and whose temporal interval, when one
    is given, contains the current time. A list that cannot be fetched or
    parsed is skipped so that the remaining list can still be used.

    Returns:
        The ``url`` values of all matching logs.

    Raises:
        ValueError: If no usable log was found in any list.
    """
    # The format of these server lists is not part of the RFC, but both
    # Apple's and Google's list follow the same format.
    # https://certificate.transparency.dev/useragents/
    log_lists = {
        "https://www.gstatic.com/ct/log_list/v3/log_list.json",
        "https://valid.apple.com/ct/log_list/current_log_list.json",
    }
    now = datetime.now(tz=UTC)
    logs: set[str] = set()
    for log_list in log_lists:
        try:
            r = await client.get(log_list)
            if not r.is_success:
                logger.warning("log-list-failed", url=log_list, status=r.status_code)
                continue
            operators = r.json()["operators"]
        except (httpx.HTTPError, JSONDecodeError, KeyError):
            # One broken list must not prevent use of the other one.
            logger.exception("log-list-failed", url=log_list)
            continue

        for operator in operators:
            for log in operator.get("logs", []):
                if "usable" not in log.get("state", {}):
                    continue
                # Only filter on the interval when the log declares one.
                interval = log.get("temporal_interval")
                if interval is not None:
                    if datetime.fromisoformat(interval["start_inclusive"]) > now:
                        continue
                    # The end bound is exclusive, so equality is outside.
                    if datetime.fromisoformat(interval["end_exclusive"]) <= now:
                        continue
                logs.add(log["url"])

    if not logs:
        raise ValueError("Failed to retrieve certificate log servers")
    return logs
|
||||
|
||||
|
||||
|
||||
def forever(f):
    """Decorate coroutine function ``f`` so that it is run over and over.

    The wrapper awaits ``f(*args, **kwargs)`` in an endless loop. When a call
    raises an ``Exception`` the error is logged and the next attempt is
    delayed by 30 seconds; a call that returns normally is repeated
    immediately.

    Anything that is not an ``Exception`` (task cancellation,
    ``KeyboardInterrupt``, ``SystemExit``) is deliberately not caught and ends
    the loop by propagating, so that a cancelled task is reported as
    cancelled instead of finishing as if it had succeeded.
    """

    @wraps(f)
    async def wrapper(*args, **kwargs):
        while True:
            try:
                await f(*args, **kwargs)
            except Exception:
                logger.exception("Retrying")
                await asyncio.sleep(30)

    return wrapper
|
||||
|
||||
|
||||
class Watcher:
    """Sample certificates from one certificate transparency log.

    Creating an instance immediately schedules ``watch_tree_size`` as a task,
    so it must happen while an event loop is running. ``watcher`` then fetches
    pages of entries at random positions and puts every currently valid
    certificate on ``queue``.
    """

    # Number of entries requested per get-entries call.
    page_size = 32

    def __init__(self, server: str, queue: asyncio.Queue) -> None:
        """Bind the watcher to a log and start tracking the log's size.

        Args:
            server: Base URL of the log; request paths are appended to it
                directly, so it is expected to end with a slash.
            queue: Queue that receives the decoded certificates.
        """
        self.server = server
        self.queue = queue

        self.log = logger.bind(server=server)

        # Stays 0 until the first signed tree head has been fetched.
        self.tree_size = 0
        self.tree_watcher = asyncio.create_task(self.watch_tree_size())

        # Lower bound of the index range that is sampled.
        self.start = 0
        self.end = 0

    @forever
    async def watch_tree_size(self) -> None:
        """Refresh ``tree_size`` from the log's signed tree head, then sleep."""
        self.log.debug("get-sth")
        r = await client.get(f"{self.server}ct/v1/get-sth")
        # Fail with a clear HTTP error instead of a confusing JSON/KeyError.
        r.raise_for_status()
        self.tree_size = r.json()["tree_size"]
        self.log.debug("sth", size=self.tree_size)
        await asyncio.sleep(600)

    @forever
    async def watcher(self) -> None:
        """Fetch one random page of entries and queue its valid certificates."""
        # Highest page start (exclusive) that still yields a full page.
        upper = self.tree_size - self.page_size + 1
        if upper <= self.start:
            # Tree size not known yet, or nothing left to sample: wait a
            # moment rather than letting randrange() raise on an empty range.
            await asyncio.sleep(1)
            return

        index = random.randrange(self.start, upper)
        self.log.debug("get-entries", index=index)
        r = await client.get(
            f"{self.server}ct/v1/get-entries",
            params={
                "start": index,
                # `end` is the index of the last entry, i.e. inclusive.
                # https://www.rfc-editor.org/rfc/rfc6962.html#section-4.6
                "end": index + self.page_size - 1,
            },
        )
        r.raise_for_status()
        entries = r.json()["entries"]

        now = datetime.now(tz=UTC)
        for entry in entries:
            leaf = base64.b64decode(entry["leaf_input"])
            try:
                cert = decode_cert(leaf)
            except TypeError:
                # Ignore precerts
                continue
            # Move start of search space up if certificate was issued more than
            # 398 days ago; the maximum validity period of public certificates.
            # https://cabforum.org/working-groups/server/baseline-requirements/documents/CA-Browser-Forum-TLS-BR-2.0.7.pdf#3d
            # NOTE(review): this presumes entries at lower indexes are not
            # newer than this one -- confirm for the logs in use.
            if cert.not_valid_before_utc < now - timedelta(days=398):
                self.log.debug(
                    "move-start",
                    issued=cert.not_valid_before_utc.isoformat(),
                    old=self.start,
                    new=index,
                )
                self.start = index
                break
            if cert.not_valid_before_utc > now:
                continue
            if cert.not_valid_after_utc < now:
                continue
            await self.queue.put(cert)
|
||||
|
||||
|
||||
|
||||
q = asyncio.Queue(maxsize=128)
|
||||
|
||||
|
||||
async def asd():
    """Consume the module-level queue ``q`` forever, printing every item."""
    while True:
        print(await q.get())
|
||||
|
||||
|
||||
async def main():
    """Start the queue consumer and a watcher, then run until they stop.

    Raises:
        ValueError: Propagated from ``get_log_urls`` when no log is available.
    """
    # Keep references to the tasks: the event loop only holds weak references,
    # so an otherwise unreferenced task may be garbage collected mid-run.
    tasks = [asyncio.create_task(asd())]

    urls = await get_log_urls()
    # NOTE(review): only a single, arbitrarily chosen log is watched for now;
    # drop the `break` to watch all of them.
    for url in urls:
        # Watcher.__init__ already schedules watch_tree_size(); scheduling it
        # here as well would poll get-sth twice.
        w = Watcher(url, q)
        # Give the first get-sth request time to populate the tree size.
        await asyncio.sleep(3)
        tasks.append(asyncio.create_task(w.watcher()))
        break

    # Wait on the tasks themselves instead of sleeping for a fixed time, so
    # that the program neither exits silently nor hides a crashed task.
    await asyncio.gather(*tasks)
|
||||
|
||||
# Only start the event loop when executed as a script, so that importing this
# module (e.g. to reuse decode_cert) has no side effects.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
|
||||
# TODO:
|
||||
# if 429 (Too Many Requests) => self.sleep += 1
|
||||
# if queue empty: crash (something is definitely wrong!)
|
|
@ -1,15 +1,11 @@
|
|||
import asyncio
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
|
||||
import websockets
|
||||
from selenium import webdriver
|
||||
from selenium.common.exceptions import InvalidSessionIdException
|
||||
from selenium.common.exceptions import WebDriverException
|
||||
from selenium.webdriver.firefox.service import Service
|
||||
from selenium.webdriver.remote.webelement import WebElement
|
||||
|
||||
service = webdriver.FirefoxService(
|
||||
# Selenium only checks /usr/bin/geckodriver by default
|
||||
|
@ -27,42 +23,6 @@ driver = webdriver.Firefox(service=service, options=options)
|
|||
driver.set_page_load_timeout(3)
|
||||
|
||||
|
||||
async def ct_stream(domains: asyncio.Queue) -> None:
    """Watch Certificate Transparency (CT) logs for new certificates.

    Connects to the certstream websocket and passes every received message to
    ``ct_handler`` together with ``domains``. When the connection ends or
    fails, a new one is opened after a short pause. Returns when the task is
    cancelled or interrupted.
    """
    # Seconds to wait before reconnecting; without a pause an unreachable
    # endpoint would be retried in a tight loop.
    reconnect_delay = 5
    while True:
        try:
            async with websockets.connect("wss://certstream.calidog.io") as websocket:
                async for message_data in websocket:
                    ct_handler(message_data, domains)
        except (KeyboardInterrupt, asyncio.CancelledError):
            return
        except Exception as e:
            print(e)
        try:
            await asyncio.sleep(reconnect_delay)
        except (KeyboardInterrupt, asyncio.CancelledError):
            # Same shutdown behaviour as while connected.
            return
|
||||
|
||||
|
||||
def ct_handler(data: websockets.Data, domains: asyncio.Queue) -> None:
    """Queue one domain of a received certificate, if there is room for it."""
    # The transparency logs produce A LOT of certificates, so skip the cost of
    # decoding the message entirely when the queue cannot take more domains.
    if domains.full():
        return

    payload = json.loads(data)
    if payload["message_type"] != "certificate_update":
        return

    # A certificate may cover several domains. Take the first one that is not
    # a wildcard, since a wildcard host cannot be opened in the browser; when
    # there is none, nothing is queued.
    for name in payload["data"]["leaf_cert"]["all_domains"]:
        if "*" not in name:
            domains.put_nowait(name)
            return
|
||||
|
||||
|
||||
async def surf(url: str) -> None:
|
||||
"""Surf around URL for a bit."""
|
||||
for i in range(math.ceil(random.expovariate(0.5))):
|
||||
|
@ -77,7 +37,9 @@ async def surf(url: str) -> None:
|
|||
except InvalidSessionIdException:
|
||||
# Browser closed: no way to recover
|
||||
raise
|
||||
except WebDriverException:
|
||||
except WebDriverException as e:
|
||||
print(e)
|
||||
print(type(e))
|
||||
# Timeout, network error, JavaScript failure etc.
|
||||
break
|
||||
try:
|
||||
|
@ -92,11 +54,13 @@ async def surfer() -> None:
|
|||
ct_stream_task = asyncio.create_task(ct_stream(domains))
|
||||
while True:
|
||||
try:
|
||||
# TODO: asyncio.wait_for?
|
||||
domain = await domains.get()
|
||||
url = f"https://{domain}"
|
||||
await surf(url)
|
||||
except (KeyboardInterrupt, asyncio.CancelledError):
|
||||
break
|
||||
except (KeyboardInterrupt, asyncio.CancelledError) as e:
|
||||
print(e)
|
||||
raise
|
||||
ct_stream_task.cancel()
|
||||
|
||||
|
||||
|
|
71
autosurfer/test.py
Normal file
71
autosurfer/test.py
Normal file
File diff suppressed because one or more lines are too long
13
flake.nix
13
flake.nix
|
@ -45,8 +45,10 @@
|
|||
})
|
||||
pkgs.geckodriver
|
||||
(pkgs.python3.withPackages (ps: [
|
||||
ps.cryptography
|
||||
ps.httpx
|
||||
ps.selenium
|
||||
ps.websockets
|
||||
ps.structlog
|
||||
]))
|
||||
# pkgs.bashInteractive
|
||||
# pkgs.coreutils
|
||||
|
@ -86,12 +88,15 @@
|
|||
# required for Firefox to start.
|
||||
"HOME=/"
|
||||
];
|
||||
Entrypoint = ["python" "/autosurfer/main.py"];
|
||||
# Entrypoint = ["python" "/autosurfer/main.py"];
|
||||
Entrypoint = ["python" "/autosurfer/ct.py"];
|
||||
};
|
||||
};
|
||||
|
||||
# `nix shell`
|
||||
default = env;
|
||||
};
|
||||
apps.${system}.default = {
|
||||
type = "app";
|
||||
program = "${self.packages.${system}.default}/bin/python autosurfer/ct.py";
|
||||
};
|
||||
};
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue