Compare commits

...

2 commits

Author SHA1 Message Date
Casper V. Kristensen 7f5b867c37 wip 2024-09-05 01:53:19 +02:00
Casper V. Kristensen fb54dd01ec surf around for a bit 2024-08-07 00:38:25 +02:00
5 changed files with 286 additions and 45 deletions

View file

@ -25,12 +25,18 @@ newly issued certificates and attempts to open the domain in Firefox using
Selenium. Selenium.
## Building ## Development
```shell ```shell
# Build
nix build .#oci nix build .#oci
./result | podman load ./result | podman load
podman run --rm autosurfer:dev podman run --rm autosurfer:dev
# podman push autosurfer:dev quay.io/caspervk/autosurfer:latest
# Release
podman push autosurfer:dev quay.io/caspervk/autosurfer:latest
# 👉😎👉
podman run --rm -v ./autosurfer/:/autosurfer/:ro --network host --env DISPLAY --security-opt label=type:container_runtime_t autosurfer:dev
``` ```

177
autosurfer/ct.py Normal file
View file

@ -0,0 +1,177 @@
from datetime import UTC, datetime
import logging
import random
from functools import wraps
from json import JSONDecodeError
import asyncio
import base64
from cryptography import x509
import httpx
import structlog
logger = structlog.stdlib.get_logger()
client = httpx.AsyncClient()
async def get_servers() -> set[str]:
    """Return the URLs of Certificate Transparency logs that are usable now.

    Fetches the Google and Apple log lists and collects the ``url`` of every
    log whose ``state`` contains ``"usable"`` and whose ``temporal_interval``
    (``start_inclusive`` <= now < ``end_exclusive``) covers the current time.

    A list that cannot be fetched or parsed is logged and skipped; URLs that
    were already collected are kept.

    Returns:
        The set of log server URLs.

    Raises:
        ValueError: If no usable log server was found in any list.
    """
    # The format of these server lists are not part of the RFC.
    # https://certificate.transparency.dev/useragents/
    server_lists = {
        "https://www.gstatic.com/ct/log_list/v3/log_list.json",
        "https://valid.apple.com/ct/log_list/current_log_list.json",
    }
    servers: set[str] = set()
    now = datetime.now(tz=UTC)
    for server_list in server_lists:
        try:
            r = await client.get(server_list)
            r.raise_for_status()
            servers.update(
                log["url"]
                for operator in r.json()["operators"]
                for log in operator["logs"]
                if (
                    "usable" in log["state"]
                    and datetime.fromisoformat(log["temporal_interval"]["start_inclusive"]) <= now
                    and datetime.fromisoformat(log["temporal_interval"]["end_exclusive"]) > now
                )
            )
        except Exception:
            # Not a bare ``except``: task cancellation (asyncio.CancelledError)
            # and KeyboardInterrupt derive from BaseException and must
            # propagate instead of being reported as a broken server list.
            logger.exception("Error in log server list")
    if not servers:
        raise ValueError("All log server lists failed")
    return servers
def decode_cert(leaf: bytes) -> x509.Certificate:
    """Extract the x509 certificate from an RFC 6962 MerkleTreeLeaf.

    Args:
        leaf: The raw (already base64-decoded) ``leaf_input`` bytes.

    Returns:
        The certificate parsed from the DER bytes embedded in the leaf.

    Raises:
        ValueError: If the leaf is truncated, or carries an unknown version
            or leaf type.
        TypeError: If the timestamped entry is not an x509 entry (for example
            a precert). Callers use this to skip such entries.
    """
    # MerkleTreeLeaf for timestamped entry containing an x509 certificate:
    #
    # +------+-----------------------+
    # | Byte |                       |
    # +------+-----------------------+
    # |    0 | Version               |
    # +------+-----------------------+
    # |    1 | Leaf type             |
    # +------+-----------------------+
    # |    2 |                       |
    # |    3 |                       |
    # |    4 |                       |
    # |    5 | Timestamp             |
    # |    6 |                       |
    # |    7 |                       |
    # |    8 |                       |
    # |    9 |                       |
    # +------+-----------------------+
    # |   10 | Entry type            |
    # |   11 |                       |
    # +------+-----------------------+
    # |   12 |                       |
    # |   13 | Cert length (n)       |
    # |   14 |                       |
    # +------+-----------------------+
    # |   15 |                       |
    # |   .. | x509 DER cert         |
    # | 14+n |                       |
    # +------+-----------------------+
    # | 15+n | CT extensions         |
    # |   .. |                       |
    # +------+-----------------------+
    #
    # https://www.rfc-editor.org/rfc/rfc6962.html#section-3.4
    # https://www.rfc-editor.org/rfc/rfc5246.html#section-4
    header_length = 15
    if len(leaf) < header_length:
        # Fail with a clear error instead of an IndexError on leaf[0], or a
        # silently short slice further down.
        raise ValueError(f"Truncated leaf: {len(leaf)} bytes")
    # RFC 6962 only defines version 1 (0x00) of the merkle tree leaf and
    # a single leaf type: timestamped entry (0x00).
    if (version := leaf[0]) != 0:
        raise ValueError(f"Unknown version {version}")
    if (leaf_type := leaf[1]) != 0:
        raise ValueError(f"Unknown leaf type {leaf_type}")
    if leaf[10:12] != b"\x00\x00":
        # Timestamped entry type 0x0000 designates a x509 certificate. Type
        # 0x0001 is a precert, which we can not use, and therefore ignore.
        raise TypeError("Not x509 entry")
    cert_length = int.from_bytes(leaf[12:15], "big")
    cert_bytes = leaf[header_length : header_length + cert_length]
    if len(cert_bytes) != cert_length:
        # Slicing never raises, so a short leaf must be detected explicitly.
        raise ValueError(
            f"Truncated certificate: expected {cert_length} bytes, got {len(cert_bytes)}"
        )
    return x509.load_der_x509_certificate(cert_bytes)
def forever(f):
    """Decorate coroutine function *f* so that it is awaited again and again.

    Every time ``f`` returns it is called once more with the same arguments.
    If it raises an ``Exception``, the error is logged and the next attempt
    is made after 30 seconds. Any other ``BaseException`` raised by ``f``
    (such as task cancellation) ends the loop silently, and the wrapper
    returns ``None``.
    """

    @wraps(f)
    async def loop_forever(*args, **kwargs):
        while True:
            try:
                await f(*args, **kwargs)
            except Exception:
                logger.exception("Retrying")
                await asyncio.sleep(30)
            except BaseException:
                # Cancellation, KeyboardInterrupt, etc.: stop quietly.
                return

    return loop_forever
class Watcher:
    """Sample random pages of one CT log and queue currently valid certificates.

    A background task keeps ``tree_size`` up to date, while ``watcher``
    repeatedly fetches a random page of entries and puts every certificate
    that is valid right now on ``queue``.
    """

    # Number of log entries requested per get-entries call.
    page_size = 100

    def __init__(self, server: str, queue: asyncio.Queue) -> None:
        """Bind to ``server`` and start tracking its tree size.

        Spawns a task with ``asyncio.create_task``, so it must be called
        while an event loop is running.

        Args:
            server: Base URL of the log; request paths are appended directly,
                so it is expected to end with ``/``.
            queue: Queue that receives the decoded certificates.
        """
        self.server = server
        self.queue = queue
        self.log = logger.bind(server=server)
        # Stays 0 until the first get-sth response has been processed.
        self.tree_size = 0
        self.tree_watcher = asyncio.create_task(self.watch_tree_size())
        # Lower bound (inclusive) of the entry indexes that are sampled.
        self.start = 0
        # Not used by any code in this class.
        self.end = 0

    @forever
    async def watch_tree_size(self) -> None:
        """Refresh ``tree_size`` from the signed tree head, then wait 10 minutes."""
        r = await client.get(f"{self.server}ct/v1/get-sth")
        # Fail on the HTTP status rather than on a confusing JSON/KeyError.
        r.raise_for_status()
        self.tree_size = r.json()["tree_size"]
        self.log.debug("Tree size", size=self.tree_size)
        await asyncio.sleep(600)

    @forever
    async def watcher(self) -> None:
        """Fetch one random page of entries and queue the valid certificates.

        Precerts and certificates that are not yet valid are skipped. If the
        page held more than five entries and all of them were expired
        certificates, ``start`` is raised to this page's index so that later
        samples are drawn from newer entries.
        """
        # Highest start index that still leaves a full page inside the tree.
        last_start = self.tree_size - self.page_size
        if last_start < self.start:
            # Tree size not known yet (or the log is smaller than one page):
            # wait briefly instead of letting randrange() raise on an empty
            # range, which would log a traceback on every attempt.
            await asyncio.sleep(1)
            return
        index = random.randrange(self.start, last_start + 1)
        r = await client.get(
            f"{self.server}ct/v1/get-entries",
            # get-entries takes the index of the *last* entry as ``end``
            # (inclusive), so a page of page_size entries ends at
            # index + page_size - 1.
            params={"start": index, "end": index + self.page_size - 1},
        )
        r.raise_for_status()
        entries = r.json()["entries"]
        now = datetime.now(tz=UTC)
        expired = 0
        for entry in entries:
            leaf = base64.b64decode(entry["leaf_input"])
            try:
                cert = decode_cert(leaf)
            except TypeError:
                # Ignore precerts
                continue
            if cert.not_valid_before_utc > now:
                continue
            if cert.not_valid_after_utc < now:
                expired += 1
                continue
            await self.queue.put(cert)
        # All expired: move up
        if expired > 5 and expired == len(entries):
            self.start = index
# Bounded queue (at most 100 items); presumably filled with certificates by
# Watcher instances - TODO confirm, the producer is not wired up in view.
q = asyncio.Queue(maxsize=100)


async def asd():
    """Debug consumer: print every item taken from ``q``, forever."""
    while True:
        # await asyncio.sleep(10)
        cert = await q.get()
        print(cert)


# NOTE(review): `main` is not defined in the visible part of this file and
# nothing visible schedules `asd` - confirm before relying on this entry point.
asyncio.run(main(q))

View file

@ -1,12 +1,11 @@
import asyncio import asyncio
import json import math
import os import os
import random
import websockets
from selenium import webdriver from selenium import webdriver
from selenium.common.exceptions import InvalidSessionIdException
from selenium.common.exceptions import WebDriverException from selenium.common.exceptions import WebDriverException
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.remote.webelement import WebElement
service = webdriver.FirefoxService( service = webdriver.FirefoxService(
# Selenium only checks /usr/bin/geckodriver by default # Selenium only checks /usr/bin/geckodriver by default
@ -24,40 +23,29 @@ driver = webdriver.Firefox(service=service, options=options)
driver.set_page_load_timeout(3) driver.set_page_load_timeout(3)
async def ct_stream(domains: asyncio.Queue) -> None: async def surf(url: str) -> None:
"""Watch Certificate Transparency (CT) logs for new certificates.""" """Surf around URL for a bit."""
while True: for i in range(math.ceil(random.expovariate(0.5))):
print("🏄" if i == 0 else "🔗", url)
try: try:
async with websockets.connect("wss://certstream.calidog.io") as websocket: await asyncio.to_thread(driver.get, url)
async for message_data in websocket: # Find all links on page. This is *much* faster than find_elements("a") + get_attribute("href")
ct_handler(message_data, domains) links = await asyncio.to_thread(
except (KeyboardInterrupt, asyncio.CancelledError): driver.execute_script,
return "return [...document.links].filter(a => !!a.host && a.href != location.href && !a.href.includes('#')).map(a => a.href);",
except Exception as e: )
except InvalidSessionIdException:
# Browser closed: no way to recover
raise
except WebDriverException as e:
print(e) print(e)
print(type(e))
# Timeout, network error, JavaScript failure etc.
def ct_handler(data: websockets.Data, domains: asyncio.Queue) -> None: break
"""Save certificate's domain to queue if needed.""" try:
# There are A LOT of certificates coming through the transparency logs; url = random.choice(links)
# immediately bail without spending time decoding the message if we have except IndexError:
# enough domains queued up already. break
if domains.full():
return
message = json.loads(data)
if message["message_type"] != "certificate_update":
return
# Certificates can verify multiple domains: We arbitrarily select the first
# non-wildcard one since we cannot connect to such host in the browser.
cert_domains = message["data"]["leaf_cert"]["all_domains"]
try:
cert_domain = next(d for d in cert_domains if "*" not in d)
except StopIteration:
return
domains.put_nowait(cert_domain)
async def surfer() -> None: async def surfer() -> None:
@ -65,13 +53,10 @@ async def surfer() -> None:
domains = asyncio.Queue(maxsize=50) domains = asyncio.Queue(maxsize=50)
ct_stream_task = asyncio.create_task(ct_stream(domains)) ct_stream_task = asyncio.create_task(ct_stream(domains))
while True: while True:
domain = await domains.get()
url = f"https://{domain}"
print("🏄", url)
try: try:
await asyncio.to_thread(driver.get, url) domain = await domains.get()
except WebDriverException: url = f"https://{domain}"
pass await surf(url)
except (KeyboardInterrupt, asyncio.CancelledError): except (KeyboardInterrupt, asyncio.CancelledError):
break break
ct_stream_task.cancel() ct_stream_task.cancel()

71
autosurfer/test.py Normal file

File diff suppressed because one or more lines are too long

View file

@ -45,8 +45,10 @@
}) })
pkgs.geckodriver pkgs.geckodriver
(pkgs.python3.withPackages (ps: [ (pkgs.python3.withPackages (ps: [
ps.cryptography
ps.httpx
ps.selenium ps.selenium
ps.websockets ps.structlog
])) ]))
# pkgs.bashInteractive # pkgs.bashInteractive
# pkgs.coreutils # pkgs.coreutils