Compare commits
No commits in common. "7f5b867c3755862ef5e361ce17b985d87e9d7ebb" and "75350d256f63a305bb50d370083c4b4be6cd8b36" have entirely different histories.
7f5b867c37 ... 75350d256f
README.md (10 lines changed)
@@ -25,18 +25,12 @@ newly issued certificates and attempts to open the domain in Firefox using
Selenium.

## Development
## Building
```shell
# Build
nix build .#oci
./result | podman load
podman run --rm autosurfer:dev

# Release
podman push autosurfer:dev quay.io/caspervk/autosurfer:latest

# 👉😎👉
podman run --rm -v ./autosurfer/:/autosurfer/:ro --network host --env DISPLAY --security-opt label=type:container_runtime_t autosurfer:dev
# podman push autosurfer:dev quay.io/caspervk/autosurfer:latest
```
autosurfer/ct.py (177 lines changed)
@@ -1,177 +0,0 @@
from datetime import UTC, datetime
import logging
import random
from functools import wraps
from json import JSONDecodeError
import asyncio
import base64

from cryptography import x509
import httpx
import structlog


logger = structlog.stdlib.get_logger()
client = httpx.AsyncClient()


async def get_servers() -> set[str]:
    """TODO."""
    # The format of these server lists is not part of the RFC.
    # https://certificate.transparency.dev/useragents/
    server_lists = {
        "https://www.gstatic.com/ct/log_list/v3/log_list.json",
        "https://valid.apple.com/ct/log_list/current_log_list.json",
    }
    servers = set()
    now = datetime.now(tz=UTC)
    for server_list in server_lists:
        try:
            r = await client.get(server_list)
            r.raise_for_status()
            servers.update(
                log["url"]
                for operator in r.json()["operators"]
                for log in operator["logs"]
                if ("usable" in log["state"]
                    and datetime.fromisoformat(log["temporal_interval"]["start_inclusive"]) <= now
                    and datetime.fromisoformat(log["temporal_interval"]["end_exclusive"]) > now)
            )
        except:
            logger.exception("Error in log server list")
            continue
    if not servers:
        raise ValueError("All log server lists failed")
    return servers
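For reference, a minimal sketch of the log-list JSON shape that get_servers expects; the field names follow the code above, while the concrete values are purely illustrative:

```python
# Illustrative log list (values made up); get_servers keeps log["url"] for every
# log whose state is "usable" and whose temporal_interval covers "now".
example_log_list = {
    "operators": [
        {
            "logs": [
                {
                    "url": "https://ct.example.net/2025/",
                    "state": {"usable": {"timestamp": "2024-01-01T00:00:00Z"}},
                    "temporal_interval": {
                        "start_inclusive": "2025-01-01T00:00:00Z",
                        "end_exclusive": "2026-01-01T00:00:00Z",
                    },
                },
            ],
        },
    ],
}
```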

def decode_cert(leaf: bytes) -> x509.Certificate:
    # MerkleTreeLeaf for timestamped entry containing an x509 certificate:
    #
    # +------+-----------------+
    # | Byte |                 |
    # +------+-----------------+
    # | 0    | Version         |
    # +------+-----------------+
    # | 1    | Leaf type       |
    # +------+-----------------+
    # | 2    |                 |
    # | 3    |                 |
    # | 4    |                 |
    # | 5    | Timestamp       |
    # | 6    |                 |
    # | 7    |                 |
    # | 8    |                 |
    # | 9    |                 |
    # +------+-----------------+
    # | 10   | Entry type      |
    # | 11   |                 |
    # +------+-----------------+
    # | 12   |                 |
    # | 13   | Cert length (n) |
    # | 14   |                 |
    # +------+-----------------+
    # | 15   |                 |
    # | ..   | x509 DER cert   |
    # | n    |                 |
    # +------+-----------------+
    # | n+1  | CT extensions   |
    # | ..   |                 |
    # +------+-----------------+
    #
    # https://www.rfc-editor.org/rfc/rfc6962.html#section-3.4
    # https://www.rfc-editor.org/rfc/rfc5246.html#section-4

    # RFC 6962 only defines version 1 (0x00) of the Merkle tree leaf and
    # a single leaf type: timestamped entry (0x00).
    if (version := leaf[0]) != 0:
        raise ValueError(f"Unknown version {version}")
    if (leaf_type := leaf[1]) != 0:
        raise ValueError(f"Unknown leaf type {leaf_type}")

    if leaf[10:12] != b"\x00\x00":
        # Timestamped entry type 0x0000 designates an x509 certificate. Type
        # 0x0001 is a precert, which we cannot use, and therefore ignore.
        raise TypeError("Not x509 entry")

    cert_length = int.from_bytes(leaf[12:15], "big")
    cert_bytes = leaf[15 : 15 + cert_length]
    cert = x509.load_der_x509_certificate(cert_bytes)
    return cert
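As a quick, hedged illustration of the layout above (the DER bytes here are placeholders rather than a real certificate, so the final x509 load in decode_cert would still fail):

```python
# Illustrative only: assemble a fake MerkleTreeLeaf and read the 24-bit
# certificate length back out, mirroring decode_cert's slicing.
fake_der = b"\x30\x03\x02\x01\x01"         # placeholder DER bytes, not a real cert
leaf = (
    b"\x00"                                # byte 0: version (v1)
    + b"\x00"                              # byte 1: leaf type (timestamped entry)
    + (0).to_bytes(8, "big")               # bytes 2-9: timestamp
    + b"\x00\x00"                          # bytes 10-11: entry type (x509)
    + len(fake_der).to_bytes(3, "big")     # bytes 12-14: cert length
    + fake_der                             # bytes 15..: DER certificate
)
assert leaf[0] == 0 and leaf[1] == 0
assert int.from_bytes(leaf[12:15], "big") == len(fake_der)
```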

def forever(f):
    @wraps(f)
    async def wrapper(*args, **kwargs):
        while True:
            try:
                await f(*args, **kwargs)
            except Exception:
                logger.exception("Retrying")
                await asyncio.sleep(30)
            except:
                break
    return wrapper


class Watcher:
    page_size = 100

    def __init__(self, server: str, queue: asyncio.Queue) -> None:
        self.server = server
        self.queue = queue

        self.log = logger.bind(server=server)

        self.tree_size = 0
        self.tree_watcher = asyncio.create_task(self.watch_tree_size())

        self.start = 0
        self.end = 0

    @forever
    async def watch_tree_size(self) -> None:
        r = await client.get(f"{self.server}ct/v1/get-sth")
        self.tree_size = r.json()["tree_size"]
        self.log.debug("Tree size", size=self.tree_size)
        await asyncio.sleep(600)

    @forever
    async def watcher(self) -> None:
        index = random.randrange(self.start, self.tree_size - self.page_size)
        r = await client.get(
            f"{self.server}ct/v1/get-entries",
            params={"start": index, "end": index + self.page_size},
        )
        entries = r.json()["entries"]

        now = datetime.now(tz=UTC)
        expired = 0
        for entry in entries:
            leaf = base64.b64decode(entry["leaf_input"])
            try:
                cert = decode_cert(leaf)
            except TypeError:
                # Ignore precerts
                continue
            if cert.not_valid_before_utc > now:
                continue
            if cert.not_valid_after_utc < now:
                expired += 1
                continue
            await self.queue.put(cert)

        # All expired: move up
        if len(entries) == expired > 5:
            self.start = index
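For context, rough sketches of the two RFC 6962 responses Watcher consumes; the field names come from the RFC and the code above, and the values are illustrative:

```python
# Illustrative get-sth response: watch_tree_size only reads "tree_size".
example_sth = {
    "tree_size": 123456789,
    "timestamp": 1735689600000,
    "sha256_root_hash": "…base64…",
    "tree_head_signature": "…base64…",
}

# Illustrative get-entries response: watcher() base64-decodes each "leaf_input".
example_entries = {
    "entries": [
        {"leaf_input": "…base64 MerkleTreeLeaf…", "extra_data": "…base64…"},
    ],
}
```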


q = asyncio.Queue(maxsize=100)


async def asd():
    while True:
        # await asyncio.sleep(10)
        cert = await q.get()
        print(cert)


asyncio.run(main(q))
@@ -1,11 +1,12 @@
import asyncio
import math
import json
import os
import random

import websockets
from selenium import webdriver
from selenium.common.exceptions import InvalidSessionIdException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.remote.webelement import WebElement

service = webdriver.FirefoxService(
    # Selenium only checks /usr/bin/geckodriver by default
@@ -23,29 +24,40 @@ driver = webdriver.Firefox(service=service, options=options)
driver.set_page_load_timeout(3)


async def surf(url: str) -> None:
    """Surf around URL for a bit."""
    for i in range(math.ceil(random.expovariate(0.5))):
        print("🏄" if i == 0 else "🔗", url)
async def ct_stream(domains: asyncio.Queue) -> None:
    """Watch Certificate Transparency (CT) logs for new certificates."""
    while True:
        try:
            await asyncio.to_thread(driver.get, url)
            # Find all links on the page. This is *much* faster than find_elements("a") + get_attribute("href")
            links = await asyncio.to_thread(
                driver.execute_script,
                "return [...document.links].filter(a => !!a.host && a.href != location.href && !a.href.includes('#')).map(a => a.href);",
            )
        except InvalidSessionIdException:
            # Browser closed: no way to recover
            raise
        except WebDriverException as e:
            async with websockets.connect("wss://certstream.calidog.io") as websocket:
                async for message_data in websocket:
                    ct_handler(message_data, domains)
        except (KeyboardInterrupt, asyncio.CancelledError):
            return
        except Exception as e:
            print(e)
            print(type(e))
            # Timeout, network error, JavaScript failure etc.
            break


def ct_handler(data: websockets.Data, domains: asyncio.Queue) -> None:
    """Save certificate's domain to queue if needed."""
    # There are A LOT of certificates coming through the transparency logs;
    # immediately bail without spending time decoding the message if we have
    # enough domains queued up already.
    if domains.full():
        return

    message = json.loads(data)
    if message["message_type"] != "certificate_update":
        return

    # Certificates can verify multiple domains: we arbitrarily select the first
    # non-wildcard one since we cannot connect to such a host in the browser.
    cert_domains = message["data"]["leaf_cert"]["all_domains"]
    try:
        url = random.choice(links)
    except IndexError:
        break
        cert_domain = next(d for d in cert_domains if "*" not in d)
    except StopIteration:
        return

    domains.put_nowait(cert_domain)


async def surfer() -> None:
@@ -53,10 +65,13 @@ async def surfer() -> None:
    domains = asyncio.Queue(maxsize=50)
    ct_stream_task = asyncio.create_task(ct_stream(domains))
    while True:
        try:
            domain = await domains.get()
            url = f"https://{domain}"
            await surf(url)
            print("🏄", url)
            try:
                await asyncio.to_thread(driver.get, url)
            except WebDriverException:
                pass
        except (KeyboardInterrupt, asyncio.CancelledError):
            break
    ct_stream_task.cancel()
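To show how the new certstream-based pieces fit together, a small hedged sketch that feeds ct_handler a hand-built certificate_update message of the shape parsed above (the domains are made up for illustration):

```python
# Illustrative only: drive ct_handler (defined in the diff above) with a fake
# certstream-style message instead of a live websocket.
import asyncio
import json

domains: asyncio.Queue = asyncio.Queue(maxsize=50)
message = json.dumps({
    "message_type": "certificate_update",
    "data": {"leaf_cert": {"all_domains": ["*.example.org", "www.example.org"]}},
})
ct_handler(message, domains)
print(domains.get_nowait())  # www.example.org (first non-wildcard domain)
```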
File diff suppressed because one or more lines are too long