commit 49eb78d5c060f7a50677f9316f2c9fb14d1ab7ec Author: Casper V. ### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

### Python Patch ###
.venv/

### Python.VirtualEnv Stack ###
# Virtualenv
#
[Bb]in
[Ii]nclude
[Ll]ib
[Ll]ib64
[Ll]ocal
[Ss]cripts
pyvenv.cfg
pip-selfcheck.json

# End of,pycharm,pycharm+all,pycharm+iml,emacs The dependencies can be installed from the Debian repositories: +```bash +sudo apt install -y python3.7 python3-libtorrent libmpv1 +``` +**Or** from source: +```bash +# Python 3.7 ( +sudo apt build-dep -y python3 +wget +tar xf Python-3.7.1.tgz +cd Python-3.7.1 +./configure --enable-optimizations +make -j$(nproc) +sudo make altinstall + +# Libtorrent ( +sudo apt build-dep -y libtorrent-rasterbar +wget +tar xf libtorrent-rasterbar-1.1.11.tar.gz +cd libtorrent-rasterbar-1.1.11/ +PYTHON=$(which python3.7) ./configure --enable-python-binding --with-libiconv --disable-debug +make -j$(nproc) +sudo make install +sudo ldconfig +``` + +The libtorrent version from the Debian repository doesn't seem to play nicely with Python 3.7, so please **check the +installation**: +```bash +python3.7 -c "import libtorrent; print(libtorrent.version)" +``` + +After installing the dependencies, the application can be installed using pip: +```bash +python3.7 -m pip install --upgrade +``` + +## Usage +The program can be started by running `silverstream` or `python3.7 -m silverstream`, depending on system configuration. +```text +usage: silverstream [-h] [--interface interface] [--port port] [--load] [--stats] [-v] [--clean] [--crawler-nodes nodes] + [--crawler-delay seconds] [--crawler-await-bootstrap] [--indexer-workers workers] + [--indexer-save-torrents] [--btdht-seed host:port] + +optional arguments: + -h, --help show this help message and exit + --interface interface Network interface to bind to. (default: + --port port Network port to listen listen on. Ports are bound consecutively from this port. + (default: 6881) + --load Load state from file (use 'save' from the cli to save). + --stats Save statistics to file. + -v, --verbose Increase verbosity level. Can be used multiple times. + --clean Remove data directory. + +Crawler: + --crawler-nodes nodes Number of BitTorrent DHT nodes to start. 
(default: 32) + --crawler-delay seconds Number of seconds to wait between starting each of the BitTorrent DHT nodes. (default: 1) + --crawler-await-bootstrap Wait for nodes to fully bootstrap before starting the next one. + +Indexer: + --indexer-workers workers Number of Indexer workers to start. (default: 25) + --indexer-save-torrents Save indexed torrents to torrents/. + +BitTorrent DHT: + --btdht-seed host:port BitTorrent DHT seed nodes. Overrides default seeds. Can be specified multiple times. +``` + +You may need to open some ports in your firewall. This command will allow a crawler with 128 nodes on UFW: +```bash +ufw allow 6881:7009/udp comment "silverstream" +``` diff --git a/ b/ new file mode 100644 index 0000000..553cddc --- /dev/null +++ b/ @@ -0,0 +1,50 @@ +# Always prefer setuptools over distutils +from setuptools import setup, find_packages +# To use a consistent encoding +from codecs import open +from os import path + +from silverstream import __author__, __version__, __licence__ + + +here = path.abspath(path.dirname(__file__)) + +# Get the long description from the README file +with open(path.join(here, ""), encoding="utf-8") as f: + long_description = + + +setup( + name="silverstream", + version=__version__, + description="A fully decentralised music streaming platform", + long_description=long_description, + long_description_content_type="text/markdown", + url="", + project_urls={ + "Source": "" + }, + author=__author__, + classifiers=[ + "Development Status :: 3 - Alpha", + "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", + "Programming Language :: Python :: 3", + ], + python_requires=">=3.7", + keywords="silverstream music streaming bittorrent kademlia p2p", + license=__licence__, + packages=find_packages(exclude=["tests"]), + install_requires=[ + "python-mpv", + ], + extras_require={ + "dev": [ + "matplotlib" + ] + }, + entry_points={ + "console_scripts": [ + "silverstream = silverstream.cli:main" + ] + }, +) diff 
--git a/silverstream.pdf b/silverstream.pdf new file mode 100644 index 0000000..2182f9b Binary files /dev/null and b/silverstream.pdf differ diff --git a/silverstream/ b/silverstream/ new file mode 100644 index 0000000..a553a69 --- /dev/null +++ b/silverstream/ @@ -0,0 +1,6 @@ +__version__ = "0.0.1" +__author__ = "Alexander Munch-Hansen & Casper V. Kristensen" +__licence__ = "GPLv3" + +import sys +assert sys.version_info >= (3, 7), "Silverstream requires python 3.7 or later." diff --git a/silverstream/ b/silverstream/ new file mode 100644 index 0000000..a59b396 --- /dev/null +++ b/silverstream/ @@ -0,0 +1,4 @@ + +if __name__ == "__main__": + from .cli import main + main() diff --git a/silverstream/bittorrent/ b/silverstream/bittorrent/ new file mode 100644 index 0000000..e69de29 diff --git a/silverstream/bittorrent/ b/silverstream/bittorrent/ new file mode 100644 index 0000000..204d1c5 --- /dev/null +++ b/silverstream/bittorrent/ @@ -0,0 +1,137 @@ +# +# This file is modified from +# Used to encode/decode BitTorrent Mainline DHT network communications. 
+# + + +DICT_DELIM = b'd' +END_DELIM = b'e' +INT_DELIM = b'i' +LIST_DELIM = b'l' +BYTE_SEP = b':' + + +def decode_int(x, f): + f += 1 + newf = x.index(END_DELIM, f) + n = int(x[f:newf]) + if x[f:f + 1] == b'-' and x[f + 1:f + 2] == b'0': + raise ValueError + elif x[f:f + 1] == b'0' and newf != f + 1: + raise ValueError + return (n, newf + 1) + + +def decode_string(x, f): + colon = x.index(BYTE_SEP, f) + n = int(x[f:colon]) + if x[f:f + 1] == b'0' and colon != f + 1: + raise ValueError + colon += 1 + return (x[colon:colon + n], colon + n) + + +def decode_list(x, f): + r, f = [], f + 1 + while x[f:f + 1] != END_DELIM: + v, f = decode_func[x[f:f + 1]](x, f) + r.append(v) + return (r, f + 1) + + +def decode_dict(x, f): + r, f = {}, f + 1 + while x[f:f + 1] != END_DELIM: + k, f = decode_string(x, f) + r[k.decode("utf8")], f = decode_func[x[f:f + 1]](x, f) + return (r, f + 1) + + +decode_func = { + LIST_DELIM: decode_list, + DICT_DELIM: decode_dict, + INT_DELIM: decode_int, + b'0': decode_string, + b'1': decode_string, + b'2': decode_string, + b'3': decode_string, + b'4': decode_string, + b'5': decode_string, + b'6': decode_string, + b'7': decode_string, + b'8': decode_string, + b'9': decode_string +} + + +def bdecode(x): + try: + r, __ = decode_func[x[0:1]](x, 0) + except (IndexError, KeyError, ValueError): + raise ValueError + else: + return r + + +class Bencached(object): + __slots__ = ['bencoded'] + + def __init__(self, s): + self.bencoded = s + + +def encode_bencached(x, r): + r.append(x.bencoded) + + +def encode_int(x, r): + r.extend((INT_DELIM, str(x).encode('utf8'), END_DELIM)) + + +def encode_bool(x, r): + encode_int(1 if x else 0, r) + + +def encode_string(x, r): + encode_bytes(x.encode('utf8'), r) + + +def encode_bytes(x, r): + r.extend((str(len(x)).encode('utf8'), BYTE_SEP, x)) + + +def encode_list(x, r): + r.append(LIST_DELIM) + for i in x: + encode_func[type(i)](i, r) + r.append(END_DELIM) + + +def encode_dict(x, r): + r.append(DICT_DELIM) + for k, v in 
sorted(x.items()): + try: + k = k.encode('utf8') + except AttributeError: + pass + r.extend((str(len(k)).encode('utf8'), BYTE_SEP, k)) + encode_func[type(v)](v, r) + r.append(END_DELIM) + + +encode_func = { + Bencached: encode_bencached, + int: encode_int, + list: encode_list, + tuple: encode_list, + dict: encode_dict, + bool: encode_bool, + str: encode_string, + bytes: encode_bytes +} + + +def bencode(x): + r = [] + encode_func[type(x)](x, r) + return b''.join(r) diff --git a/silverstream/bittorrent/ b/silverstream/bittorrent/ new file mode 100644 index 0000000..bee9c06 --- /dev/null +++ b/silverstream/bittorrent/ @@ -0,0 +1,101 @@ +from __future__ import annotations + +import asyncio +import logging +from collections import Sequence +from contextlib import suppress +from math import inf +from typing import Iterable, Optional + +import libtorrent + +from .database import TorrentDatabase +from .dht.node import Node +from .dht.util import distance +from ..config import data_dir +from ..util import async_take, Endpoint, humanize + +logger = logging.getLogger(__name__) + + +class TorrentClient: + def __init__(self, endpoint: Endpoint, torrent_database: TorrentDatabase, dht_nodes: Sequence[Node]) -> None: +"Starting torrent client on %s", endpoint) + self.torrent_database = torrent_database + self.dht_nodes = dht_nodes + self.session = libtorrent.session({ + "listen_interfaces": str(endpoint), + "enable_dht": False, # we'll use our own + }) + # TODO: The recommended practice is to first pause the session, then generate the fast resume data, and then close it down. 
+ + @property + def status(self) -> libtorrent.session_status: + return self.session.status() + + async def get_torrent_handle(self, info_hash: bytes) -> Optional[libtorrent.torrent_handle]: + while True: + handle = self.session.find_torrent(libtorrent.sha1_hash(info_hash)) + if handle.is_valid(): + return handle + await asyncio.sleep(1) + + async def download(self, info_hash: bytes, files: Iterable[int] = None) -> libtorrent.torrent_info: + """ + Download data of the given torrent file. + + :param info_hash: The info hash of the torrent. + :param files: File-indexes to download. Downloads all by default. + :return Torrent info of downloaded torrent. + """ +"Downloading %s (files: %s)", info_hash, files) + params = { + "info_hash": info_hash, + "save_path": str(data_dir.joinpath("downloads/")), + "auto_managed": False, + "paused": True + } + with suppress(StopIteration): + torrent_file = next(data_dir.joinpath("torrents/").glob(f"{info_hash.hex()}*.torrent")) +"Using file %s", torrent_file) + params.update({"ti": libtorrent.torrent_info(str(torrent_file))}) + handle: libtorrent.torrent_handle = self.session.add_torrent(params) + handle.set_sequential_download(True) # apparently doesn't work setting it in the constructor + if files is not None: + handle.prioritize_files([int(f in files) for f in range(100_000)]) + handle.resume() + # TODO: Add peers from torrent database? 
They might've been indexed days ago, so maybe not too relevant + for peer in self.torrent_database.get_peers(info_hash): + handle.connect_peer(peer) + + get_peers = min(self.dht_nodes, key=lambda n: distance(, info_hash)).get_peers(info_hash) + while True: + s = handle.status() + logger.debug(f"Downloading { or info_hash.hex()}: {s.progress:.2%} complete (eta: " + f"{torrent_eta(s):.0f}s, down: {humanize(s.download_rate, suffix='B/s')} up: " + f"{humanize(s.upload_rate, suffix='B/s')} peers: {s.num_peers}, candidates: " + f"{s.connect_candidates}) {s.state}") + if handle.is_finished(): + break + if s.download_payload_rate < 128_000: # 1024 kbit/s + logger.debug("Slow download rate; asking dht for 8 more peers") + async for peer in async_take(get_peers, 8): + handle.connect_peer(peer) + await asyncio.sleep(1) + + # TODO: Remove torrents where we only downloaded the metadata to keep the number of open connections low? + #if not files: + # self.session.remove_torrent(handle) +"Downloading %s (files: %s) completed", info_hash, files) + return handle.get_torrent_info() + + async def get_torrent_info(self, info_hash: bytes) -> libtorrent.torrent_info: +"Getting torrent info for %s", info_hash) + return await, files=[]) + + +def torrent_eta(torrent_status: libtorrent.torrent_status) -> float: + try: + return (torrent_status.total_wanted - torrent_status.total_wanted_done) / torrent_status.download_payload_rate + except ZeroDivisionError: + return inf diff --git a/silverstream/bittorrent/ b/silverstream/bittorrent/ new file mode 100644 index 0000000..98e70e8 --- /dev/null +++ b/silverstream/bittorrent/ @@ -0,0 +1,78 @@ +from __future__ import annotations + +import asyncio +import json +import logging +import random +import typing +from asyncio import Event +from typing import List, Iterator + +from .database import TorrentDatabase +from .dht import config +from .dht.node import Node +from ..config import data_dir +from ..util import split_interval, i2b, Endpoint + +if 
typing.TYPE_CHECKING: # PyCharm + from .dht.node import Peer, Contact + +logger = logging.getLogger(__name__) + + +class Crawler: + def __init__(self, torrent_database: TorrentDatabase, endpoints: Iterator[Endpoint] = None, num_nodes=32, delay=1, + seeds: List[Contact] = None, nodes: List[Node] = None, await_bootstrap=False) -> None: + self.torrent_database = torrent_database + self.nodes: List[Node] = nodes or [] + + self.started = Event() + asyncio.create_task(self._start(endpoints, num_nodes, delay, seeds, await_bootstrap)) + + async def _start(self, endpoints, num_nodes, delay, seeds, await_bootstrap): + # If we were given nodes in the constructor, e.g. by Crawler.load(), just wait for the first to bootstrap + if self.nodes: + await asyncio.wait([node.started.wait() for node in self.nodes], return_when=asyncio.FIRST_COMPLETED) + self.started.set() + return + +"Starting %s nodes", num_nodes) + ids = [i2b(random.randint(a, b), length=20) for a, b in split_interval(config.id_space, num_nodes)] + #ids = [i2b(random.randint(0, config.id_space), length=20) for _ in range(num_nodes)] + for i, id in enumerate(ids): + node = Node(endpoint=next(endpoints), + id=id, + seeds=seeds, + torrent_database=self.torrent_database) + if await_bootstrap: + await node.started.wait() + self.nodes.append(node) + self.started.set() + logger.debug("Waiting %ss..", delay) + await asyncio.sleep(delay) + + @property + def peers(self) -> List[Peer]: + return [peer + for node in self.nodes + for peer in node.routing_table.peers] + + @property + def replacements(self) -> List[Peer]: + return [peer + for node in self.nodes + for bucket in node.routing_table.buckets + for peer in bucket.replacements] + + def save(self, filepath=data_dir.joinpath("crawler.json")) -> None: + with"w") as file: + json.dump([node.json() for node in self.nodes], file) + + @staticmethod + def load(endpoints: Iterator[Endpoint], torrent_database: TorrentDatabase, + filepath=data_dir.joinpath("crawler.json")) -> Crawler: 
+ with as file: + nodes = json.load(file) + + return Crawler(torrent_database=torrent_database, + nodes=[Node.from_json(node, next(endpoints), torrent_database) for node in nodes]) diff --git a/silverstream/bittorrent/ b/silverstream/bittorrent/ new file mode 100644 index 0000000..0dafa6b --- /dev/null +++ b/silverstream/bittorrent/ @@ -0,0 +1,189 @@ +from __future__ import annotations + +import logging +import re +import sqlite3 +import string +from enum import IntEnum +from typing import List, Optional, TypeVar, Iterable, Tuple + +from .peer import TorrentPeer +from ..config import data_dir +from ..util import grouper + +T = TypeVar("T") +logger = logging.getLogger(__name__) + + +class IndexStatus(IntEnum): + NotIndexed = 0 + Indexed = 1 + Indexing = 2 + IndexingFailed = 3 + + +class TorrentDatabase: + def __init__(self, filepath=data_dir.joinpath("torrents.sqlite")) -> None: + data_dir.mkdir(parents=True, exist_ok=True) + self.connection = sqlite3.connect(filepath, check_same_thread=False) + self.connection.row_factory = sqlite3.Row # allow accessing rows by index and case-insensitively by name + self.connection.text_factory = bytes # don't try to decode bytes as utf-8 strings + self.connection.executescript(f""" + CREATE TABLE IF NOT EXISTS + torrents (id INTEGER PRIMARY KEY, + info_hash BLOB UNIQUE NOT NULL, + name TEXT DEFAULT NULL, + peers BLOB DEFAULT x'', + indexed INTEGER DEFAULT {IndexStatus.NotIndexed.value}, + non_music INTEGER DEFAULT NULL); + + UPDATE torrents + SET indexed = {IndexStatus.NotIndexed.value} + WHERE indexed = {IndexStatus.Indexing.value}; -- reset status if indexing was interrupted + + CREATE VIRTUAL TABLE IF NOT EXISTS + songs USING FTS5(torrent_id UNINDEXED, + file_index UNINDEXED, + name, + tokenize='porter unicode61 remove_diacritics 1'); + VACUUM; + """) + + def add_peer(self, info_hash: bytes, peer: Optional[TorrentPeer] = None, name: str = None) -> None: + logger.debug("Adding peer %s to info hash %s", peer, info_hash) + 
self.connection.execute("INSERT OR IGNORE INTO torrents (info_hash) VALUES (?)", (info_hash,)) + + if name is not None and"720p|1080p|\.mp4|\.mkv|\.rar|\.avi|\.zip|XviD|XXX|x26[45]|S\d\dE\d\d", + name, flags=re.IGNORECASE): + logger.debug("Torrent %s with name %s is non-music", info_hash, name) + self.connection.execute(""" + UPDATE torrents + SET non_music = 1 + WHERE info_hash = ?; + """, (info_hash,)) + + if peer is not None: + self.connection.execute(""" + UPDATE torrents + SET peers = CASE + WHEN INSTR(HEX(peers), HEX(:compact)) THEN -- if peer is a substring of existing peers + peers -- then don't change peers + ELSE + peers || :compact -- otherwise, concat peer on existing peers + END, + name = :name + WHERE info_hash = :info_hash; + """, {"compact": peer.compact, "name": name, "info_hash": info_hash}) + self.connection.commit() + + def get_peers(self, info_hash: bytes) -> List[TorrentPeer]: + row = self.connection.execute(""" + SELECT peers + FROM torrents + WHERE info_hash = ?; + """, (info_hash,)).fetchone() + if row is None: + return [] + return [TorrentPeer.from_compact(b) for b in grouper(row["peers"], 6)] # each peer is 6 bytes + + def get_unindexed(self, limit=10) -> List[bytes]: + rows = self.connection.execute(f""" + SELECT id, info_hash + FROM torrents + WHERE indexed = {IndexStatus.NotIndexed.value} AND (non_music IS NULL OR non_music != 1) + ORDER BY LENGTH(peers) DESC -- most popular first + LIMIT {limit}; + """).fetchall() + if not rows: + return [] + ids, info_hashes = zip(*rows) + self.connection.execute(f""" + UPDATE torrents + SET indexed = {IndexStatus.Indexing.value} + WHERE id IN ({",".join(("?"*len(ids)))}); + """, (*ids,)) + self.connection.commit() + return info_hashes + + def set_index_status(self, info_hash: bytes, index_status: IndexStatus) -> None: + self.connection.execute(f""" + UPDATE torrents + SET indexed = {index_status.value} + WHERE info_hash = ? 
+ """, (info_hash,)) + self.connection.commit() + + def get_index_status(self, info_hash: bytes) -> Optional[IndexStatus]: + status = self.connection.execute(f""" + SELECT indexed + FROM torrents + WHERE info_hash = ? + """, (info_hash,)).fetchone() + if not status: + return None + return IndexStatus(status[0]) + + def add_songs(self, info_hash: bytes, files: Iterable[Tuple[int, str]]) -> None: + logger.debug("Adding songs %s", files) + torrent_id = self.get_torrent_id_from_hash(info_hash) + self.connection.executemany(""" + INSERT INTO songs(torrent_id, file_index, name) + VALUES (?, ?, ?); + """, ((torrent_id, file_index, name) for file_index, name in files)) + self.connection.commit() + + def search_song(self, query: str, limit=10) -> List[dict]: + query = re.sub(f"[{string.punctuation}]", "_", query) + rows = self.connection.execute(f""" + SELECT info_hash, file_index, song_name + FROM (SELECT torrent_id, file_index, name AS song_name + FROM songs + WHERE song_name MATCH ? + ORDER BY rank + LIMIT {limit}) AS best_songs + JOIN torrents ON = best_songs.torrent_id; + """, (query,)).fetchall() + + return [{"info_hash": info_hash, "file_index": file_index, "name": name.decode()} + for info_hash, file_index, name in rows] + + def get_torrent_id_from_hash(self, info_hash: bytes) -> int: + return self.connection.execute(""" + SELECT id + FROM torrents + WHERE info_hash = ?; + """, (info_hash,)).fetchone()["id"] + + def num_hashes(self) -> int: + return self.connection.execute(""" + SELECT COUNT(id) + FROM torrents; + """).fetchone()[0] + + def num_peers(self) -> int: + # LENGTH is weird with blobs so we use HEX/2 to get the correct number of bytes. Each peer is 6 bytes. + # We also use coalesce because the sum of nothing is 0, not null. 
+ return self.connection.execute(""" + SELECT COALESCE(SUM(LENGTH(HEX(peers)))/(2*6), 0) + FROM torrents; + """).fetchone()[0] + + def num_indexed(self, status: IndexStatus = IndexStatus.Indexed) -> int: + return self.connection.execute(f""" + SELECT COUNT(id) + FROM torrents + WHERE indexed = {status.value} + """).fetchone()[0] + + def num_non_music(self) -> int: + return self.connection.execute(""" + SELECT COUNT(id) + FROM torrents + WHERE non_music = 1 + """).fetchone()[0] + + def num_songs(self) -> int: + return self.connection.execute(""" + SELECT COUNT(*) + FROM songs + """).fetchone()[0] diff --git a/silverstream/bittorrent/dht/ b/silverstream/bittorrent/dht/ new file mode 100644 index 0000000..e69de29 diff --git a/silverstream/bittorrent/dht/ b/silverstream/bittorrent/dht/ new file mode 100644 index 0000000..be9976d --- /dev/null +++ b/silverstream/bittorrent/dht/ @@ -0,0 +1,12 @@ +from datetime import timedelta + + +version = "S8E101" + +id_space = 2**160 # SHA1 + +k = 8 # each bucket can hold eight nodes before becoming full +b = 5 # accelerate lookup by also splitting buckets not containing the node's ID up to b - 1 levels +alpha = 3 # system-wide lookup concurrency parameter + +fresh_time = timedelta(minutes=15) # contacts are good for 15 minutes before becoming questionable diff --git a/silverstream/bittorrent/dht/ b/silverstream/bittorrent/dht/ new file mode 100644 index 0000000..4ee09c3 --- /dev/null +++ b/silverstream/bittorrent/dht/ @@ -0,0 +1,170 @@ +from __future__ import annotations + +import asyncio +import itertools +import logging +import random +import secrets +from asyncio import Event +from contextlib import suppress +from copy import deepcopy +from typing import List, Iterable, AsyncIterator + +from . 
import config +from .peer import Contact, PeerStatus, Peer +from .protocol import KRPCProtocol +from .routing import RoutingTable +from .util import closest +from ..database import TorrentDatabase +from ..peer import TorrentPeer +from ...util import sha1, Endpoint, async_last + +logger = logging.getLogger(__name__) + + +class Node: + def __init__(self, endpoint: Endpoint, torrent_database: TorrentDatabase, id: bytes = None, + seeds: List[Contact] = None, bootstrap=True) -> None: + self.torrent_database = torrent_database + self.interface, self.port = endpoint + = id or sha1(secrets.token_bytes()) + self.seeds = seeds + + self.routing_table = RoutingTable(self) + self.transport = None + self.protocol = None + self.started = Event() + asyncio.create_task(self._start(bootstrap)) + + async def _start(self, bootstrap): +"Starting node on %s:%s with id %s", self.interface, self.port, + self.transport, self.protocol = await asyncio.get_event_loop().create_datagram_endpoint( + lambda: KRPCProtocol(self), + local_addr=(self.interface, self.port) + ) + if bootstrap: + await self.bootstrap() + self.started.set() + asyncio.create_task(self._routing_table_refresher()) + + async def bootstrap(self) -> None: +"Bootstrapping") + bootstrap_node = await self._get_seed() + await self.find_nodes( + # Refresh all buckets further away than the bucket the bootstrap node falls in + await asyncio.gather(*[self.routing_table.refresh_bucket(bucket) + for bucket in self.routing_table.buckets + if not bucket.fits(], + return_exceptions=True) + + async def _get_seed(self) -> Peer: + # From + seeds = [ + ("", 6881), + ("", 6881), + ("", 6881), + ("", 6881), + ("", 25401), + ] + random.shuffle(seeds) + + # Return the first seed that responds to our pings + loop = asyncio.get_event_loop() + for seed in itertools.cycle(self.seeds or seeds): + logger.debug("Trying %s:%s", *seed) + with suppress(TimeoutError): + addr_info = await loop.getaddrinfo(*seed) # translate hostname to IPv4 address + contact 
= Contact(*addr_info[0][4]) + id = await self.protocol.send_ping(contact) + return Peer(contact.address, contact.port, id) + + async def _routing_table_refresher(self): + while True: + await asyncio.sleep(random.randint(5*60, 15*60)) + await self.routing_table.refresh_table() + + async def find_nodes(self, target: bytes) -> Iterable[Peer]: + """ + Find the contact information of the k closest nodes to the given id. + + :param target: The id of the node we seek. + :return: The contact information of the k closest found peers. + """ + logger.debug("Finding closest nodes to %s", target) + return await async_last(self._lookup(target, self.protocol.send_find_node)) + + async def get_peers(self, info_hash: bytes) -> AsyncIterator[TorrentPeer]: + """ + Get peers associated with a torrent infohash. + + :param info_hash: The infohash of the torrent. + :return: Async iterable of TorrentPeers. + """ + logger.debug("Getting peers for infohash: %s", info_hash) + async for nodes, peers in self._lookup(info_hash, self.protocol.send_get_peers, response_handler=lambda r: r[0], + yield_responses=True): + for peer in peers: + yield peer + + async def _lookup(self, target: bytes, rpc: callable, k=config.k, alpha=config.alpha, response_handler=lambda r: r, + yield_responses=False) -> AsyncIterator: + known = {} + for peer in deepcopy(self.routing_table.peers): # copy so we don't update the PeerStatus in the routing table + known[] = peer + peer.status = PeerStatus.QUESTIONABLE # we haven't queried any peer during this lookup yet + + previous_best = set() + while True: + # Of the peers we have heard of, pick the alpha closest that have not yet been queried + chosen = closest(alpha, target, {peer for peer in known.values() if peer.status == PeerStatus.QUESTIONABLE}) + + # Send parallel asynchronous queries to the chosen peers + results = await asyncio.gather(*[rpc(, target) for peer in chosen], return_exceptions=True) + for peer, response in zip(chosen, results): + # Peers that fail to 
respond are removed from consideration (this updates PeerStatus in the known dict) + if isinstance(response, Exception): + peer.status = PeerStatus.BAD + continue + peers = response_handler(response) + if yield_responses: + yield response + peer.status = PeerStatus.GOOD + # Only add new nodes if they weren't already known as we don't want to overwrite existing PeerStatus + known.update({ peer + for peer in peers + if not in known}) + + best = set(closest(k, target, {peer for peer in known.values() if peer.status != PeerStatus.BAD})) + #logger.debug("Round mean distance: 2^%s", log2(statistics.mean(distance(target, for p in best))) + + # If a round of queries doesn't find any closer peers, query all k closest peers we haven't already queried + if best == previous_best: + alpha = k + + # The lookup terminates when we have queried the k closest peers we've seen + if all(peer.status == PeerStatus.GOOD for peer in best): + logger.debug("Closest: %s", best) + if not yield_responses: + yield best + return + + previous_best = best + + def json(self) -> dict: + return { + "id":, + "seeds": self.seeds, + "peers": [peer.json() for peer in self.routing_table.peers] + } + + @staticmethod + def from_json(data: dict, endpoint: Endpoint, torrent_database: TorrentDatabase) -> Node: + node = Node(endpoint=endpoint, + torrent_database=torrent_database, + id=bytes.fromhex(data["id"]), + seeds=data["seeds"], + bootstrap=False) + + for peer in data["peers"]: + node.routing_table.add(Peer.from_json(peer)) + return node diff --git a/silverstream/bittorrent/dht/ b/silverstream/bittorrent/dht/ new file mode 100644 index 0000000..d5f1278 --- /dev/null +++ b/silverstream/bittorrent/dht/ @@ -0,0 +1,104 @@ +from __future__ import annotations + +import logging +from datetime import datetime +from enum import Enum, auto +from typing import TypeVar, NamedTuple + +from . 
import config +from ...util import b2i, i2b + +logger = logging.getLogger(__name__) + + +class Contact(NamedTuple): + address: str + port: int + + @property + def compact(self) -> bytes: + address = bytes(map(int, self.address.split("."))) + port = i2b(self.port, length=2) + return address + port + + @staticmethod + def from_compact(b: bytes) -> Contact: + address = ".".join(map(str, b[:4])) + port = b2i(b[-2:]) + return Contact(address, port) + + +class PeerStatus(Enum): + GOOD = auto() # we've received a query/response from peer in the last 15 minutes + QUESTIONABLE = auto() # peer has been inactive for more than 15 minutes + BAD = auto() # peer has failed to respond to multiple queries + + +class Peer: + def __init__(self, address: str, port: int, id: bytes, status: PeerStatus = PeerStatus.QUESTIONABLE) -> None: + self.address = address + self.port = port + bytes = id + + self._last_seen = None + self._status = None + + self.status = status + + @property + def contact(self): + return Contact(self.address, self.port) + + @property + def compact(self) -> bytes: + return + + + @property + def status(self): + if self._status is not None: + return self._status + + if self._last_seen + config.fresh_time > datetime.utcnow(): + return PeerStatus.GOOD + return PeerStatus.QUESTIONABLE + + @status.setter + def status(self, status: PeerStatus): + self._status = None # reset override + if status == PeerStatus.GOOD: + self._last_seen = datetime.utcnow() + elif status == PeerStatus.QUESTIONABLE: + self._last_seen = datetime.min + elif status == PeerStatus.BAD: + self._status = PeerStatus.BAD + + @staticmethod + def from_compact(b: bytes, status: PeerStatus = PeerStatus.QUESTIONABLE) -> Peer: + id = b[:20] + address, port = Contact.from_compact(b[-6:]) + return Peer(address, port, id, status) + + def json(self) -> dict: + return { + "address": self.address, + "port": self.port, + "id":, + "last_seen": self._last_seen.timestamp(), + "status":, + } + + @staticmethod + def 
from_json(data: dict) -> Peer: + peer = Peer(address=data["address"], + port=data["port"], + id=bytes.fromhex(data["id"]), + status=PeerStatus[data["status"]]) + peer._last_seen = datetime.fromtimestamp(data["last_seen"]) + return peer + + def __repr__(self) -> str: + return "Peer(address={address}, port={port}, id={id}, last_seen={_last_seen}, status={status})" \ + .format(**self.__dict__, + + +PC = TypeVar("PC", Peer, Contact) diff --git a/silverstream/bittorrent/dht/ b/silverstream/bittorrent/dht/ new file mode 100644 index 0000000..9bf4f5e --- /dev/null +++ b/silverstream/bittorrent/dht/ @@ -0,0 +1,376 @@ +from __future__ import annotations + +import asyncio +import logging +import secrets +import typing +from collections import defaultdict +from contextlib import suppress +from enum import Enum +from typing import Tuple, Dict, Union, List, DefaultDict + +from . import config +from .peer import Contact, PeerStatus, Peer +from ..bencode import bdecode, bencode +from ..peer import TorrentPeer +from ...util import grouper + +if typing.TYPE_CHECKING: # PyCharm + from .node import Node + + +logger = logging.getLogger(__name__) + + +class Method(Enum): + PING = "ping" + FIND_NODE = "find_node" + GET_PEERS = "get_peers" + ANNOUNCE_PEER = "announce_peer" + + +class KRPCError(Exception): + code: int + pass + + +class GenericError(KRPCError): + code = 201 + + +class ServerError(KRPCError): + code = 202 + + +class ProtocolError(KRPCError): + code = 203 + + +class UnknownMethodError(KRPCError): + code = 204 + + +class KRPCProtocol(asyncio.DatagramProtocol): + def __init__(self, node: Node, timeout=2) -> None: + self.node = node + self.timeout = timeout + + self.transport = None + self.transactions: Dict[Tuple[Contact, bytes], Tuple[asyncio.Future, asyncio.TimerHandle]] = {} + self.sent_announce_tokens: DefaultDict[str, bytes] = defaultdict(lambda: secrets.token_bytes(8)) + self.received_announce_tokens: Dict[Contact, bytes] = {} + + def connection_made(self, transport) -> 
None: + self.transport = transport + + def datagram_received(self, data, addr) -> None: + # This is how you do it, apparently ( + asyncio.create_task(self._process_data(data, Contact(*addr))) + + async def _process_data(self, data: bytes, contact: Contact) -> None: + try: + message = bdecode(data) + message_type = message["y"].decode() + handler = { + "q": self._handle_query, + "r": self._handle_response, + "e": self._handle_error, + }[message_type] + except ValueError as error: + logger.debug("Failed to decode data: %s (%s):", data, error) + return + except KeyError as error: + logger.debug("Invalid message type for data: %s (%s)", data, error) + return + await handler(contact, message) + + async def _handle_query(self, contact: Contact, query: dict) -> None: + method_name = query["q"].decode() + arguments = query["a"] + token = query["t"] + try: + method = Method(method_name) + handler = { + Method.PING: self._receive_ping, + Method.FIND_NODE: self._receive_find_node, + Method.GET_PEERS: self._receive_get_peers, + Method.ANNOUNCE_PEER: self._receive_announce_peer, + }[method] + except ValueError: + logger.debug("Received query for unknown method: %s", method_name) + return await self._send_error(contact, token, UnknownMethodError(f"Unknown Method '{method_name}'")) + + logger.debug("Received %s from %s: %s",, contact, query) + + # Let our routing table know about this GOOD peer + peer = Peer(contact.address, contact.port, arguments.pop("id"), status=PeerStatus.GOOD) + self.node.routing_table.add(peer) + + try: + result = await handler(peer, **arguments) + except KRPCError as error: + return await self._send_error(contact, token, error) + except Exception as error: + logger.debug("Exception in query handler: %s", error) + logger.exception(error) + return await self._send_error(contact, token, ServerError("Server Error")) + + response = { + "t": token, + "v": config.version, + "y": "r", + "r": { + "id":, + **result + } + } + logger.debug("Sending %s response to %s: 
%s",, peer, response) + self.transport.sendto(bencode(response), contact) + + async def _handle_response(self, contact: Contact, response: dict) -> None: + logger.debug("Received response from %s: %s", contact, response) + transaction_id = response["t"] + result = response["r"] + + # Let our routing table know about this GOOD peer + self.node.routing_table.add(Peer(contact.address, contact.port, result["id"], status=PeerStatus.GOOD)) + + self._set_result(contact, transaction_id, result) + + async def _handle_error(self, contact: Contact, response: dict) -> None: + logger.debug("Received error from %s: %s", contact, response) + transaction_id = response["t"] + error = response["e"] + + # Let our routing table know that this peer is responding (even though it gave us an error) + self.node.routing_table.update_peer_status(contact, PeerStatus.GOOD) + + self._set_result(contact, transaction_id, TimeoutError(error)) # not technically a timeout but whatever + + async def _send_error(self, contact: Contact, token: bytes, error: KRPCError) -> None: + response = { + "t": token, + "v": config.version, + "y": "e", + "e": [error.code, str(error)] + } + logger.debug("Sending %s error to %s: %s", error, contact, response) + self.transport.sendto(bencode(response), contact) + + def _set_result(self, contact: Contact, transaction_id: bytes, result: Union[dict, Exception]) -> None: + try: + future, timeout = self.transactions.pop((contact, transaction_id)) + except KeyError: + logger.debug("Unknown transaction id %s for %s", transaction_id, contact) + return + timeout.cancel() + if future.done(): # TODO: This should never happen + logger.error("About to set %s on future %s but future already done!", result, future) + return + if isinstance(result, Exception): + future.set_exception(result) + else: + future.set_result(result) + + def _query(self, contact: Contact, method: Method, arguments: dict = None) -> asyncio.Future: + transaction_id = secrets.token_bytes(2) # generated by the 
querying node and echoed in the response + query = { + "t": transaction_id, + "v": config.version, + "y": "q", + "q": method.value, + "a": { + "id":, + **(arguments or {}) + } + } + # Register asyncio future callback + loop = asyncio.get_event_loop() + future = loop.create_future() + timeout = loop.call_later(self.timeout, self._timeout, contact, transaction_id) + self.transactions[contact, transaction_id] = (future, timeout) + + logger.debug("Sending %s to %s: %s",, contact, query) + self.transport.sendto(bencode(query), contact) + + return future + + def _timeout(self, contact: Contact, transaction_id: bytes) -> None: + logger.debug("Request %s to %s timed out", transaction_id, contact) + self._set_result(contact, transaction_id, TimeoutError("Request timed out")) + # Let our routing table know that this peer is BAD + self.node.routing_table.update_peer_status(contact, PeerStatus.BAD) + + # PING + async def send_ping(self, contact: Contact) -> bytes: + """ + The most basic query is a ping. + + :param contact: The contact to ping. + :return: The queried contacts's id. + """ + result = await self._query(contact, Method.PING) + return result["id"] + + async def _receive_ping(self, peer: Peer, **kwargs) -> dict: + """ + The appropriate response to a ping has a single key "id" containing the node ID of the responding node. + + :param peer: The querying node. + :return: An empty dict, as our own id will be added automatically later. + """ + if kwargs: + logger.debug("_receive_ping received additional kwargs: %s", kwargs) + + return {} # our id will be added automatically later + + # FIND_NODE + async def send_find_node(self, contact: Contact, target: bytes) -> List[Peer]: + """ + Find node is used to find the contact information for a peer given its ID. + + :param contact: The contact to send the find_node query to. + :param target: The id of the node we seek. + :return: The contact information of the K closest GOOD peers in the contacts's routing table. 
+ """ + result = await self._query(contact, Method.FIND_NODE, {"target": target}) + return decode_nodes(result["nodes"]) + + async def _receive_find_node(self, peer: Peer, target: bytes, want: List[bytes] = None, **kwargs) -> dict: + """ + Should respond with a key "nodes" and value of a string containing the compact node info for the K closest GOOD + peers in our own routing table. + + :param peer: The querying node. + :param target: The id of the node the sender seeks. + :param want: List containing either b'n4', b'n6' or both, indicating that the querying node requests IPv4 or + IPv6 nodes, respectively. + :return: The contact information of the target node or the K closest GOOD nodes in the our routing table. + """ + if kwargs: + logger.debug("_receive_find_node received additional kwargs: %s", kwargs) + + nodes = self.node.routing_table.closest(target, status=PeerStatus.GOOD) + return {"nodes": encode_nodes(nodes)} + + # GET_PEERS + async def send_get_peers(self, contact: Contact, info_hash: bytes) -> Tuple[List[Peer], List[TorrentPeer]]: + """ + Get peers associated with a torrent infohash. + + :param contact: The contact to send the get_peers query to. + :param info_hash: The infohash of the torrent. + :return: Tuple of nodes close to the infohash and TorrentPeers. + """ + result = await self._query(contact, Method.GET_PEERS, {"info_hash": info_hash}) + self.received_announce_tokens[contact] = result["token"] + + peers = decode_nodes(result.get("nodes", [])) + torrent_peers = decode_torrent_peers(result.get("values", [])) + return peers, torrent_peers + + async def _receive_get_peers(self, peer: Peer, info_hash: bytes, want: List[bytes] = None, noseed=False, + scrape=False, **kwargs) -> dict: + """ + Return a key "nodes", containing the K nodes in the our routing table closest to the infohash. In addition, if + we have peers for the infohash, return them in a key "values" as a list of TorrentPeers. A "token" key is also + included in the return value. 
The token value is a required argument for a future announce_peer query. + + :param peer: The querying node. + :param info_hash: The infohash of the torrent. + :param want: List containing either b'n4', b'n6' or both, indicating that the querying node requests IPv4 or + IPv6 nodes, respectively. + :param noseed: If true we should try to fill the values list with non-seed items on a best-effort basis. + :param scrape: If true and we and have database entries for the hash we must add bloom filters to the response. + :return: A token and both the TorrentPeers for the given infohash as well as the K closest nodes to it. + """ + if kwargs: + logger.debug("_receive_get_peers received additional kwargs: %s", kwargs) + + token = self.sent_announce_tokens[peer.address] + nodes = self.node.routing_table.closest(info_hash, status=PeerStatus.GOOD) + response = {"token": token, "nodes": encode_nodes(nodes)} + + peers = self.node.torrent_database.get_peers(info_hash) + if peers: + response.update({"values": encode_torrent_peers(peers)}) + + # TODO + # Add info_hash (and maybe peer?) to the peer database if we are sure the peer isn't obfuscating the infohash. + # How do we detect this? + # + # + + return response + + # ANNOUNCE_PEER + async def send_announce_peer(self, contact: Contact, info_hash: bytes, port: int, implied_port=False) -> bytes: + """ + Announce that we are downloading a torrent on the supplied port. + + :param contact: The contact to send the announce_peer query to. + :param info_hash: The infohash of the torrent we are downloading. + :param port: The port our torrent client is listening on. + :param implied_port: If true, the port argument should be ignored and the source port of the UDP packet should + be used as port instead (useful for peers behind a NAT that may not know their external port). + :return: The queried contacts's id. 
+ """ + arguments = { + "info_hash": info_hash, + "port": port, + "token": self.received_announce_tokens[contact], + "implied_port": int(implied_port) + } + result = await self._query(contact, Method.ANNOUNCE_PEER, arguments) + return result["id"] + + async def _receive_announce_peer(self, peer: Peer, info_hash: bytes, token: bytes, port: int = None, + implied_port=False, seed=False, name: bytes = None, **kwargs) -> dict: + """ + Store the contact information of the querying node under the infohash in the database. + + :param peer: The querying node. + :param info_hash: The infohash of the torrent they are downloading. + :param token: The "token" we gave the querying node in response to a previous get_peers query. We must verify + that this token was previously sent to the same IP address as the querying node. + :param port: The port their torrent client is listening on. + :param implied_port: If true, the port argument should be ignored and the source port of the UDP packet should + be used as port instead (useful for peers behind a NAT that may not know their external port). + :param seed: True if the querying node is seeding the torrent it announces. + :param name: Name of the corresponding torrent file. + :return: An empty dict, as our own id will be added automatically later. 
+ """ + if kwargs: + logger.debug("_receive_announce_peer received additional kwargs: %s", kwargs) + + if token != self.sent_announce_tokens[peer.address]: + raise ProtocolError("Invalid announce token") + + # If implied_port, the source port of the UDP packet should be used instead of the port argument + if implied_port: + port = peer.port + + with suppress(AttributeError): + name = name.decode(errors="replace") + + self.node.torrent_database.add_peer(info_hash, TorrentPeer(peer.address, port), name=name) + return {} + + +def decode_nodes(nodes: bytes) -> List[Peer]: + # Contact information for nodes is encoded as 26-byte strings concatenated together (wtf) + compact_nodes = map(bytes, grouper(nodes, 26)) + return [Peer.from_compact(node) for node in compact_nodes] + + +def encode_nodes(peers: List[Peer]) -> bytes: + return b"".join(p.compact for p in peers) + + +def decode_torrent_peers(peers: List[bytes]) -> List[TorrentPeer]: + # Contact information for TorrentPeers is encoded as a list of 6-byte strings (actually sensible, but inconsistent) + return [TorrentPeer.from_compact(peer) for peer in peers] + + +def encode_torrent_peers(peers: List[TorrentPeer]) -> List[bytes]: + return [p.compact for p in peers] diff --git a/silverstream/bittorrent/dht/ b/silverstream/bittorrent/dht/ new file mode 100644 index 0000000..d24d5bf --- /dev/null +++ b/silverstream/bittorrent/dht/ @@ -0,0 +1,236 @@ +from __future__ import annotations + +import asyncio +import logging +import random +import typing +from collections import OrderedDict +from contextlib import suppress +from typing import Tuple, List, Dict, Set, Iterator + +from . 
import config +from .peer import PeerStatus +from .util import closest +from ...util import b2i, i2b, log2 + +if typing.TYPE_CHECKING: # PyCharm + from .node import Node + from .peer import Contact, Peer + +logger = logging.getLogger(__name__) + + +class ReplacementCache: + def __init__(self, maxlen: int) -> None: + self.maxlen = maxlen + self.peers = OrderedDict() + + def add(self, peer: Peer) -> None: + compact = peer.compact + try: + self.peers.move_to_end(compact, last=True) # the cache should be kept sorted by time last seen + except KeyError: + if len(self.peers) >= self.maxlen: + self.peers.popitem(last=False) # remove oldest if full to make room for the new one + self.peers[compact] = peer # save peer in cache (or update its status) + + def pop(self) -> Peer: + compact, peer = self.peers.popitem(last=True) + return peer + + def __len__(self) -> int: + return self.peers.__len__() + + def __iter__(self) -> Iterator[Peer]: + yield from self.peers.values() + + +class Bucket(dict): + size = config.k + + def __init__(self, min: int, max: int) -> None: + super().__init__() + self.min = min + self.max = max + + self.replacements: ReplacementCache = ReplacementCache(maxlen=self.size) + + @property + def full(self) -> bool: + return len(self) >= self.size + + @property + def fresh(self) -> bool: + return any(peer.status == PeerStatus.GOOD for peer in self.values()) + + @property + def depth(self): + """ + Return the depth of the bucket, i.e. the number of prefix bits shared by all contacts in this bucket. + """ + return int(log2(config.id_space // (self.max - self.min))) + + def fits(self, id: bytes) -> bool: + """ + Return whether or not the given node/peer id fits in this bucket. + """ + return self.min <= b2i(id) < self.max + + def add(self, peer: Peer) -> bool: + """ + Add the given peer to the bucket. + + :return: True if the peer was added to the bucket, otherwise False. 
+ """ + # If the bucket is not full or the peer is already present, peer is added or updated, respectively + if in self or not self.full: + self[] = peer + return True + # Otherwise, if any peers in the bucket are known to have become bad, then one is replaced by the new peer + self.replacements.add(peer) + return self.fill() + + def fill(self) -> bool: + """ + Fill the bucket using peers from the replacement cache if it isn't full. Also replaces bad peers if any. + + :return: True if any new peer was added to the bucket, otherwise False. + """ + added = False + # Fill bucket using replacements + while not self.full and self.replacements: + self.add(self.replacements.pop()) + added = True + + # Replace bad peers + bad = {contact for contact, peer in self.items() if peer.status == PeerStatus.BAD} + while bad and self.replacements: + del self[bad.pop()] + self.add(self.replacements.pop()) + added = True + + return added + + def split(self) -> Tuple[Bucket, Bucket]: + """ + Split the bucket in two, dividing the contents between them. + + :return: The two new buckets. 
+ """ + logger.debug("Splitting bucket") + half = (self.min + self.max) // 2 + left = Bucket(self.min, half) + right = Bucket(half, self.max) + + # Divide peers + for peer in self.values(): + bucket = left if left.fits( else right + bucket.add(peer) + + # Divide replacement peers + for peer in self.replacements: + bucket = left if left.fits( else right + bucket.replacements.add(peer) # this doesn't change the replacement cache order + + # Fill buckets to capacity using replacement cache + left.fill() + right.fill() + + return left, right + + def __repr__(self) -> str: + return "Bucket(min=2^{min}, max=2^{max}, fresh={fresh}, full={full}, peers={peers}," \ + " replacements={replacements})".format(min=log2(self.min), + max=log2(self.max), + fresh=self.fresh, + full=self.full, + peers=list(self.values()), + replacements=self.replacements) + + +class RoutingTable: + def __init__(self, node: Node) -> None: + self.node = node + + self.buckets: List[Bucket] = [Bucket(0, config.id_space)] # initially, the table has a single bucket of the entire ID space + + @property + def peers(self) -> Set[Peer]: + return {peer + for bucket in self.buckets + for peer in bucket.values()} + + @property + def contacts(self) -> Dict[Contact, Peer]: + return {contact: peer + for bucket in self.buckets + for contact, peer in bucket.items()} + + @property + def replacements(self) -> Set[Peer]: + return {peer + for bucket in self.buckets + for peer in bucket.replacements} + + def find_bucket(self, id: bytes) -> Bucket: + """ + Find the appropriate bucket for the given id. + """ + return next(bucket for bucket in self.buckets if bucket.fits(id)) + + def add(self, peer: Peer) -> None: + """ + Add the given peer to the appropriate bucket in the routing table. 
+ """ + if == + return + logger.debug("Adding %s to routing table", peer) + bucket = self.find_bucket( + + # Try to add the peer to the appropriate bucket + if bucket.add(peer): + return + # Otherwise, if the bucket's range includes our own ID, it is split into two and the insertion attempt repeated + if bucket.fits( or bucket.depth % config.b != 0: + self.buckets.remove(bucket) + self.buckets.extend(bucket.split()) + self.add(peer) + + def closest(self, id: bytes, k=config.k, status: PeerStatus = None) -> List[Peer]: + """ + Return a list with the k closest peers to the given id in our buckets, optionally with given status. + """ + peers = self.peers + if status is not None: + peers = {peer for peer in peers if peer.status == status} + return closest(k, id, peers) + + async def refresh_bucket(self, bucket: Bucket) -> None: + """ + Refresh the bucket by picking a random ID in the range of the bucket and performing a find_nodes search on it. + """ + logger.debug("Refreshing bucket %s", bucket) + random_id = i2b(random.randint(bucket.min, bucket.max), length=20) # 20*8 bytes = 160 bits + await self.node.find_nodes(random_id) + + async def refresh_table(self) -> None: + """ + Refresh all buckets that have not been changed in 15 minutes. + """ + if all(peer.status == PeerStatus.BAD for peer in self.peers): + logger.warning("All peers in routing table have gone bad; bootstrapping again") + await self.node.bootstrap() + await asyncio.gather(*[self.refresh_bucket(bucket) + for bucket in self.buckets + if not bucket.fresh], + return_exceptions=True) + + def update_peer_status(self, contact: Contact, status: PeerStatus) -> None: + """ + Update the peer status for a peer given its contact information. 
+ """ + with suppress(KeyError): + peer = self.contacts[contact] + peer.status = status + logger.debug("Updated status for %s to %s", contact, + self.find_bucket( diff --git a/silverstream/bittorrent/dht/ b/silverstream/bittorrent/dht/ new file mode 100644 index 0000000..244a346 --- /dev/null +++ b/silverstream/bittorrent/dht/ @@ -0,0 +1,36 @@ +from __future__ import annotations + +import heapq +import logging +import typing +from typing import Iterable, List + +from ...util import b2i + +if typing.TYPE_CHECKING: # PyCharm + from .peer import PC + +logger = logging.getLogger(__name__) + + +def distance(b1: bytes, b2: bytes) -> int: + """ + Calculate Kademlia distance metric, which is used to compare two node IDs or a node ID and an infohash. + + :param b1: Node ID or infohash. + :param b2: Node ID or infohash. + :return: XOR of the two inputs, interpreted as an unsigned integer. + """ + return b2i(b1) ^ b2i(b2) + + +def closest(k: int, id: bytes, nodes: Iterable[PC]) -> List[PC]: + """ + Return a list with the k closest nodes to the given id from the dataset. + + :param k: The number of nodes to return. + :param id: The target id. + :param nodes: Iterable of Peers or Contacts. + :return: List of closest nodes. 
+ """ + return heapq.nsmallest(k, nodes, key=lambda n: distance(id, diff --git a/silverstream/bittorrent/ b/silverstream/bittorrent/ new file mode 100644 index 0000000..8a898e5 --- /dev/null +++ b/silverstream/bittorrent/ @@ -0,0 +1,72 @@ +from __future__ import annotations + +import asyncio +import logging +import typing +from pathlib import Path + +import libtorrent + +from .bencode import bencode +from .client import TorrentClient +from .database import IndexStatus +from ..config import data_dir +from ..util import make_data_dirs + +if typing.TYPE_CHECKING: # PyCharm + from .database import TorrentDatabase + +logger = logging.getLogger(__name__) + + +class Indexer: + def __init__(self, torrent_database: TorrentDatabase, torrent_client: TorrentClient, workers=25, timeout=30, + save_torrents=False) -> None: + self.torrent_database = torrent_database + self.torrent_client = torrent_client + self.workers = workers + + if save_torrents: + make_data_dirs("torrents/") + + if workers: +"Starting %s workers", workers) + self.unindexed = asyncio.Queue(maxsize=self.workers) + asyncio.create_task(self.enqueuer()) + for _ in range(self.workers): + asyncio.create_task(self.worker(timeout, save_torrents)) + + async def enqueuer(self): + while True: + info_hashes = self.torrent_database.get_unindexed(limit=self.workers) + for info_hash in info_hashes: + await self.unindexed.put(info_hash) + if not info_hashes: + await asyncio.sleep(30) # avoid spamming the database for hashes if there are none + + async def worker(self, timeout, save_torrents): + while True: + info_hash = await self.unindexed.get() + logger.debug("Indexing %s", info_hash) + try: + torrent_info = await asyncio.wait_for(self.torrent_client.get_torrent_info(info_hash), timeout=timeout) + except asyncio.TimeoutError: + logger.debug("Timed out downloading torrent info for %s", info_hash) + self.torrent_database.set_index_status(info_hash, IndexStatus.IndexingFailed) + continue + + if save_torrents: + name = 
f"{info_hash.hex()}-{[:64]}.torrent" # Avoids OSError: Filename too long + torrent = libtorrent.create_torrent(torrent_info) + with data_dir.joinpath("torrents/", name).open("wb") as file: + file.write(bencode(torrent.generate())) + songs = [] + files = torrent_info.files() + for file_index in range(files.num_files()): + file_path = Path(files.file_path(file_index)) + if file_path.suffix in (".aac", ".flac", ".m4a", ".mp3", ".mpc", ".ogg", ".opus", ".wav", ".wma"): + songs.append((file_index, " / ".join( + + if songs: + self.torrent_database.add_songs(info_hash, songs) + self.torrent_database.set_index_status(info_hash, IndexStatus.Indexed) diff --git a/silverstream/bittorrent/ b/silverstream/bittorrent/ new file mode 100644 index 0000000..8475854 --- /dev/null +++ b/silverstream/bittorrent/ @@ -0,0 +1,22 @@ +from __future__ import annotations + +from typing import NamedTuple + +from ..util import i2b, b2i + + +class TorrentPeer(NamedTuple): + address: str + port: int + + @property + def compact(self) -> bytes: + address = bytes(map(int, self.address.split("."))) + port = i2b(self.port, length=2) + return address + port + + @staticmethod + def from_compact(b: bytes) -> TorrentPeer: + address = ".".join(map(str, b[:4])) + port = b2i(b[-2:]) + return TorrentPeer(address, port) diff --git a/silverstream/ b/silverstream/ new file mode 100644 index 0000000..c2bff56 --- /dev/null +++ b/silverstream/ @@ -0,0 +1,271 @@ +import argparse +import asyncio +import cmd +import inspect +import logging +import logging.config +import logging.config +import shutil +from asyncio import AbstractEventLoop +from collections import Counter +from threading import Thread + +from . 
import __version__, __author__, config, util +from .bittorrent.client import TorrentClient +from .bittorrent.crawler import Crawler +from .bittorrent.database import TorrentDatabase +from .bittorrent.dht.peer import Contact +from .bittorrent.indexer import Indexer +from .player import Player +from .stats import StatisticsLogger +from .util import get_default_arg, humanize + +logger = logging.getLogger(__name__) + + +class Cli(cmd.Cmd): + prompt = "silverstream> " + ruler = None + + def __init__(self, loop: AbstractEventLoop, torrent_database: TorrentDatabase, torrent_client: TorrentClient, + indexer: Indexer, crawler: Crawler, player: Player) -> None: + super().__init__() + self.loop = loop + self.torrent_database = torrent_database + self.torrent_client = torrent_client + self.indexer = indexer + self.crawler = crawler + self.player = player + + def run(self): + # Start cmdloop with bash-like ctrl-c support + while True: + try: + self.cmdloop() + break + except KeyboardInterrupt: + print("^C") + except ValueError as e: + print(e) + self.intro = None + + def do_status(self, _): + crawler_peers = Counter(p.compact for p in self.crawler.peers) + crawler_replacements = Counter(p.compact for p in self.crawler.replacements) + database_hashes = self.torrent_database.num_hashes() + database_indexed = self.torrent_database.num_indexed() + database_nonmusic = self.torrent_database.num_non_music() + try: + database_indexed_music_fraction = database_indexed / database_hashes + except ZeroDivisionError: + database_indexed_music_fraction = 0 + client = self.torrent_client.status + + print(inspect.cleandoc( + f""" + Torrent Database: + Hashes: {database_hashes:,} + Peers: {self.torrent_database.num_peers():,} + Non-music: {database_nonmusic:,} + Indexed: {database_indexed:,} ({database_indexed_music_fraction:.2%}) + Songs: {self.torrent_database.num_songs():,} + + Crawler: + Nodes: {len(self.crawler.nodes)} + DHT Peers: {sum(crawler_peers.values()):,} 
({len(crawler_peers.keys()):,} unique) + Replacements: {sum(crawler_replacements.values()):,} ({len(crawler_replacements.keys()):,} unique) + + BitTorrent Client: + Peers: {client.num_peers} + Download Rate: {humanize(client.download_rate, suffix='B/s')} + Upload Rate: {humanize(client.upload_rate, suffix='B/s')} + Total Download: {humanize(client.total_download)} + Total Upload: {humanize(client.total_upload)} + """ + )) + + def do_search(self, query): + songs = self.torrent_database.search_song(query) + print("Id | Name") + print("---|----------------------------------------------------------------------") + for i, song in enumerate(songs): + print(f"{i:<2} | {song['name']}") + choice = int(input("Song: ")) + song = songs[choice] + asyncio.run_coroutine_threadsafe(self.player.download_and_play(song["name"], song["info_hash"], + song["file_index"]), self.loop) + + def do_peers(self, info_hash): + for peer in self.torrent_database.get_peers(bytes.fromhex(info_hash)): + print(peer) + + def do_pause(self, _): + self.player.pause() + + def do_resume(self, _): + self.player.resume() + + def do_playing(self, _): + self.player.currently_playing() + + def do_save(self, _): + + + def do_exit(self, _): + return True + + do_EOF = do_exit # Ctrl+D + + def emptyline(self): + """ + Method called when an empty line is entered in response to the prompt. + If this method is not overridden, it repeats the last nonempty command entered. + """ + pass + + def postloop(self): + """ + Hook method executed once when the cmdloop() method is about to return. Do cleanup here. 
+ """ + print("Exiting..") + self.player.terminate() + + +def parse_args(): + parser = argparse.ArgumentParser(prog="silverstream", + formatter_class=lambda **kw: argparse.ArgumentDefaultsHelpFormatter( + **kw, + max_help_position=35, + width=shutil.get_terminal_size().columns - 2)) + + def at_least(n, string): + value = int(string) + if value < n: + raise argparse.ArgumentTypeError(f"Must be at least {n}") + return value + + parser.add_argument("--interface", + type=str, + default="", + metavar="interface", + help="Network interface to bind to.") + parser.add_argument("--port", + type=int, + default=6881, + metavar="port", + help="Network port to listen listen on. Ports are bound consecutively from this port.") + parser.add_argument("--load", + action="store_true", + help="Load state from file (use 'save' from the cli to save).") + parser.add_argument("--stats", + action="store_true", + help="Save statistics to file.") + parser.add_argument("-v", "--verbose", + action="count", + default=0, + help="Increase verbosity level. 
Can be used multiple times.") + parser.add_argument("--clean", + action="store_true", + help="Remove data directory.") + + crawler = parser.add_argument_group("Crawler") + crawler.add_argument("--crawler-nodes", + type=lambda s: at_least(1, s), + default=get_default_arg(Crawler, "num_nodes"), + metavar="nodes", + help="Number of BitTorrent DHT nodes to start.") + crawler.add_argument("--crawler-delay", + type=int, + default=get_default_arg(Crawler, "delay"), + metavar="seconds", + help="Number of seconds to wait between starting each of the BitTorrent DHT nodes.") + crawler.add_argument("--crawler-await-bootstrap", + action="store_true", + help="Wait for nodes to fully bootstrap before starting the next one.") + + indexer = parser.add_argument_group("Indexer") + indexer.add_argument("--indexer-workers", + type=int, + default=get_default_arg(Indexer, "workers"), + metavar="workers", + help="Number of Indexer workers to start.") + indexer.add_argument("--indexer-save-torrents", + action="store_true", + help="Save indexed torrents to torrents/.") + + btdht = parser.add_argument_group("BitTorrent DHT") + btdht.add_argument("--btdht-seed", + type=str, + action="append", + metavar="host:port", + help="BitTorrent DHT seed nodes. Overrides default seeds. 
Can be specified multiple times.") + + return parser.parse_args() + + +def main(): + args = parse_args() + + print(f"silverstream v{__version__}") + print(f"(c) {__author__}") + + if args.clean: + print(f"WARNING: Removing {config.data_dir}") + shutil.rmtree(config.data_dir, ignore_errors=True) + + # Configure logging + util.make_data_dirs("logs/") + logging_level = ("WARNING", "INFO", "DEBUG")[min(args.verbose, 2)] + logging.config.dictConfig(config.logging(level=logging_level)) + print("Logging level is", logging_level) + + print("Initializing components..") + event_loop = asyncio.get_event_loop() + event_loop.set_debug(logging_level == "DEBUG") + torrent_database, torrent_client, indexer, crawler, player = event_loop.run_until_complete(initialize(args)) + + Thread(target=event_loop.run_forever, daemon=True).start() + + cli = Cli(event_loop, torrent_database, torrent_client, indexer, crawler, player) + + + +async def initialize(args): + # Process args + endpoints = util.endpoints(args.interface, args.port) + if args.btdht_seed is not None: + args.btdht_seed = [Contact(*seed.split(":")) for seed in args.btdht_seed] + + torrent_database = TorrentDatabase() + + if args.load: + print("Loading from file") + crawler = Crawler.load(endpoints=endpoints, + torrent_database=torrent_database) + else: + crawler = Crawler(endpoints=endpoints, + num_nodes=args.crawler_nodes, + delay=args.crawler_delay, + seeds=args.btdht_seed, + torrent_database=torrent_database, + await_bootstrap=args.crawler_await_bootstrap) + + await crawler.started.wait() + + torrent_client = TorrentClient(endpoint=next(endpoints), + torrent_database=torrent_database, + dht_nodes=crawler.nodes) + indexer = Indexer(torrent_database=torrent_database, + torrent_client=torrent_client, + workers=args.indexer_workers, + save_torrents=args.indexer_save_torrents) + player = Player(torrent_client=torrent_client) + + if args.stats: + StatisticsLogger(torrent_database, indexer, crawler) + + return torrent_database, 
torrent_client, indexer, crawler, player + + +if __name__ == '__main__': + main() diff --git a/silverstream/ b/silverstream/ new file mode 100644 index 0000000..8990248 --- /dev/null +++ b/silverstream/ @@ -0,0 +1,56 @@ +import os +import sys +from pathlib import Path + + +def get_data_dir() -> Path: + if sys.platform == "win32": + return Path("~/AppData/Local/silverstream").expanduser() + if sys.platform == "darwin": + return Path("~/Library/Caches/silverstream").expanduser() + return Path(os.getenv("XDG_CACHE_HOME", "~/.cache/"), "silverstream").expanduser() + + +data_dir = get_data_dir() + + +def logging(level="WARNING"): + return { + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "standard": { + "format": "%(asctime)s [%(levelname)-7s] %(name)s:%(funcName)s - %(message)s" + } + }, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "stream": "ext://sys.stdout", + "formatter": "standard", + "level": level, + }, + "main": { + "class": "logging.handlers.RotatingFileHandler", + "maxBytes": 5_242_880, # 5 MiB + "backupCount": 10, + "filename": data_dir.joinpath("logs/main.log"), + "encoding": "utf-8", + "formatter": "standard", + "level": "DEBUG", + }, + "warnings": { + "class": "logging.FileHandler", + "filename": data_dir.joinpath("logs/warnings.log"), + "encoding": "utf-8", + "formatter": "standard", + "level": "WARNING", + } + }, + "loggers": { + "silverstream": { + "level": "DEBUG", + "handlers": ["console", "main", "warnings"] + } + }, + } diff --git a/silverstream/ b/silverstream/ new file mode 100644 index 0000000..3c82b69 --- /dev/null +++ b/silverstream/ @@ -0,0 +1,89 @@ +from __future__ import annotations + +import asyncio +import logging +from pathlib import Path + +import mpv + +from .bittorrent.client import TorrentClient, torrent_eta +from .config import data_dir + +logger = logging.getLogger(__name__) + + +class Player: + def __init__(self, torrent_client: TorrentClient) -> None: + self.torrent_client = 
torrent_client + self.mpv = mpv.MPV() + + async def download_and_play(self, song_name: str, info_hash: bytes, file_index: int) -> None: + print("Downloading", song_name) + asyncio.create_task(, files=[file_index])) + handler = await self.torrent_client.get_torrent_handle(info_hash) + bit_rates = {".mp3": 128_000, ".flac": 512_000, ".ogg": 128_000, ".opus": 128_000} + stream_rate = bit_rates.get(Path(song_name).suffix) +"File's stream bit rate is: %s", stream_rate) + + # Wait until download is complete or we're downloading sufficiently fast for streaming the given audio format + while not handler.is_finished() and not (stream_rate and handler.status().download_payload_rate > stream_rate) \ + and torrent_eta(handler.status()) > 150: + await asyncio.sleep(1) + logger.debug("Playback ready") + path = data_dir.joinpath("downloads/", handler.get_torrent_info().files().file_path(file_index)) + # Wait until libtorrent has flushed at least 1 MiB from .parts to final file + while not path.exists() or path.stat().st_size < 1_048_576: # 1 MiB + logger.debug("Waiting for file") + await asyncio.sleep(1) +, path) + + def play(self, song_name: str, path: Path = None): + print("Playing", song_name) + if path is None: + self.mpv.playlist_pos = 0 + return + # overrides playlist completely, otherwise use add_to_playlist and skip to the song + + def pause(self): + self.mpv.pause = True + + def resume(self): + self.mpv.pause = False + + def skip(self): + # force player to end of song, essentially skipping + + def add_to_playlist(self, path: Path): + self.mpv.playlist_append(str(path)) + + def skip_to_song_or_idx(self, path: Path, idx=None): + if idx: + self.mpv.playlist_pos = idx + return + + for idx, song in enumerate(self.mpv.playlist): + if song["filename"] == str(path): + self.mpv.playlist_pos = idx + + def remove_from_playlist(self, path: Path): + for idx, song in enumerate(self.mpv.playlist): + if song["filename"] == str(path): + self.mpv.playlist_remove(idx) + + def 
clear_playlist(self): + self.mpv.playlist_clear() + + def display_playlist(self): + print(self.mpv.playlist) + + def next_song(self): + self.mpv.playlist_next() + + def previous_song(self): + self.mpv.playlist_prev() + + def currently_playing(self): + print(self.mpv.playlist[self.mpv.playlist_pos]["filename"]) + + def terminate(self): + self.mpv.terminate() diff --git a/silverstream/ b/silverstream/ new file mode 100644 index 0000000..0cf0cd1 --- /dev/null +++ b/silverstream/ @@ -0,0 +1,61 @@ +from __future__ import annotations + +import asyncio +import json +import logging +from collections import Counter +from datetime import datetime + +from . import util +from .bittorrent.crawler import Crawler +from .bittorrent.database import TorrentDatabase, IndexStatus +from .bittorrent.indexer import Indexer +from .config import data_dir + +logger = logging.getLogger(__name__) + + +class StatisticsLogger: + def __init__(self, torrent_database: TorrentDatabase, indexer: Indexer, crawler: Crawler): + self.torrent_database = torrent_database + self.indexer = indexer + self.crawler = crawler + util.make_data_dirs("statistics/") +"Statistics enabled") + asyncio.create_task(self._stat_saver()) + + async def _stat_saver(self): + while True: + with data_dir.joinpath("statistics/stats.json").open("a") as file: + file.write(json.dumps(self._generate_stat())) + file.write("\n") + await asyncio.sleep(5*60) + + def _generate_stat(self): + crawler_peers = Counter(p.compact for p in self.crawler.peers) + crawler_replacements = Counter(p.compact for p in self.crawler.replacements) + return { + "utc": int(datetime.utcnow().timestamp()), + "data": { + "torrent_database": { + "num_hashes": self.torrent_database.num_hashes(), + "num_peers": self.torrent_database.num_peers(), + "num_notindexed": self.torrent_database.num_indexed(status=IndexStatus.NotIndexed), + "num_indexed": self.torrent_database.num_indexed(status=IndexStatus.Indexed), + "num_indexing": 
self.torrent_database.num_indexed(status=IndexStatus.Indexing), + "num_indexingfailed": self.torrent_database.num_indexed(status=IndexStatus.IndexingFailed), + "num_non_music": self.torrent_database.num_non_music(), + "num_songs": self.torrent_database.num_songs(), + }, + "crawler": { + "num_nodes": len(self.crawler.nodes), + "num_peers": sum(crawler_peers.values()), + "num_peers_unique": len(crawler_peers.keys()), + "num_replacements": sum(crawler_replacements.values()), + "num_replacements_unique": len(crawler_replacements.keys()), + }, + "indexer": { + "workers": self.indexer.workers, + }, + } + } diff --git a/silverstream/ b/silverstream/ new file mode 100644 index 0000000..5080039 --- /dev/null +++ b/silverstream/ @@ -0,0 +1,187 @@ +import asyncio +import collections +import hashlib +import inspect +import math +from itertools import zip_longest +from typing import (Dict, Any, Coroutine, TypeVar, List, Tuple, AsyncIterable, Union, Iterable, Iterator, AsyncIterator, + NamedTuple) + +from . import config + +T = TypeVar("T") + + +def make_data_dirs(*paths): + for path in paths: + config.data_dir.joinpath(path).mkdir(parents=True, exist_ok=True) + + +class Endpoint(NamedTuple): + interface: str + port: int + + def __str__(self) -> str: + return f"{self.interface}:{self.port}" + + +def endpoints(interface: str, start_port: int) -> Iterator[Endpoint]: + while True: + yield Endpoint(interface, start_port) + start_port += 1 + + +def b2i(b: bytes) -> int: + """ + Convert a sequence of bytes to an integer. + + :param b: The array of bytes to convert. + :return: The integer represented by the given array of bytes in unsigned network byteorder. + """ + return int.from_bytes(b, byteorder="big", signed=False) + + +def i2b(i: int, length: int = None) -> bytes: + """ + Convert an integer to a sequence of bytes of the given length. + + :param i: The integer to convert. + :param length: The length of the output byte-sequence. 
+ :return: Sequence of bytes representing the integer in unsigned network byteorder. + """ + length = length or (i.bit_length() + 7) // 8 + return int.to_bytes(i, length, byteorder="big", signed=False) + + +def sha1(data) -> bytes: + """ + Takes an arbitrary block of byte-data and calculates a fixed-size bit string (a digest). + + :param data: Bytes to calculate hash for. + :return: Digest of data. + """ + return hashlib.sha1(data).digest() + + +def grouper(iterable, n, fillvalue=None): + """ + Collect data into fixed-length chunks or blocks. + Example: grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx. + From: + + :param iterable: Iterable to split. + :param n: Length of chunks, + :param fillvalue: Value to use in case last chunk cannot be filled. + :return: Iterable split into chunks of size n, with last chunk padded with fillvalue if necessary. + """ + args = [iter(iterable)] * n + return zip_longest(*args, fillvalue=fillvalue) + + +def get_default_arg(func: callable, arg: str): + """ + Return function's default argument value for the given arg. + Based on + """ + default = inspect.signature(func).parameters[arg].default + if default is inspect.Parameter.empty: + return None + return default + + +def log2(x: int): + """ + A log2 function that makes mathematicians go mad. + """ + try: + return math.log2(x) + except ValueError: + return 0 + + +def split_interval(max, num_intervals) -> List[Tuple[int, int]]: + """ + Split the interval [0, max] in num_interval sub-intervals using integer division. + + :param max: The original interval's max value. + :param num_intervals: The number of sub-intervals to produce. + :return: List of tuples containing the sub-intervals. + """ + return [(i*(max//num_intervals), (i+1)*(max//num_intervals)) for i in range(num_intervals)] + + +def humanize(n, precision=2, prefix="bin", suffix="B") -> str: + """ + Return a humanized string representation of a number (of bytes). 
+ Adapted from Doug Latornell - + """ + abbrevs = { + "dec": [ + (1000 ** 5, 'P' + suffix), + (1000 ** 4, 'T' + suffix), + (1000 ** 3, 'G' + suffix), + (1000 ** 2, 'M' + suffix), + (1000 ** 1, 'k' + suffix), + (1000, suffix) + ], + "bin": [ + (1 << 50, 'Pi' + suffix), + (1 << 40, 'Ti' + suffix), + (1 << 30, 'Gi' + suffix), + (1 << 20, 'Mi' + suffix), + (1 << 10, 'ki' + suffix), + (1, suffix) + ] + } + + if n == 1: + return "1 " + suffix + + for factor, suffix in abbrevs[prefix]: + if n >= factor: + break + # noinspection PyUnboundLocalVariable + return '%.*f %s' % (precision, n / factor, suffix) + + +async def async_take(iterable: AsyncIterable, n: int) -> AsyncIterator: + """ + Forward the first n elements from an asynchronous iterable. + Designed to work like take() from + + :param iterable: Async iterable. + :param n: The number of elements to forward. + :return: An async iterable containing the n first elements from given iterable. + """ + i = 0 + async for element in iterable: + if i >= n: + break + yield element + i += 1 + + +async def async_tail(n, iterable: AsyncIterable) -> Iterator: + """ + Return an async iterator over the last n items. + Example: async_tail(3, 'ABCDEFG') --> E F G. + Designed to work like tail() from + + :param n: Number of items to return. + :param iterable: Async iterable to return from. + :return: Async iterable containing the last n items from the original iterable. + """ + deque = collections.deque(maxlen=n) + async for element in iterable: + deque.append(element) + return iter(deque) + + +async def async_last(iterable: AsyncIterable[T]) -> T: + """ + Return the last element of an async iterable. + + :param iterable: Async iterable + :return: The last element of the iterable. 
+ """ + return next(await async_tail(1, iterable)) diff --git a/tests/ b/tests/ new file mode 100644 index 0000000..e69de29 diff --git a/tests/bittorrent/ b/tests/bittorrent/ new file mode 100644 index 0000000..e69de29 diff --git a/tests/bittorrent/ b/tests/bittorrent/ new file mode 100644 index 0000000..1bac11c --- /dev/null +++ b/tests/bittorrent/ @@ -0,0 +1,48 @@ +import unittest +from datetime import timedelta, datetime +from unittest.mock import patch + +from silverstream.bittorrent.dht.peer import Peer, PeerStatus, Contact + + +class TestContact(unittest.TestCase): + def test_compact(self): + contact = Contact("", 8080) + self.assertEqual(contact, Contact.from_compact(contact.compact)) + + +class TestPeer(unittest.TestCase): + def setUp(self): + self.peer = Peer("", 8080, b'\x05%Rz\x00\x91\x91*\xd3\x10\x05C\xb8F\xa7\xe8\xab\xfe\xc7W') + + def test_compact(self): + self.assertEqual(self.peer.compact, Peer.from_compact(self.peer.compact).compact) + + def test_questionable_by_default(self): + self.assertEqual(self.peer.status, PeerStatus.QUESTIONABLE) + + def test_set_questionable(self): + self.peer.status = PeerStatus.BAD + self.peer.status = PeerStatus.QUESTIONABLE + self.assertEqual(self.peer.status, PeerStatus.QUESTIONABLE) + + def test_bad_overrides(self): + peer = Peer("", 8080, b'\x05%Rz\x00\x91\x91*\xd3\x10\x05C\xb8F\xa7\xe8\xab\xfe\xc7W', + status=PeerStatus.BAD) + self.assertEqual(peer.status, PeerStatus.BAD) + + def test_good_for_15min(self): + with patch("silverstream.bittorrent.dht.peer.datetime") as mock_datetime: + mock_datetime.utcnow.return_value = datetime(2010, 1, 1, 12, 0, 0) + mock_datetime.side_effect = lambda *args, **kw: datetime(*args, **kw) + + self.peer.status = PeerStatus.GOOD + self.assertEqual(self.peer.status, PeerStatus.GOOD) + + mock_datetime.utcnow.return_value += timedelta(minutes=15) + + self.assertEqual(self.peer.status, PeerStatus.QUESTIONABLE) + + +if __name__ == '__main__': + unittest.main() diff --git a/tools/ b/tools/ 
new file mode 100644 index 0000000..1eda772 --- /dev/null +++ b/tools/ @@ -0,0 +1,53 @@ +import os +import sys +from pathlib import Path +import libtorrent + +sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__))))) # magic import-fixer +from silverstream.bittorrent.database import TorrentDatabase, IndexStatus + +try: + torrents_dir, database_filepath = map(Path, sys.argv[1:]) +except (IndexError, ValueError): + exit(f"Usage: {__file__} ") + +torrent_database = TorrentDatabase(filepath=database_filepath) + +num_torrents = num_skipped = num_songs = 0 +for torrent in torrents_dir.iterdir(): + num_torrents += 1 + print() + print("---") + print("Adding", torrent) + torrent_info = libtorrent.torrent_info(str(torrent)) + info_hash = torrent_info.info_hash().to_bytes() + torrent_name = + print("Info hash:", info_hash.hex()) + print("Torrent name:", torrent_name) + + torrent_database.add_peer(info_hash, name=torrent_name) + + if torrent_database.get_index_status(info_hash) == IndexStatus.Indexed: + print("Torrent already indexed: skipping") + num_skipped += 1 + continue + + songs = [] + files = torrent_info.files() + for file_index in range(files.num_files()): + file_path = Path(files.file_path(file_index)) + if file_path.suffix in (".aac", ".flac", ".m4a", ".mp3", ".mpc", ".ogg", ".opus", ".wav", ".wma"): + songs.append((file_index, " / ".join( + num_songs += 1 + + if songs: + torrent_database.add_songs(info_hash, songs) + print("Songs:") + for file_index, song_name in songs: + print(song_name) + torrent_database.set_index_status(info_hash, IndexStatus.Indexed) + +print("-----") +print("Torrents:", num_torrents) +print("Skipped:", num_skipped) +print("Songs:", num_songs) diff --git a/tools/ b/tools/ new file mode 100644 index 0000000..ab719e9 --- /dev/null +++ b/tools/ @@ -0,0 +1,43 @@ +import dbm.gnu as dbm +import os +import sys +from pathlib import Path +from typing import List, Tuple, Iterator + +sys.path.insert(0, 
os.path.dirname(os.path.dirname((os.path.abspath(__file__))))) # magic import-fixer +from silverstream.bittorrent.database import TorrentDatabase +from silverstream.bittorrent.peer import TorrentPeer +from silverstream.util import grouper + + +class OldPeerDatabaseStub: + def __init__(self, filepath) -> None: + self.dbm =, "rf") # read/fast mode + + def __iter__(self) -> Iterator[Tuple[bytes, List[TorrentPeer]]]: + info_hash = self.dbm.firstkey() + while info_hash is not None: + yield info_hash, self.get(info_hash) + info_hash = self.dbm.nextkey(info_hash) + + def get(self, info_hash: bytes) -> List[TorrentPeer]: + compact_peers = grouper(self.dbm.get(info_hash), 6) + return [TorrentPeer.from_compact(bytes(compact_peer)) for compact_peer in compact_peers] + + +try: + old_filepath = Path(sys.argv[1]) +except IndexError: + exit(f"Usage: {__file__} ") + +old = OldPeerDatabaseStub(str(old_filepath)) +new = TorrentDatabase(filepath=old_filepath.with_suffix(".sqlite")) + +num_keys = len(old.dbm.keys()) +num_hashes = num_peers = 0 +for info_hash, peers in old: + num_hashes += 1 + for peer in peers: + num_peers += 1 + new.add_peer(info_hash, peer) + print(f"{num_hashes} hashes, {num_peers} peers done ({num_hashes / num_keys:.2%})") diff --git a/tools/ b/tools/ new file mode 100644 index 0000000..88f892b --- /dev/null +++ b/tools/ @@ -0,0 +1,47 @@ +import json +import sys +from datetime import datetime +from pathlib import Path + +import matplotlib.pyplot as plt + +paths = [Path(f) for f in sys.argv[1:]] +if not paths: + exit(f"Usage: {__file__} ..") + + +def parse_stats(path: Path): + timestamps = [] + data = [] + with as file: + for line in file.readlines(): + stat = json.loads(line) + timestamps.append(datetime.utcfromtimestamp(stat["utc"])) + data.append(stat["data"]) + return max(d["crawler"]["num_nodes"] for d in data), timestamps, data + + +fig, (ax1, ax2) = plt.subplots(1, 2) + +ax1.set_ylabel("Torrent Hashes") + +ax2.set_ylabel("Peers") +ax2.yaxis.tick_right() 
+ax2.yaxis.set_label_position("right") + +for ax in (ax1, ax2): + ax.set_xlabel("Hours") + ax.tick_params("y") + ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x)))) + ax.grid(linewidth=.2) + + +for num_nodes, timestamps, data in sorted((parse_stats(p) for p in paths), reverse=True): + timestamps = [(t - timestamps[0]).total_seconds()/3600 for t in timestamps] # convert to hours relative to start + ax1.plot(timestamps, [d["torrent_database"]["num_hashes"] for d in data], label=f"{num_nodes} nodes") + ax2.plot(timestamps, [d["crawler"]["num_peers"] for d in data]) + + +ax1.legend(loc="upper left") +#fig.subplots_adjust(wspace=0)