commit 49eb78d5c060f7a50677f9316f2c9fb14d1ab7ec Author: Casper V. Kristensen Date: Fri Apr 19 02:33:31 2019 +0200 Publish. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..50c49f2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,358 @@ + +# Created by https://www.gitignore.io/api/python,pycharm,pycharm+all,pycharm+iml,emacs +# Edit at https://www.gitignore.io/?templates=python,pycharm,pycharm+all,pycharm+iml,emacs + +### Emacs ### +# -*- mode: gitignore; -*- +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +*.elc +auto-save-list +tramp +.\#* + +# Org-mode +.org-id-locations +*_archive + +# flymake-mode +*_flymake.* + +# eshell files +/eshell/history +/eshell/lastdir + +# elpa packages +/elpa/ + +# reftex files +*.rel + +# AUCTeX auto folder +/auto/ + +# cask packages +.cask/ +dist/ + +# Flycheck +flycheck_*.el + +# server auth directory +/server/ + +# projectiles files +.projectile + +# directory configuration +.dir-locals.el + +### PyCharm ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/modules.xml +# .idea/*.iml +# .idea/modules + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### PyCharm Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +# modules.xml +# .idea/misc.xml +# *.ipr + +# Sonarlint plugin +.idea/sonarlint + +### PyCharm+all ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff + +# Generated files + +# Sensitive or high-churn files + +# Gradle + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/modules.xml +# .idea/*.iml +# .idea/modules + +# CMake + +# Mongo Explorer plugin + +# File-based project format + +# IntelliJ + +# mpeltonen/sbt-idea plugin + +# JIRA plugin + +# Cursive Clojure plugin + +# Crashlytics plugin (for Android Studio and IntelliJ) + +# Editor-based Rest Client + +# Android studio 3.1+ serialized cache file + +### PyCharm+all Patch ### +# Ignores the whole .idea folder and all .iml files +# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 + +.idea/ + +# Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 + +*.iml +modules.xml +.idea/misc.xml +*.ipr + +### PyCharm+iml ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff + +# Generated files + +# Sensitive or high-churn files + +# Gradle + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/modules.xml +# .idea/*.iml +# .idea/modules + +# CMake + +# Mongo Explorer plugin + +# File-based project format + +# IntelliJ + +# mpeltonen/sbt-idea plugin + +# JIRA plugin + +# Cursive Clojure plugin + +# Crashlytics plugin (for Android Studio and IntelliJ) + +# Editor-based Rest Client + +# Android studio 3.1+ serialized cache file + +### PyCharm+iml Patch ### +# Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 + + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +### Python Patch ### +.venv/ + +### Python.VirtualEnv Stack ### +# Virtualenv +# 
http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ +[Bb]in +[Ii]nclude +[Ll]ib +[Ll]ib64 +[Ll]ocal +[Ss]cripts +pyvenv.cfg +pip-selfcheck.json + +# End of https://www.gitignore.io/api/python,pycharm,pycharm+all,pycharm+iml,emacs diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..f288702 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. 
+ + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. 
+ + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. 
If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. 
For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. 
Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. 
This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..df0fada --- /dev/null +++ b/README.md @@ -0,0 +1,75 @@ +# Silverstream: *A fully decentralised music streaming platform* + +## Installation +Silverstream requires Python 3.7+ with the `libtorrent` library, as well as the `libmpv1` package from your local +package repository. The dependencies can be installed from the Debian repositories: +```bash +sudo apt install -y python3.7 python3-libtorrent libmpv1 +``` +**Or** from source: +```bash +# Python 3.7 (https://docs.python.org/3/using/unix.html#building-python) +sudo apt build-dep -y python3 +wget https://www.python.org/ftp/python/3.7.1/Python-3.7.1.tgz +tar xf Python-3.7.1.tgz +cd Python-3.7.1 +./configure --enable-optimizations +make -j$(nproc) +sudo make altinstall + +# Libtorrent (https://dev.deluge-torrent.org/wiki/Building/libtorrent) +sudo apt build-dep -y libtorrent-rasterbar +wget https://github.com/arvidn/libtorrent/releases/download/libtorrent_1_1_11/libtorrent-rasterbar-1.1.11.tar.gz +tar xf libtorrent-rasterbar-1.1.11.tar.gz +cd libtorrent-rasterbar-1.1.11/ +PYTHON=$(which python3.7) ./configure --enable-python-binding --with-libiconv --disable-debug +make -j$(nproc) +sudo make install +sudo ldconfig +``` + +The libtorrent version from the Debian repository doesn't seem to play nicely with Python 3.7, so please **check the +installation**: +```bash +python3.7 -c "import libtorrent; print(libtorrent.version)" +``` + +After installing the dependencies, the application can be installed using pip: +```bash +python3.7 -m pip install --upgrade https://git.caspervk.net/caspervk/silverstream/archive/master.tar.gz +``` + +## Usage +The program can be started by running `silverstream` or `python3.7 -m silverstream`, depending on system configuration. 
+```text +usage: silverstream [-h] [--interface interface] [--port port] [--load] [--stats] [-v] [--clean] [--crawler-nodes nodes] + [--crawler-delay seconds] [--crawler-await-bootstrap] [--indexer-workers workers] + [--indexer-save-torrents] [--btdht-seed host:port] + +optional arguments: + -h, --help show this help message and exit + --interface interface Network interface to bind to. (default: 0.0.0.0) + --port port Network port to listen listen on. Ports are bound consecutively from this port. + (default: 6881) + --load Load state from file (use 'save' from the cli to save). + --stats Save statistics to file. + -v, --verbose Increase verbosity level. Can be used multiple times. + --clean Remove data directory. + +Crawler: + --crawler-nodes nodes Number of BitTorrent DHT nodes to start. (default: 32) + --crawler-delay seconds Number of seconds to wait between starting each of the BitTorrent DHT nodes. (default: 1) + --crawler-await-bootstrap Wait for nodes to fully bootstrap before starting the next one. + +Indexer: + --indexer-workers workers Number of Indexer workers to start. (default: 25) + --indexer-save-torrents Save indexed torrents to torrents/. + +BitTorrent DHT: + --btdht-seed host:port BitTorrent DHT seed nodes. Overrides default seeds. Can be specified multiple times. +``` + +You may need to open some ports in your firewall. 
"""Packaging script for Silverstream (setuptools)."""
from pathlib import Path

# Always prefer setuptools over distutils
from setuptools import setup, find_packages

from silverstream import __author__, __version__, __licence__


# Directory containing this script; the README is read relative to it so the build works from any cwd.
here = Path(__file__).resolve().parent

# Get the long description from the README file. The package requires Python 3.7+, so the builtin
# text I/O with an explicit encoding replaces the Python 2 era `codecs.open` (which shadowed `open`).
long_description = (here / "README.md").read_text(encoding="utf-8")


setup(
    name="silverstream",
    version=__version__,
    description="A fully decentralised music streaming platform",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://silverstream.org",
    project_urls={
        "Source": "https://example.com/me/silverstream.git"
    },
    author=__author__,
    classifiers=[
        "Development Status :: 3 - Alpha",
        "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
        "Programming Language :: Python :: 3",
    ],
    python_requires=">=3.7",
    keywords="silverstream music streaming bittorrent kademlia p2p",
    license=__licence__,
    packages=find_packages(exclude=["tests"]),
    install_requires=[
        "python-mpv",
    ],
    extras_require={
        "dev": [
            "matplotlib"
        ]
    },
    entry_points={
        "console_scripts": [
            "silverstream = silverstream.cli:main"
        ]
    },
)
#
# This file is modified from https://git.deluge-torrent.org/deluge/tree/deluge/bencode.py.
# Used to encode/decode BitTorrent Mainline DHT network communications.
#
# All decode_* helpers share one calling convention: they take the full buffer `x` and the offset `f`
# at which their value starts, and return `(value, offset just past the value)`.
#


DICT_DELIM = b'd'
END_DELIM = b'e'
INT_DELIM = b'i'
LIST_DELIM = b'l'
BYTE_SEP = b':'


def decode_int(x, f):
    """
    Decode the integer `i<number>e` starting at offset f.

    :raises ValueError: If the terminator is missing or the number is not in canonical form
                        (non-digit characters, leading zeros, or negative zero).
    """
    f += 1
    newf = x.index(END_DELIM, f)
    digits = x[f:newf]
    unsigned = digits[1:] if digits[:1] == b'-' else digits
    # int() on its own would also accept whitespace, '+' and '_', none of which are valid bencode.
    if not unsigned.isdigit():
        raise ValueError("malformed bencoded integer")
    if unsigned[:1] == b'0' and digits != b'0':
        raise ValueError("bencoded integers must not be negative zero or have leading zeros")
    return (int(digits), newf + 1)


def decode_string(x, f):
    """
    Decode the byte string `<length>:<data>` starting at offset f.

    :raises ValueError: If the separator is missing, the length is not a canonical non-negative
                        number, or the buffer ends before `length` bytes of data.
    """
    colon = x.index(BYTE_SEP, f)
    length = x[f:colon]
    if not length.isdigit():
        raise ValueError("malformed bencoded string length")
    if length[:1] == b'0' and len(length) != 1:
        raise ValueError("bencoded string lengths must not have leading zeros")
    start = colon + 1
    end = start + int(length)
    # Slicing never fails, so a truncated message has to be detected explicitly.
    if end > len(x):
        raise ValueError("bencoded string is truncated")
    return (x[start:end], end)


def decode_list(x, f):
    """Decode the list `l<items>e` starting at offset f."""
    r, f = [], f + 1
    while x[f:f + 1] != END_DELIM:
        v, f = decode_func[x[f:f + 1]](x, f)
        r.append(v)
    return (r, f + 1)


def decode_dict(x, f):
    """Decode the dictionary `d<key><value>...e` starting at offset f. Keys are returned as utf8-decoded str."""
    r, f = {}, f + 1
    while x[f:f + 1] != END_DELIM:
        k, f = decode_string(x, f)
        r[k.decode("utf8")], f = decode_func[x[f:f + 1]](x, f)
    return (r, f + 1)


# Dispatch on the first byte of a value.
decode_func = {
    LIST_DELIM: decode_list,
    DICT_DELIM: decode_dict,
    INT_DELIM: decode_int,
}
# A byte string starts with its decimal length, so every digit dispatches to decode_string.
decode_func.update((bytes([digit]), decode_string) for digit in b'0123456789')


def bdecode(x):
    """
    Decode the bencoded value at the start of x. Data following the first value is ignored.

    :raises ValueError: If x is not valid bencode (including input nested too deeply to decode).
    """
    try:
        r, __ = decode_func[x[0:1]](x, 0)
    except (IndexError, KeyError, ValueError, RecursionError) as e:
        # RecursionError: hostile input such as b"l" * 10000 must not escape as a non-ValueError.
        raise ValueError("not a valid bencoded string") from e
    else:
        return r


class Bencached(object):
    """Wrapper marking bytes that are already bencoded; bencode() emits them verbatim."""
    __slots__ = ['bencoded']

    def __init__(self, s):
        self.bencoded = s


def _encoder_for(x):
    """
    Return the encoder registered for the type of x, or for its nearest base class.

    :raises KeyError: If neither the type of x nor any of its bases has an encoder.
    """
    for klass in type(x).__mro__:
        try:
            return encode_func[klass]
        except KeyError:
            continue
    raise KeyError(type(x))


def encode_bencached(x, r):
    r.append(x.bencoded)


def encode_int(x, r):
    # int(x) first: str() of an int subclass (e.g. an IntEnum member) is not necessarily its number.
    r.extend((INT_DELIM, str(int(x)).encode('utf8'), END_DELIM))


def encode_bool(x, r):
    encode_int(1 if x else 0, r)


def encode_string(x, r):
    encode_bytes(x.encode('utf8'), r)


def encode_bytes(x, r):
    r.extend((str(len(x)).encode('utf8'), BYTE_SEP, x))


def encode_list(x, r):
    r.append(LIST_DELIM)
    for i in x:
        _encoder_for(i)(i, r)
    r.append(END_DELIM)


def encode_dict(x, r):
    r.append(DICT_DELIM)
    # Encode the keys before sorting so str and bytes keys can be mixed, and so the order is that of
    # the raw key bytes. Sorting only on the key also avoids ever comparing values.
    entries = [(k.encode('utf8') if isinstance(k, str) else k, v) for k, v in x.items()]
    entries.sort(key=lambda entry: entry[0])
    for k, v in entries:
        encode_bytes(k, r)
        _encoder_for(v)(v, r)
    r.append(END_DELIM)


# Dispatch on the type of the value; see _encoder_for() for how subclasses are resolved.
encode_func = {
    Bencached: encode_bencached,
    int: encode_int,
    list: encode_list,
    tuple: encode_list,
    dict: encode_dict,
    bool: encode_bool,
    str: encode_string,
    bytes: encode_bytes
}


def bencode(x):
    """
    Bencode x (int, bool, str, bytes, list, tuple, dict, Bencached, or a subclass of those).

    :raises KeyError: If x, or a value nested in it, has an unsupported type.
    """
    r = []
    _encoder_for(x)(x, r)
    return b''.join(r)
class TorrentClient:
    """
    Wrapper around a libtorrent session with libtorrent's built-in DHT disabled; peers come from the
    torrent database and from the given DHT nodes instead.
    """

    # Number of per-file priorities handed to libtorrent in download(). The torrent's real file count
    # may not be known when priorities are set, so a generous fixed upper bound is used.
    _MAX_FILES = 100_000

    def __init__(self, endpoint: Endpoint, torrent_database: TorrentDatabase, dht_nodes: Sequence[Node]) -> None:
        logger.info("Starting torrent client on %s", endpoint)
        self.torrent_database = torrent_database
        self.dht_nodes = dht_nodes
        self.session = libtorrent.session({
            "listen_interfaces": str(endpoint),
            "enable_dht": False,  # we'll use our own
        })
        # TODO: The recommended practice is to first pause the session, then generate the fast resume data, and then close it down.

    @property
    def status(self) -> libtorrent.session_status:
        return self.session.status()

    async def get_torrent_handle(self, info_hash: bytes) -> Optional[libtorrent.torrent_handle]:
        """Poll the session once per second until it has a valid handle for the torrent, then return it."""
        while True:
            handle = self.session.find_torrent(libtorrent.sha1_hash(info_hash))
            if handle.is_valid():
                return handle
            await asyncio.sleep(1)

    async def download(self, info_hash: bytes, files: Iterable[int] = None) -> libtorrent.torrent_info:
        """
        Download data of the given torrent file.

        :param info_hash: The info hash of the torrent.
        :param files: File-indexes to download. Downloads all by default.
        :return Torrent info of downloaded torrent.
        """
        logger.info("Downloading %s (files: %s)", info_hash, files)
        params = {
            "info_hash": info_hash,
            "save_path": str(data_dir.joinpath("downloads/")),
            "auto_managed": False,
            "paused": True
        }
        # Reuse a previously saved .torrent file for this info hash if there is one.
        with suppress(StopIteration):
            torrent_file = next(data_dir.joinpath("torrents/").glob(f"{info_hash.hex()}*.torrent"))
            logger.info("Using file %s", torrent_file)
            params.update({"ti": libtorrent.torrent_info(str(torrent_file))})
        handle: libtorrent.torrent_handle = self.session.add_torrent(params)
        handle.set_sequential_download(True)  # apparently doesn't work setting it in the constructor
        if files is not None:
            # Materialise once: `files` may be a one-shot iterator (which repeated `in` tests would
            # exhaust), and set membership keeps building the priority list linear.
            wanted = set(files)
            handle.prioritize_files([int(f in wanted) for f in range(self._MAX_FILES)])
        handle.resume()
        # TODO: Add peers from torrent database? They might've been indexed days ago, so maybe not too relevant
        for peer in self.torrent_database.get_peers(info_hash):
            handle.connect_peer(peer)

        # Ask the DHT node whose id is closest to the info hash for more peers when needed.
        get_peers = min(self.dht_nodes, key=lambda n: distance(n.id, info_hash)).get_peers(info_hash)
        while True:
            s = handle.status()
            logger.debug(f"Downloading {s.name or info_hash.hex()}: {s.progress:.2%} complete (eta: "
                         f"{torrent_eta(s):.0f}s, down: {humanize(s.download_rate, suffix='B/s')} up: "
                         f"{humanize(s.upload_rate, suffix='B/s')} peers: {s.num_peers}, candidates: "
                         f"{s.connect_candidates}) {s.state}")
            if handle.is_finished():
                break
            if s.download_payload_rate < 128_000:  # 1024 kbit/s
                logger.debug("Slow download rate; asking dht for 8 more peers")
                async for peer in async_take(get_peers, 8):
                    handle.connect_peer(peer)
            await asyncio.sleep(1)

        # TODO: Remove torrents where we only downloaded the metadata to keep the number of open connections low?
        #if not files:
        #    self.session.remove_torrent(handle)
        logger.info("Downloading %s (files: %s) completed", info_hash, files)
        return handle.get_torrent_info()

    async def get_torrent_info(self, info_hash: bytes) -> libtorrent.torrent_info:
        """Fetch the torrent's info by running download() with an empty file selection."""
        logger.info("Getting torrent info for %s", info_hash)
        return await self.download(info_hash, files=[])


def torrent_eta(torrent_status: libtorrent.torrent_status) -> float:
    """Return the estimated seconds until the wanted data is downloaded; inf when the payload rate is zero."""
    try:
        return (torrent_status.total_wanted - torrent_status.total_wanted_done) / torrent_status.download_payload_rate
    except ZeroDivisionError:
        return inf
class Crawler:
    """
    Owns a set of DHT nodes that share one torrent database.

    Nodes are either created by a background task scheduled from the constructor, or passed in
    ready-made (e.g. by Crawler.load()). `started` is set once the first node is available.
    """

    def __init__(self, torrent_database: TorrentDatabase, endpoints: Iterator[Endpoint] = None, num_nodes=32, delay=1,
                 seeds: List[Contact] = None, nodes: List[Node] = None, await_bootstrap=False) -> None:
        self.torrent_database = torrent_database
        self.nodes: List[Node] = nodes or []

        self.started = Event()
        # create_task() needs a running event loop, so a Crawler must be constructed from within one.
        asyncio.create_task(self._start(endpoints, num_nodes, delay, seeds, await_bootstrap))

    async def _start(self, endpoints, num_nodes, delay, seeds, await_bootstrap):
        # If we were given nodes in the constructor, e.g. by Crawler.load(), just wait for the first to bootstrap
        if self.nodes:
            # asyncio.wait() must be given tasks/futures: bare coroutines are deprecated since
            # Python 3.8 and rejected by 3.11+.
            waiters = [asyncio.create_task(node.started.wait()) for node in self.nodes]
            __, pending = await asyncio.wait(waiters, return_when=asyncio.FIRST_COMPLETED)
            for waiter in pending:  # the remaining waiters have no further purpose
                waiter.cancel()
            self.started.set()
            return

        logger.info("Starting %s nodes", num_nodes)
        # One random id per (a, b) pair produced by split_interval(); presumably this spreads the
        # nodes over the id space instead of sampling the whole space uniformly -- verify in util.
        node_ids = [i2b(random.randint(a, b), length=20) for a, b in split_interval(config.id_space, num_nodes)]
        for node_id in node_ids:
            node = Node(endpoint=next(endpoints),
                        id=node_id,
                        seeds=seeds,
                        torrent_database=self.torrent_database)
            if await_bootstrap:
                await node.started.wait()
            self.nodes.append(node)
            self.started.set()
            logger.debug("Waiting %ss..", delay)
            await asyncio.sleep(delay)

    @property
    def peers(self) -> List[Peer]:
        """All peers in the routing tables of all nodes."""
        return [peer
                for node in self.nodes
                for peer in node.routing_table.peers]

    @property
    def replacements(self) -> List[Peer]:
        """All replacement peers of all routing-table buckets of all nodes."""
        return [peer
                for node in self.nodes
                for bucket in node.routing_table.buckets
                for peer in bucket.replacements]

    def save(self, filepath=data_dir.joinpath("crawler.json")) -> None:
        """Write the JSON form of every node to filepath."""
        with filepath.open("w") as file:
            json.dump([node.json() for node in self.nodes], file)

    @staticmethod
    def load(endpoints: Iterator[Endpoint], torrent_database: TorrentDatabase,
             filepath=data_dir.joinpath("crawler.json")) -> Crawler:
        """Create a Crawler from nodes saved with save(); each node takes the next endpoint from endpoints."""
        with filepath.open() as file:
            nodes = json.load(file)

        return Crawler(torrent_database=torrent_database,
                       nodes=[Node.from_json(node, next(endpoints), torrent_database) for node in nodes])
class IndexStatus(IntEnum):
    """Indexing state of a torrent, stored as an integer in the `torrents.indexed` column."""
    NotIndexed = 0
    Indexed = 1
    Indexing = 2
    IndexingFailed = 3


class TorrentDatabase:
    """SQLite store of discovered torrents (info hash, name, peers, index status) and an FTS5 table of songs."""

    # Torrent names matching this (case-insensitively) are flagged as non-music.
    _NON_MUSIC_PATTERN = re.compile(r"720p|1080p|\.mp4|\.mkv|\.rar|\.avi|\.zip|XviD|XXX|x26[45]|S\d\dE\d\d",
                                    flags=re.IGNORECASE)
    # Any single ASCII punctuation character. The characters are escaped so that ']', '\', '^' and '-'
    # are taken literally inside the character class.
    _PUNCTUATION_PATTERN = re.compile(f"[{re.escape(string.punctuation)}]")

    def __init__(self, filepath=data_dir.joinpath("torrents.sqlite")) -> None:
        data_dir.mkdir(parents=True, exist_ok=True)
        self.connection = sqlite3.connect(filepath, check_same_thread=False)
        self.connection.row_factory = sqlite3.Row  # allow accessing rows by index and case-insensitively by name
        self.connection.text_factory = bytes  # don't try to decode bytes as utf-8 strings
        self.connection.executescript(f"""
            CREATE TABLE IF NOT EXISTS
                torrents (id        INTEGER PRIMARY KEY,
                          info_hash BLOB UNIQUE NOT NULL,
                          name      TEXT    DEFAULT NULL,
                          peers     BLOB    DEFAULT x'',
                          indexed   INTEGER DEFAULT {IndexStatus.NotIndexed.value},
                          non_music INTEGER DEFAULT NULL);

            UPDATE torrents
            SET indexed = {IndexStatus.NotIndexed.value}
            WHERE indexed = {IndexStatus.Indexing.value};  -- reset status if indexing was interrupted

            CREATE VIRTUAL TABLE IF NOT EXISTS
                songs USING FTS5(torrent_id UNINDEXED,
                                 file_index UNINDEXED,
                                 name,
                                 tokenize='porter unicode61 remove_diacritics 1');
            VACUUM;
        """)

    def add_peer(self, info_hash: bytes, peer: Optional[TorrentPeer] = None, name: str = None) -> None:
        """
        Register the torrent if it is new, flag it as non-music if its name says so, and record the peer.

        :param info_hash: The info hash of the torrent.
        :param peer: Peer to append to the torrent's peers, unless it is already stored.
        :param name: Name of the torrent, if known. Stored together with the peer.
        """
        logger.debug("Adding peer %s to info hash %s", peer, info_hash)
        self.connection.execute("INSERT OR IGNORE INTO torrents (info_hash) VALUES (?)", (info_hash,))

        if name is not None and self._NON_MUSIC_PATTERN.search(name):
            logger.debug("Torrent %s with name %s is non-music", info_hash, name)
            self.connection.execute("""
                UPDATE torrents
                SET non_music = 1
                WHERE info_hash = ?;
            """, (info_hash,))

        if peer is not None:
            # COALESCE keeps an already known name when this call doesn't provide one.
            # NOTE(review): the INSTR/HEX substring test can also match across the boundary of two
            # stored peers (or at an odd hex digit), in which case a new peer is wrongly skipped.
            self.connection.execute("""
                UPDATE torrents
                SET peers = CASE
                                WHEN INSTR(HEX(peers), HEX(:compact)) THEN  -- if peer is a substring of existing peers
                                    peers                                   -- then don't change peers
                                ELSE
                                    peers || :compact                       -- otherwise, concat peer on existing peers
                            END,
                    name = COALESCE(:name, name)
                WHERE info_hash = :info_hash;
            """, {"compact": peer.compact, "name": name, "info_hash": info_hash})
        self.connection.commit()

    def get_peers(self, info_hash: bytes) -> List[TorrentPeer]:
        """Return the peers stored for the torrent; an empty list if the torrent is unknown."""
        row = self.connection.execute("""
            SELECT peers
            FROM torrents
            WHERE info_hash = ?;
        """, (info_hash,)).fetchone()
        if row is None:
            return []
        return [TorrentPeer.from_compact(b) for b in grouper(row["peers"], 6)]  # each peer is 6 bytes

    def get_unindexed(self, limit=10) -> List[bytes]:
        """
        Claim up to `limit` not-yet-indexed torrents that aren't flagged non-music, marking them as Indexing.

        :return: The info hashes of the claimed torrents, longest peers blob first.
        """
        rows = self.connection.execute(f"""
            SELECT id, info_hash
            FROM torrents
            WHERE indexed = {IndexStatus.NotIndexed.value} AND (non_music IS NULL OR non_music != 1)
            ORDER BY LENGTH(peers) DESC  -- most popular first
            LIMIT ?;
        """, (limit,)).fetchall()
        if not rows:
            return []
        ids, info_hashes = zip(*rows)
        placeholders = ",".join("?" * len(ids))
        self.connection.execute(f"""
            UPDATE torrents
            SET indexed = {IndexStatus.Indexing.value}
            WHERE id IN ({placeholders});
        """, ids)
        self.connection.commit()
        return list(info_hashes)

    def set_index_status(self, info_hash: bytes, index_status: IndexStatus) -> None:
        self.connection.execute("""
            UPDATE torrents
            SET indexed = ?
            WHERE info_hash = ?
        """, (index_status.value, info_hash))
        self.connection.commit()

    def get_index_status(self, info_hash: bytes) -> Optional[IndexStatus]:
        """Return the index status of the torrent, or None if the torrent is unknown."""
        status = self.connection.execute("""
            SELECT indexed
            FROM torrents
            WHERE info_hash = ?
        """, (info_hash,)).fetchone()
        if status is None:
            return None
        return IndexStatus(status[0])

    def add_songs(self, info_hash: bytes, files: Iterable[Tuple[int, str]]) -> None:
        """
        Add songs of an already registered torrent to the search index.

        :param info_hash: The info hash of the torrent the files belong to.
        :param files: (file index, name) pairs.
        """
        logger.debug("Adding songs %s", files)
        torrent_id = self.get_torrent_id_from_hash(info_hash)
        self.connection.executemany("""
            INSERT INTO songs(torrent_id, file_index, name)
            VALUES (?, ?, ?);
        """, ((torrent_id, file_index, name) for file_index, name in files))
        self.connection.commit()

    def search_song(self, query: str, limit=10) -> List[dict]:
        """
        Full-text search for songs.

        :param query: Search terms; every punctuation character is replaced by '_' before matching.
        :param limit: Maximum number of songs to return.
        :return: Dicts with the keys "info_hash", "file_index" and "name".
        """
        query = self._PUNCTUATION_PATTERN.sub("_", query)
        rows = self.connection.execute("""
            SELECT info_hash, file_index, song_name
            FROM (SELECT torrent_id, file_index, name AS song_name
                  FROM songs
                  WHERE song_name MATCH ?
                  ORDER BY rank
                  LIMIT ?) AS best_songs
            JOIN torrents ON torrents.id = best_songs.torrent_id;
        """, (query, limit)).fetchall()

        return [{"info_hash": info_hash, "file_index": file_index, "name": name.decode()}
                for info_hash, file_index, name in rows]

    def get_torrent_id_from_hash(self, info_hash: bytes) -> int:
        """Return the row id of the torrent. Fails with TypeError if the torrent is unknown (no row to index)."""
        return self.connection.execute("""
            SELECT id
            FROM torrents
            WHERE info_hash = ?;
        """, (info_hash,)).fetchone()["id"]

    def num_hashes(self) -> int:
        return self.connection.execute("""
            SELECT COUNT(id)
            FROM torrents;
        """).fetchone()[0]

    def num_peers(self) -> int:
        # LENGTH is weird with blobs so we use HEX/2 to get the correct number of bytes. Each peer is 6 bytes.
        # SUM over zero rows yields NULL, so COALESCE is used to report 0 for an empty table.
        return self.connection.execute("""
            SELECT COALESCE(SUM(LENGTH(HEX(peers)))/(2*6), 0)
            FROM torrents;
        """).fetchone()[0]

    def num_indexed(self, status: IndexStatus = IndexStatus.Indexed) -> int:
        """Return the number of torrents whose index status equals `status`."""
        return self.connection.execute("""
            SELECT COUNT(id)
            FROM torrents
            WHERE indexed = ?
        """, (status.value,)).fetchone()[0]

    def num_non_music(self) -> int:
        return self.connection.execute("""
            SELECT COUNT(id)
            FROM torrents
            WHERE non_music = 1
        """).fetchone()[0]

    def num_songs(self) -> int:
        return self.connection.execute("""
            SELECT COUNT(*)
            FROM songs
        """).fetchone()[0]
async def _get_seed(self) -> Peer:
    """
    Find a bootstrap node by pinging seed hosts until one of them answers.

    The seeds given to the constructor (self.seeds) are used when present, otherwise a shuffled list
    of public routers. Candidates are cycled through indefinitely, so this coroutine only returns
    once a seed has responded.

    :return: The first seed that answered our ping, as a Peer carrying the id it reported.
    """
    import socket  # local import: only needed to ask the resolver for IPv4 results

    # From https://github.com/qbittorrent/qBittorrent/blob/b09e32ebc07e3e6175/src/base/bittorrent/session.cpp#L1418
    seeds = [
        ("router.bittorrent.com", 6881),
        ("router.utorrent.com", 6881),
        ("dht.transmissionbt.com", 6881),
        ("dht.aelitis.com", 6881),
        ("dht.libtorrent.org", 25401),
    ]
    random.shuffle(seeds)

    # Return the first seed that responds to our pings
    loop = asyncio.get_event_loop()
    for host, port in itertools.cycle(self.seeds or seeds):
        logger.debug("Trying %s:%s", host, port)
        try:
            # Restrict the lookup to IPv4: an IPv6 result is a 4-tuple sockaddr and cannot be unpacked
            # into Contact(address, port).
            addr_info = await loop.getaddrinfo(host, port, family=socket.AF_INET, type=socket.SOCK_DGRAM)
            contact = Contact(*addr_info[0][4])
            node_id = await self.protocol.send_ping(contact)
        except TimeoutError:
            continue  # no (usable) answer from this seed; try the next one
        except OSError as error:
            # Name resolution failed (socket.gaierror is an OSError). Skip this seed, and pause briefly
            # so a dead resolver is not hammered while we keep cycling.
            logger.debug("Could not resolve %s:%s (%s)", host, port, error)
            await asyncio.sleep(1)
            continue
        return Peer(contact.address, contact.port, node_id)
async def _lookup(self, target: bytes, rpc: callable, k=config.k, alpha=config.alpha, response_handler=lambda r: r,
                  yield_responses=False) -> AsyncIterator:
    """
    Iteratively query the closest known peers for the target until the k closest usable peers have all answered.

    Starts from a private copy of the routing table's peers, repeatedly sends `rpc` to the closest peers that have
    not been queried yet and merges the peers they report into the candidate set.

    :param target: The node id or infohash to search for.
    :param rpc: Coroutine function called as rpc(contact, target) for every queried peer.
    :param k: Size of the result set (number of closest peers to settle on).
    :param alpha: Number of peers queried in parallel per round; raised to k once a round brings no improvement.
    :param response_handler: Extracts the iterable of newly learned peers from a successful rpc response.
    :param yield_responses: If true, every successful raw response is yielded as it is processed and the final set is
                            not yielded; if false, only the final set of the k closest non-BAD peers is yielded once.
    """
    known = {}
    for peer in deepcopy(self.routing_table.peers):  # copy so we don't update the PeerStatus in the routing table
        known[peer.id] = peer
        peer.status = PeerStatus.QUESTIONABLE  # we haven't queried any peer during this lookup yet

    previous_best = set()
    while True:
        # Of the peers we have heard of, pick the alpha closest that have not yet been queried
        chosen = closest(alpha, target, {peer for peer in known.values() if peer.status == PeerStatus.QUESTIONABLE})

        # Send parallel asynchronous queries to the chosen peers
        results = await asyncio.gather(*[rpc(peer.contact, target) for peer in chosen], return_exceptions=True)
        for peer, response in zip(chosen, results):
            # Peers that fail to respond are removed from consideration (this updates PeerStatus in the known dict)
            if isinstance(response, Exception):
                peer.status = PeerStatus.BAD
                continue
            peers = response_handler(response)
            if yield_responses:
                yield response
            peer.status = PeerStatus.GOOD  # within this lookup, GOOD means "queried and answered"
            # Only add new nodes if they weren't already known as we don't want to overwrite existing PeerStatus
            known.update({peer.id: peer
                          for peer in peers
                          if peer.id not in known})

        # The current result candidates: the k closest peers that have not failed on us
        best = set(closest(k, target, {peer for peer in known.values() if peer.status != PeerStatus.BAD}))
        #logger.debug("Round mean distance: 2^%s", log2(statistics.mean(distance(target, p.id) for p in best)))

        # If a round of queries doesn't find any closer peers, query all k closest peers we haven't already queried
        if best == previous_best:
            alpha = k

        # The lookup terminates when we have queried the k closest peers we've seen
        if all(peer.status == PeerStatus.GOOD for peer in best):
            logger.debug("Closest: %s", best)
            if not yield_responses:
                yield best
            return

        previous_best = best
class Contact(NamedTuple):
    """Address and port of a node, convertible to and from the 6-byte compact form."""
    address: str
    port: int

    @property
    def compact(self) -> bytes:
        """Return the 4 address bytes followed by the 2-byte port."""
        address = bytes(map(int, self.address.split(".")))
        port = i2b(self.port, length=2)
        return address + port

    @staticmethod
    def from_compact(b: bytes) -> "Contact":
        """Build a Contact from compact bytes: the first 4 are the address, the last 2 the port."""
        address = ".".join(map(str, b[:4]))
        port = b2i(b[-2:])
        return Contact(address, port)


class PeerStatus(Enum):
    GOOD = auto()  # we've received a query/response from peer in the last 15 minutes
    QUESTIONABLE = auto()  # peer has been inactive for more than 15 minutes
    BAD = auto()  # peer has failed to respond to multiple queries


class Peer:
    """
    A DHT node we know about: its contact information, id and liveness status.

    The status is derived from the time the peer was last seen (GOOD while that is younger than
    config.fresh_time, QUESTIONABLE afterwards) unless it has been explicitly overridden to BAD.
    """

    # Reference point for (de)serialising _last_seen. _last_seen is a naive datetime in UTC, so it is
    # converted with plain arithmetic against a naive UTC epoch instead of datetime.timestamp() /
    # datetime.fromtimestamp(), which interpret naive values as *local* time and can fail for
    # datetime.min depending on platform and time zone.
    _EPOCH = datetime(1970, 1, 1)

    def __init__(self, address: str, port: int, id: bytes, status: PeerStatus = PeerStatus.QUESTIONABLE) -> None:
        self.address = address
        self.port = port
        self.id: bytes = id

        # Start out as "never seen" rather than None: the status setter leaves _last_seen untouched for
        # BAD, and json() must still be able to serialise such a peer.
        self._last_seen = datetime.min
        self._status = None

        self.status = status

    @property
    def contact(self):
        """The peer's address and port as a Contact."""
        return Contact(self.address, self.port)

    @property
    def compact(self) -> bytes:
        """The peer's id followed by its compact contact information."""
        return self.id + self.contact.compact

    @property
    def status(self):
        """Return the BAD override if set, otherwise GOOD or QUESTIONABLE depending on freshness."""
        if self._status is not None:
            return self._status

        if self._last_seen + config.fresh_time > datetime.utcnow():
            return PeerStatus.GOOD
        return PeerStatus.QUESTIONABLE

    @status.setter
    def status(self, status: PeerStatus):
        self._status = None  # reset override
        if status == PeerStatus.GOOD:
            self._last_seen = datetime.utcnow()
        elif status == PeerStatus.QUESTIONABLE:
            self._last_seen = datetime.min
        elif status == PeerStatus.BAD:
            self._status = PeerStatus.BAD

    @staticmethod
    def from_compact(b: bytes, status: PeerStatus = PeerStatus.QUESTIONABLE) -> "Peer":
        """Build a Peer from compact node info: the first 20 bytes are the id, the last 6 the contact."""
        id = b[:20]
        address, port = Contact.from_compact(b[-6:])
        return Peer(address, port, id, status)

    def json(self) -> dict:
        """Return a JSON-serialisable dict; "last_seen" is seconds since the Unix epoch (UTC)."""
        return {
            "address": self.address,
            "port": self.port,
            "id": self.id.hex(),
            "last_seen": (self._last_seen - self._EPOCH).total_seconds(),
            "status": self.status.name,
        }

    @staticmethod
    def from_json(data: dict) -> "Peer":
        """Recreate a Peer from the dict produced by json()."""
        from datetime import timedelta  # local import: the module only imports `datetime`

        peer = Peer(address=data["address"],
                    port=data["port"],
                    id=bytes.fromhex(data["id"]),
                    status=PeerStatus[data["status"]])
        # Restore the stored time after construction, as the status setter has just overwritten it.
        peer._last_seen = Peer._EPOCH + timedelta(seconds=data["last_seen"])
        return peer

    def __repr__(self) -> str:
        return (f"Peer(address={self.address}, port={self.port}, id={self.id}, "
                f"last_seen={self._last_seen}, status={self.status.name})")
async def _handle_query(self, contact: Contact, query: dict) -> None:
    """
    Dispatch an incoming query to the handler for its method and send back the response or an error.

    The sender is added to the routing table as a GOOD peer before the handler runs. Structurally
    invalid queries are answered with a ProtocolError (or dropped when they carry no transaction id)
    instead of raising inside the processing task.

    :param contact: Address and port the datagram came from.
    :param query: The decoded query message ("t" transaction id, "q" method name, "a" arguments).
    """
    transaction_id = query.get("t")  # echoed back verbatim in whatever we reply
    try:
        method_name = query["q"].decode()
        arguments = dict(query["a"])  # copy, so removing the id below does not alter the logged query
        node_id = arguments.pop("id")
        # Ids are 20 bytes (the id space is 2**160); anything else cannot be placed in a bucket.
        if not isinstance(node_id, bytes) or len(node_id) != 20:
            raise ValueError("invalid node id")
    except (KeyError, AttributeError, TypeError, ValueError) as error:
        logger.debug("Malformed query from %s: %s (%s)", contact, query, error)
        if transaction_id is None:
            return  # nothing to correlate an error reply with
        return await self._send_error(contact, transaction_id, ProtocolError("Malformed query"))

    try:
        method = Method(method_name)
        handler = {
            Method.PING: self._receive_ping,
            Method.FIND_NODE: self._receive_find_node,
            Method.GET_PEERS: self._receive_get_peers,
            Method.ANNOUNCE_PEER: self._receive_announce_peer,
        }[method]
    except ValueError:
        logger.debug("Received query for unknown method: %s", method_name)
        return await self._send_error(contact, transaction_id,
                                      UnknownMethodError(f"Unknown Method '{method_name}'"))

    logger.debug("Received %s from %s: %s", method.name, contact, query)

    # Let our routing table know about this GOOD peer
    peer = Peer(contact.address, contact.port, node_id, status=PeerStatus.GOOD)
    self.node.routing_table.add(peer)

    try:
        result = await handler(peer, **arguments)
    except KRPCError as error:
        return await self._send_error(contact, transaction_id, error)
    except Exception as error:
        # Anything unexpected is reported to the sender as a generic server error
        logger.debug("Exception in query handler: %s", error)
        logger.exception(error)
        return await self._send_error(contact, transaction_id, ServerError("Server Error"))

    response = {
        "t": transaction_id,
        "v": config.version,
        "y": "r",
        "r": {
            "id": self.node.id,
            **result
        }
    }
    logger.debug("Sending %s response to %s: %s", method.name, peer, response)
    self.transport.sendto(bencode(response), contact)
gave us an error) + self.node.routing_table.update_peer_status(contact, PeerStatus.GOOD) + + self._set_result(contact, transaction_id, TimeoutError(error)) # not technically a timeout but whatever + + async def _send_error(self, contact: Contact, token: bytes, error: KRPCError) -> None: + response = { + "t": token, + "v": config.version, + "y": "e", + "e": [error.code, str(error)] + } + logger.debug("Sending %s error to %s: %s", error, contact, response) + self.transport.sendto(bencode(response), contact) + + def _set_result(self, contact: Contact, transaction_id: bytes, result: Union[dict, Exception]) -> None: + try: + future, timeout = self.transactions.pop((contact, transaction_id)) + except KeyError: + logger.debug("Unknown transaction id %s for %s", transaction_id, contact) + return + timeout.cancel() + if future.done(): # TODO: This should never happen + logger.error("About to set %s on future %s but future already done!", result, future) + return + if isinstance(result, Exception): + future.set_exception(result) + else: + future.set_result(result) + + def _query(self, contact: Contact, method: Method, arguments: dict = None) -> asyncio.Future: + transaction_id = secrets.token_bytes(2) # generated by the querying node and echoed in the response + query = { + "t": transaction_id, + "v": config.version, + "y": "q", + "q": method.value, + "a": { + "id": self.node.id, + **(arguments or {}) + } + } + # Register asyncio future callback + loop = asyncio.get_event_loop() + future = loop.create_future() + timeout = loop.call_later(self.timeout, self._timeout, contact, transaction_id) + self.transactions[contact, transaction_id] = (future, timeout) + + logger.debug("Sending %s to %s: %s", method.name, contact, query) + self.transport.sendto(bencode(query), contact) + + return future + + def _timeout(self, contact: Contact, transaction_id: bytes) -> None: + logger.debug("Request %s to %s timed out", transaction_id, contact) + self._set_result(contact, transaction_id, 
TimeoutError("Request timed out")) + # Let our routing table know that this peer is BAD + self.node.routing_table.update_peer_status(contact, PeerStatus.BAD) + + # PING + async def send_ping(self, contact: Contact) -> bytes: + """ + The most basic query is a ping. + + :param contact: The contact to ping. + :return: The queried contacts's id. + """ + result = await self._query(contact, Method.PING) + return result["id"] + + async def _receive_ping(self, peer: Peer, **kwargs) -> dict: + """ + The appropriate response to a ping has a single key "id" containing the node ID of the responding node. + + :param peer: The querying node. + :return: An empty dict, as our own id will be added automatically later. + """ + if kwargs: + logger.debug("_receive_ping received additional kwargs: %s", kwargs) + + return {} # our id will be added automatically later + + # FIND_NODE + async def send_find_node(self, contact: Contact, target: bytes) -> List[Peer]: + """ + Find node is used to find the contact information for a peer given its ID. + + :param contact: The contact to send the find_node query to. + :param target: The id of the node we seek. + :return: The contact information of the K closest GOOD peers in the contacts's routing table. + """ + result = await self._query(contact, Method.FIND_NODE, {"target": target}) + return decode_nodes(result["nodes"]) + + async def _receive_find_node(self, peer: Peer, target: bytes, want: List[bytes] = None, **kwargs) -> dict: + """ + Should respond with a key "nodes" and value of a string containing the compact node info for the K closest GOOD + peers in our own routing table. + + :param peer: The querying node. + :param target: The id of the node the sender seeks. + :param want: List containing either b'n4', b'n6' or both, indicating that the querying node requests IPv4 or + IPv6 nodes, respectively. + :return: The contact information of the target node or the K closest GOOD nodes in the our routing table. 
+ """ + if kwargs: + logger.debug("_receive_find_node received additional kwargs: %s", kwargs) + + nodes = self.node.routing_table.closest(target, status=PeerStatus.GOOD) + return {"nodes": encode_nodes(nodes)} + + # GET_PEERS + async def send_get_peers(self, contact: Contact, info_hash: bytes) -> Tuple[List[Peer], List[TorrentPeer]]: + """ + Get peers associated with a torrent infohash. + + :param contact: The contact to send the get_peers query to. + :param info_hash: The infohash of the torrent. + :return: Tuple of nodes close to the infohash and TorrentPeers. + """ + result = await self._query(contact, Method.GET_PEERS, {"info_hash": info_hash}) + self.received_announce_tokens[contact] = result["token"] + + peers = decode_nodes(result.get("nodes", [])) + torrent_peers = decode_torrent_peers(result.get("values", [])) + return peers, torrent_peers + + async def _receive_get_peers(self, peer: Peer, info_hash: bytes, want: List[bytes] = None, noseed=False, + scrape=False, **kwargs) -> dict: + """ + Return a key "nodes", containing the K nodes in the our routing table closest to the infohash. In addition, if + we have peers for the infohash, return them in a key "values" as a list of TorrentPeers. A "token" key is also + included in the return value. The token value is a required argument for a future announce_peer query. + + :param peer: The querying node. + :param info_hash: The infohash of the torrent. + :param want: List containing either b'n4', b'n6' or both, indicating that the querying node requests IPv4 or + IPv6 nodes, respectively. + :param noseed: If true we should try to fill the values list with non-seed items on a best-effort basis. + :param scrape: If true and we and have database entries for the hash we must add bloom filters to the response. + :return: A token and both the TorrentPeers for the given infohash as well as the K closest nodes to it. 
async def _receive_announce_peer(self, peer: Peer, info_hash: bytes, token: bytes, port: int = None,
                                 implied_port=False, seed=False, name: bytes = None, **kwargs) -> dict:
    """
    Store the contact information of the querying node under the infohash in the database.

    :param peer: The querying node.
    :param info_hash: The infohash of the torrent they are downloading.
    :param token: The "token" we gave the querying node in response to a previous get_peers query. We must verify
                  that this token was previously sent to the same IP address as the querying node.
    :param port: The port their torrent client is listening on.
    :param implied_port: If true, the port argument should be ignored and the source port of the UDP packet should
                         be used as port instead (useful for peers behind a NAT that may not know their external port).
    :param seed: True if the querying node is seeding the torrent it announces.
    :param name: Name of the corresponding torrent file.
    :return: An empty dict, as our own id will be added automatically later.
    :raises ProtocolError: If the token was not issued to the sender's address or the port is unusable.
    """
    if kwargs:
        logger.debug("_receive_announce_peer received additional kwargs: %s", kwargs)

    # Look the token up with .get(): indexing the defaultdict would mint and keep a token for every
    # address that announces without having sent get_peers first.
    expected = self.sent_announce_tokens.get(peer.address)
    if expected is None or not isinstance(token, bytes) or not secrets.compare_digest(token, expected):
        raise ProtocolError("Invalid announce token")

    # If implied_port, the source port of the UDP packet should be used instead of the port argument
    if implied_port:
        port = peer.port
    # The port is later packed into two bytes (TorrentPeer.compact), so reject a missing or
    # out-of-range value now rather than storing a peer that cannot be encoded.
    if not isinstance(port, int) or not 0 < port < 65536:
        raise ProtocolError("Invalid port")

    if isinstance(name, bytes):
        name = name.decode(errors="replace")

    self.node.torrent_database.add_peer(info_hash, TorrentPeer(peer.address, port), name=name)
    return {}
class ReplacementCache:
    """
    Bounded cache of stand-by peers, ordered from least to most recently added.

    Peers are keyed by their compact representation. Adding a peer that is already present moves it
    to the most-recent end; adding a new peer to a full cache evicts the oldest entry.
    """

    def __init__(self, maxlen: int) -> None:
        self.maxlen = maxlen
        self.peers: "OrderedDict[bytes, Peer]" = OrderedDict()

    def add(self, peer: "Peer") -> None:
        """Insert the peer as the most recent entry, or refresh it if it is already cached."""
        compact = peer.compact
        try:
            self.peers.move_to_end(compact, last=True)  # the cache should be kept sorted by time last seen
        except KeyError:
            if len(self.peers) >= self.maxlen:
                self.peers.popitem(last=False)  # remove oldest if full to make room for the new one
        self.peers[compact] = peer  # save peer in cache (or update its status)

    def pop(self) -> "Peer":
        """Remove and return the most recently added peer; raises KeyError when the cache is empty."""
        return self.peers.popitem(last=True)[1]

    def __len__(self) -> int:
        return len(self.peers)

    def __iter__(self) -> "Iterator[Peer]":
        yield from self.peers.values()
+ """ + return int(log2(config.id_space // (self.max - self.min))) + + def fits(self, id: bytes) -> bool: + """ + Return whether or not the given node/peer id fits in this bucket. + """ + return self.min <= b2i(id) < self.max + + def add(self, peer: Peer) -> bool: + """ + Add the given peer to the bucket. + + :return: True if the peer was added to the bucket, otherwise False. + """ + # If the bucket is not full or the peer is already present, peer is added or updated, respectively + if peer.contact in self or not self.full: + self[peer.contact] = peer + return True + # Otherwise, if any peers in the bucket are known to have become bad, then one is replaced by the new peer + self.replacements.add(peer) + return self.fill() + + def fill(self) -> bool: + """ + Fill the bucket using peers from the replacement cache if it isn't full. Also replaces bad peers if any. + + :return: True if any new peer was added to the bucket, otherwise False. + """ + added = False + # Fill bucket using replacements + while not self.full and self.replacements: + self.add(self.replacements.pop()) + added = True + + # Replace bad peers + bad = {contact for contact, peer in self.items() if peer.status == PeerStatus.BAD} + while bad and self.replacements: + del self[bad.pop()] + self.add(self.replacements.pop()) + added = True + + return added + + def split(self) -> Tuple[Bucket, Bucket]: + """ + Split the bucket in two, dividing the contents between them. + + :return: The two new buckets. 
+ """ + logger.debug("Splitting bucket") + half = (self.min + self.max) // 2 + left = Bucket(self.min, half) + right = Bucket(half, self.max) + + # Divide peers + for peer in self.values(): + bucket = left if left.fits(peer.id) else right + bucket.add(peer) + + # Divide replacement peers + for peer in self.replacements: + bucket = left if left.fits(peer.id) else right + bucket.replacements.add(peer) # this doesn't change the replacement cache order + + # Fill buckets to capacity using replacement cache + left.fill() + right.fill() + + return left, right + + def __repr__(self) -> str: + return "Bucket(min=2^{min}, max=2^{max}, fresh={fresh}, full={full}, peers={peers}," \ + " replacements={replacements})".format(min=log2(self.min), + max=log2(self.max), + fresh=self.fresh, + full=self.full, + peers=list(self.values()), + replacements=self.replacements) + + +class RoutingTable: + def __init__(self, node: Node) -> None: + self.node = node + + self.buckets: List[Bucket] = [Bucket(0, config.id_space)] # initially, the table has a single bucket of the entire ID space + + @property + def peers(self) -> Set[Peer]: + return {peer + for bucket in self.buckets + for peer in bucket.values()} + + @property + def contacts(self) -> Dict[Contact, Peer]: + return {contact: peer + for bucket in self.buckets + for contact, peer in bucket.items()} + + @property + def replacements(self) -> Set[Peer]: + return {peer + for bucket in self.buckets + for peer in bucket.replacements} + + def find_bucket(self, id: bytes) -> Bucket: + """ + Find the appropriate bucket for the given id. + """ + return next(bucket for bucket in self.buckets if bucket.fits(id)) + + def add(self, peer: Peer) -> None: + """ + Add the given peer to the appropriate bucket in the routing table. 
async def refresh_bucket(self, bucket: Bucket) -> None:
    """
    Refresh the bucket by picking a random ID in the range of the bucket and performing a find_nodes search on it.

    :param bucket: The bucket to refresh.
    """
    logger.debug("Refreshing bucket %s", bucket)
    # A bucket covers min <= id < max (see Bucket.fits), so the upper bound must be excluded: randint
    # could return bucket.max itself, which lies outside the bucket and, for the topmost bucket
    # (max == 2**160), does not fit into 20 bytes.
    random_id = i2b(random.randrange(bucket.min, bucket.max), length=20)  # 20 bytes * 8 = 160 bits
    await self.node.find_nodes(random_id)
+ """ + with suppress(KeyError): + peer = self.contacts[contact] + peer.status = status + logger.debug("Updated status for %s to %s", contact, status.name) + self.find_bucket(peer.id).fill() diff --git a/silverstream/bittorrent/dht/util.py b/silverstream/bittorrent/dht/util.py new file mode 100644 index 0000000..244a346 --- /dev/null +++ b/silverstream/bittorrent/dht/util.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +import heapq +import logging +import typing +from typing import Iterable, List + +from ...util import b2i + +if typing.TYPE_CHECKING: # PyCharm + from .peer import PC + +logger = logging.getLogger(__name__) + + +def distance(b1: bytes, b2: bytes) -> int: + """ + Calculate Kademlia distance metric, which is used to compare two node IDs or a node ID and an infohash. + + :param b1: Node ID or infohash. + :param b2: Node ID or infohash. + :return: XOR of the two inputs, interpreted as an unsigned integer. + """ + return b2i(b1) ^ b2i(b2) + + +def closest(k: int, id: bytes, nodes: Iterable[PC]) -> List[PC]: + """ + Return a list with the k closest nodes to the given id from the dataset. + + :param k: The number of nodes to return. + :param id: The target id. + :param nodes: Iterable of Peers or Contacts. + :return: List of closest nodes. 
async def worker(self, timeout, save_torrents):
    """
    Index torrents from the queue forever: fetch the metadata, record the music files, update the status.

    :param timeout: Seconds to wait for a torrent's metadata before marking it as IndexingFailed.
    :param save_torrents: If true, each fetched torrent is also written to the "torrents/" data directory.
    """
    music_suffixes = frozenset((".aac", ".flac", ".m4a", ".mp3", ".mpc", ".ogg", ".opus", ".wav", ".wma"))

    while True:
        info_hash = await self.unindexed.get()
        logger.debug("Indexing %s", info_hash)
        try:
            torrent_info = await asyncio.wait_for(self.torrent_client.get_torrent_info(info_hash), timeout=timeout)
        except asyncio.TimeoutError:
            logger.debug("Timed out downloading torrent info for %s", info_hash)
            self.torrent_database.set_index_status(info_hash, IndexStatus.IndexingFailed)
            continue

        if save_torrents:
            name = f"{info_hash.hex()}-{torrent_info.name()[:64]}.torrent"  # Avoids OSError: Filename too long
            torrent = libtorrent.create_torrent(torrent_info)
            with data_dir.joinpath("torrents/", name).open("wb") as file:
                file.write(bencode(torrent.generate()))

        songs = []
        files = torrent_info.files()
        for file_index in range(files.num_files()):
            file_path = Path(files.file_path(file_index))
            # Compare the extension case-insensitively so that e.g. "Track.MP3" is recognised too
            if file_path.suffix.lower() in music_suffixes:
                songs.append((file_index, " / ".join(file_path.parts)))

        if songs:
            self.torrent_database.add_songs(info_hash, songs)
        # NOTE(review): the status update is taken to be unconditional (torrents without songs are
        # marked Indexed as well); indentation was lost in this dump — confirm against the repository.
        self.torrent_database.set_index_status(info_hash, IndexStatus.Indexed)
import __version__, __author__, config, util +from .bittorrent.client import TorrentClient +from .bittorrent.crawler import Crawler +from .bittorrent.database import TorrentDatabase +from .bittorrent.dht.peer import Contact +from .bittorrent.indexer import Indexer +from .player import Player +from .stats import StatisticsLogger +from .util import get_default_arg, humanize + +logger = logging.getLogger(__name__) + + +class Cli(cmd.Cmd): + prompt = "silverstream> " + ruler = None + + def __init__(self, loop: AbstractEventLoop, torrent_database: TorrentDatabase, torrent_client: TorrentClient, + indexer: Indexer, crawler: Crawler, player: Player) -> None: + super().__init__() + self.loop = loop + self.torrent_database = torrent_database + self.torrent_client = torrent_client + self.indexer = indexer + self.crawler = crawler + self.player = player + + def run(self): + # Start cmdloop with bash-like ctrl-c support + while True: + try: + self.cmdloop() + break + except KeyboardInterrupt: + print("^C") + except ValueError as e: + print(e) + self.intro = None + + def do_status(self, _): + crawler_peers = Counter(p.compact for p in self.crawler.peers) + crawler_replacements = Counter(p.compact for p in self.crawler.replacements) + database_hashes = self.torrent_database.num_hashes() + database_indexed = self.torrent_database.num_indexed() + database_nonmusic = self.torrent_database.num_non_music() + try: + database_indexed_music_fraction = database_indexed / database_hashes + except ZeroDivisionError: + database_indexed_music_fraction = 0 + client = self.torrent_client.status + + print(inspect.cleandoc( + f""" + Torrent Database: + Hashes: {database_hashes:,} + Peers: {self.torrent_database.num_peers():,} + Non-music: {database_nonmusic:,} + Indexed: {database_indexed:,} ({database_indexed_music_fraction:.2%}) + Songs: {self.torrent_database.num_songs():,} + + Crawler: + Nodes: {len(self.crawler.nodes)} + DHT Peers: {sum(crawler_peers.values()):,} 
({len(crawler_peers.keys()):,} unique) + Replacements: {sum(crawler_replacements.values()):,} ({len(crawler_replacements.keys()):,} unique) + + BitTorrent Client: + Peers: {client.num_peers} + Download Rate: {humanize(client.download_rate, suffix='B/s')} + Upload Rate: {humanize(client.upload_rate, suffix='B/s')} + Total Download: {humanize(client.total_download)} + Total Upload: {humanize(client.total_upload)} + """ + )) + + def do_search(self, query): + songs = self.torrent_database.search_song(query) + print("Id | Name") + print("---|----------------------------------------------------------------------") + for i, song in enumerate(songs): + print(f"{i:<2} | {song['name']}") + choice = int(input("Song: ")) + song = songs[choice] + asyncio.run_coroutine_threadsafe(self.player.download_and_play(song["name"], song["info_hash"], + song["file_index"]), self.loop) + + def do_peers(self, info_hash): + for peer in self.torrent_database.get_peers(bytes.fromhex(info_hash)): + print(peer) + + def do_pause(self, _): + self.player.pause() + + def do_resume(self, _): + self.player.resume() + + def do_playing(self, _): + self.player.currently_playing() + + def do_save(self, _): + self.crawler.save() + + def do_exit(self, _): + return True + + do_EOF = do_exit # Ctrl+D + + def emptyline(self): + """ + Method called when an empty line is entered in response to the prompt. + If this method is not overridden, it repeats the last nonempty command entered. + """ + pass + + def postloop(self): + """ + Hook method executed once when the cmdloop() method is about to return. Do cleanup here. 
def parse_args(argv=None):
    """
    Build the silverstream command line parser and parse the arguments.

    :param argv: Argument strings to parse; ``None`` makes argparse read them from ``sys.argv``.
    :return: The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(prog="silverstream",
                                     formatter_class=lambda **kw: argparse.ArgumentDefaultsHelpFormatter(
                                         **kw,
                                         max_help_position=35,
                                         width=shutil.get_terminal_size().columns - 2))

    def at_least(minimum):
        """Return an argparse ``type`` callable accepting integers that are at least ``minimum``."""
        def parse(string):
            try:
                value = int(string)
            except ValueError:
                raise argparse.ArgumentTypeError(f"invalid integer: {string!r}") from None
            if value < minimum:
                raise argparse.ArgumentTypeError(f"Must be at least {minimum}")
            return value
        return parse

    parser.add_argument("--interface",
                        type=str,
                        default="0.0.0.0",
                        metavar="interface",
                        help="Network interface to bind to.")
    parser.add_argument("--port",
                        type=int,
                        default=6881,
                        metavar="port",
                        help="Network port to listen on. Ports are bound consecutively from this port.")
    parser.add_argument("--load",
                        action="store_true",
                        help="Load state from file (use 'save' from the cli to save).")
    parser.add_argument("--stats",
                        action="store_true",
                        help="Save statistics to file.")
    parser.add_argument("-v", "--verbose",
                        action="count",
                        default=0,
                        help="Increase verbosity level. Can be used multiple times.")
    parser.add_argument("--clean",
                        action="store_true",
                        help="Remove data directory.")

    crawler = parser.add_argument_group("Crawler")
    crawler.add_argument("--crawler-nodes",
                         type=at_least(1),
                         default=get_default_arg(Crawler, "num_nodes"),
                         metavar="nodes",
                         help="Number of BitTorrent DHT nodes to start.")
    crawler.add_argument("--crawler-delay",
                         type=at_least(0),  # a negative delay is meaningless
                         default=get_default_arg(Crawler, "delay"),
                         metavar="seconds",
                         help="Number of seconds to wait between starting each of the BitTorrent DHT nodes.")
    crawler.add_argument("--crawler-await-bootstrap",
                         action="store_true",
                         help="Wait for nodes to fully bootstrap before starting the next one.")

    indexer = parser.add_argument_group("Indexer")
    indexer.add_argument("--indexer-workers",
                         type=at_least(0),  # 0 disables indexing; a negative count would yield an unbounded queue
                         default=get_default_arg(Indexer, "workers"),
                         metavar="workers",
                         help="Number of Indexer workers to start.")
    indexer.add_argument("--indexer-save-torrents",
                         action="store_true",
                         help="Save indexed torrents to torrents/.")

    btdht = parser.add_argument_group("BitTorrent DHT")
    btdht.add_argument("--btdht-seed",
                       type=str,
                       action="append",
                       metavar="host:port",
                       help="BitTorrent DHT seed nodes. Overrides default seeds. Can be specified multiple times.")

    return parser.parse_args(argv)
class Player:
    """
    Audio player that streams songs downloaded through the torrent client into an mpv instance.

    :param torrent_client: Client used to download the requested file of a torrent.
    """

    def __init__(self, torrent_client: "TorrentClient") -> None:
        self.torrent_client = torrent_client
        self.mpv = mpv.MPV()
        # Strong references to running download tasks; the event loop itself only keeps weak ones.
        self._download_tasks = set()

    async def download_and_play(self, song_name: str, info_hash: bytes, file_index: int) -> None:
        """
        Start downloading a single file of a torrent and play it as soon as streaming looks feasible.

        :param song_name: Name of the song; its suffix selects the assumed stream bit rate.
        :param info_hash: Info hash of the torrent containing the song.
        :param file_index: Index of the song's file inside the torrent.
        """
        print("Downloading", song_name)
        download = asyncio.create_task(self.torrent_client.download(info_hash, files=[file_index]))
        self._download_tasks.add(download)
        download.add_done_callback(self._download_tasks.discard)

        handler = await self.torrent_client.get_torrent_handle(info_hash)
        bit_rates = {".mp3": 128_000, ".flac": 512_000, ".ogg": 128_000, ".opus": 128_000}
        stream_rate = bit_rates.get(Path(song_name).suffix)
        logger.info("File's stream bit rate is: %s", stream_rate)

        # Wait until download is complete or we're downloading sufficiently fast for streaming the given audio format
        # NOTE(review): the table above looks like bits/s while libtorrent presumably reports bytes/s - confirm.
        while (not handler.is_finished()
               and not (stream_rate and handler.status().download_payload_rate > stream_rate)
               and torrent_eta(handler.status()) > 150):
            await asyncio.sleep(1)
        logger.debug("Playback ready")
        path = data_dir.joinpath("downloads/", handler.get_torrent_info().files().file_path(file_index))
        # Wait until libtorrent has flushed at least 1 MiB from .parts to final file
        while not path.exists() or path.stat().st_size < 1_048_576:  # 1 MiB
            if path.exists() and handler.is_finished():
                break  # the whole file is smaller than 1 MiB; it will never grow any further
            logger.debug("Waiting for file")
            await asyncio.sleep(1)
        self.play(song_name, path)

    def play(self, song_name: str, path: Path = None):
        """
        Play a file, or restart the current playlist from its first entry when no path is given.

        :param song_name: Name printed to the user.
        :param path: File to play; replaces the whole playlist.
        """
        print("Playing", song_name)
        if path is None:
            self.mpv.playlist_pos = 0
            return
        self.mpv.play(str(path))  # overrides playlist completely, otherwise use add_to_playlist and skip to the song

    def pause(self):
        """Pause playback."""
        self.mpv.pause = True

    def resume(self):
        """Resume playback."""
        self.mpv.pause = False

    def skip(self):
        """Skip the current song."""
        self.mpv.seek(99999)  # force player to end of song, essentially skipping

    def add_to_playlist(self, path: Path):
        """Append a file to the playlist."""
        self.mpv.playlist_append(str(path))

    def skip_to_song_or_idx(self, path: Path, idx=None):
        """
        Jump to a playlist entry.

        :param path: File to jump to; only used when ``idx`` is None. If it occurs several times, the last
                     occurrence is selected.
        :param idx: Playlist position to jump to. Position 0 is valid, hence the explicit None check.
        """
        if idx is not None:
            self.mpv.playlist_pos = idx
            return

        for position, song in enumerate(self.mpv.playlist):
            if song["filename"] == str(path):
                self.mpv.playlist_pos = position

    def remove_from_playlist(self, path: Path):
        """Remove every playlist entry that refers to the given file."""
        matches = [position for position, song in enumerate(self.mpv.playlist) if song["filename"] == str(path)]
        # Remove from the back so that the positions still to be removed are not shifted by earlier removals.
        for position in reversed(matches):
            self.mpv.playlist_remove(position)

    def clear_playlist(self):
        """Remove all entries from the playlist."""
        self.mpv.playlist_clear()

    def display_playlist(self):
        """Print the playlist."""
        print(self.mpv.playlist)

    def next_song(self):
        """Advance to the next playlist entry."""
        self.mpv.playlist_next()

    def previous_song(self):
        """Go back to the previous playlist entry."""
        self.mpv.playlist_prev()

    def currently_playing(self):
        """Print the file name of the current playlist entry, if there is one."""
        playlist = self.mpv.playlist
        position = self.mpv.playlist_pos
        if position is None or not 0 <= position < len(playlist):
            print("Nothing is playing")
            return
        print(playlist[position]["filename"])

    def terminate(self):
        """Shut down the mpv instance."""
        self.mpv.terminate()
class Endpoint(NamedTuple):
    """A network interface/port pair; its string form is ``interface:port``."""
    interface: str
    port: int

    def __str__(self) -> str:
        return f"{self.interface}:{self.port}"


# Highest port number that fits the 16-bit port field.
_MAX_PORT = 65535


def endpoints(interface: str, start_port: int) -> Iterator[Endpoint]:
    """
    Generate consecutive endpoints on an interface, starting at the given port.

    :param interface: Interface shared by all generated endpoints.
    :param start_port: Port of the first endpoint; each following endpoint uses the next port.
    :return: Iterator of endpoints with ports ``start_port`` .. 65535.
    :raises ValueError: When another endpoint is requested after port 65535 was handed out, instead of silently
                        producing a port number that cannot be bound.
    """
    for port in range(start_port, _MAX_PORT + 1):
        yield Endpoint(interface, port)
    raise ValueError(f"No ports left on {interface}: the highest valid port is {_MAX_PORT}")
def i2b(i: int, length: int = None) -> bytes:
    """
    Convert an integer to a sequence of bytes of the given length.

    :param i: The non-negative integer to convert.
    :param length: The length of the output byte-sequence. When omitted (or 0), the smallest length that can hold
                   the integer is used, which is at least one byte so that 0 becomes ``b"\\x00"`` rather than ``b""``.
    :return: Sequence of bytes representing the integer in unsigned network byteorder.
    :raises OverflowError: If the integer is negative or does not fit in ``length`` bytes.
    """
    if not length:
        length = max(1, (i.bit_length() + 7) // 8)
    return i.to_bytes(length, byteorder="big", signed=False)
def humanize(n, precision=2, prefix="bin", suffix="B") -> str:
    """
    Return a humanized string representation of a number (of bytes).
    Adapted from Doug Latornell - http://code.activestate.com/recipes/577081/

    :param n: The number to represent.
    :param precision: Number of decimals in the result.
    :param prefix: "bin" for powers of 1024 (ki, Mi, ..) or "dec" for powers of 1000 (k, M, ..).
    :param suffix: Unit appended after the magnitude prefix, e.g. "B" or "B/s".
    :return: The number scaled to the largest fitting magnitude, e.g. "1.50 kB". Exactly 1 yields "1 <suffix>".
    :raises KeyError: If prefix is neither "bin" nor "dec".
    """
    abbrevs = {
        "dec": [
            (1000 ** 5, 'P' + suffix),
            (1000 ** 4, 'T' + suffix),
            (1000 ** 3, 'G' + suffix),
            (1000 ** 2, 'M' + suffix),
            (1000 ** 1, 'k' + suffix),
            (1, suffix)  # base unit; a factor of 1000 here would print e.g. 500 as "0.50 B"
        ],
        "bin": [
            (1 << 50, 'Pi' + suffix),
            (1 << 40, 'Ti' + suffix),
            (1 << 30, 'Gi' + suffix),
            (1 << 20, 'Mi' + suffix),
            (1 << 10, 'ki' + suffix),
            (1, suffix)
        ]
    }
    magnitudes = abbrevs[prefix]

    if n == 1:
        return "1 " + suffix

    # Pick the largest magnitude not exceeding n; numbers below 1 fall through to the base unit.
    factor, unit = magnitudes[-1]
    for factor, unit in magnitudes:
        if n >= factor:
            break
    return '%.*f %s' % (precision, n / factor, unit)
async def async_last(iterable: "AsyncIterable[T]") -> "T":
    """
    Return the last element of an async iterable.

    :param iterable: Async iterable; it is consumed completely.
    :return: The last element of the iterable.
    :raises ValueError: If the iterable is empty. Letting StopIteration escape from a coroutine would surface as an
                        unrelated-looking RuntimeError instead.
    """
    found = False
    last = None
    async for last in iterable:
        found = True
    if not found:
        raise ValueError("async_last() called with an empty async iterable")
    return last
mock_datetime.utcnow.return_value = datetime(2010, 1, 1, 12, 0, 0) + mock_datetime.side_effect = lambda *args, **kw: datetime(*args, **kw) + + self.peer.status = PeerStatus.GOOD + self.assertEqual(self.peer.status, PeerStatus.GOOD) + + mock_datetime.utcnow.return_value += timedelta(minutes=15) + + self.assertEqual(self.peer.status, PeerStatus.QUESTIONABLE) + + +if __name__ == '__main__': + unittest.main() diff --git a/tools/add_torrents.py b/tools/add_torrents.py new file mode 100644 index 0000000..1eda772 --- /dev/null +++ b/tools/add_torrents.py @@ -0,0 +1,53 @@ +import os +import sys +from pathlib import Path +import libtorrent + +sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__))))) # magic import-fixer +from silverstream.bittorrent.database import TorrentDatabase, IndexStatus + +try: + torrents_dir, database_filepath = map(Path, sys.argv[1:]) +except (IndexError, ValueError): + exit(f"Usage: {__file__} ") + +torrent_database = TorrentDatabase(filepath=database_filepath) + +num_torrents = num_skipped = num_songs = 0 +for torrent in torrents_dir.iterdir(): + num_torrents += 1 + print() + print("---") + print("Adding", torrent) + torrent_info = libtorrent.torrent_info(str(torrent)) + info_hash = torrent_info.info_hash().to_bytes() + torrent_name = torrent_info.name() + print("Info hash:", info_hash.hex()) + print("Torrent name:", torrent_name) + + torrent_database.add_peer(info_hash, name=torrent_name) + + if torrent_database.get_index_status(info_hash) == IndexStatus.Indexed: + print("Torrent already indexed: skipping") + num_skipped += 1 + continue + + songs = [] + files = torrent_info.files() + for file_index in range(files.num_files()): + file_path = Path(files.file_path(file_index)) + if file_path.suffix in (".aac", ".flac", ".m4a", ".mp3", ".mpc", ".ogg", ".opus", ".wav", ".wma"): + songs.append((file_index, " / ".join(file_path.parts))) + num_songs += 1 + + if songs: + torrent_database.add_songs(info_hash, songs) + 
class OldPeerDatabaseStub:
    """
    Minimal reader for the old dbm-based peer database.

    Each key is an info hash; its value is read as a concatenation of 6-byte compact peers.
    """

    def __init__(self, filepath) -> None:
        # "rf": open read-only in fast mode
        self.dbm = dbm.open(filepath, "rf")

    def __iter__(self) -> Iterator[Tuple[bytes, List[TorrentPeer]]]:
        """Yield ``(info_hash, peers)`` for every key, following the dbm's own key order."""
        key = self.dbm.firstkey()
        while True:
            if key is None:
                return
            yield key, self.get(key)
            key = self.dbm.nextkey(key)

    def get(self, info_hash: bytes) -> List[TorrentPeer]:
        """
        Decode the peers stored for an info hash.

        :param info_hash: Key to look up.
        :return: The peers decoded from consecutive 6-byte groups of the stored value.
        """
        peers = []
        for chunk in grouper(self.dbm.get(info_hash), 6):
            peers.append(TorrentPeer.from_compact(bytes(chunk)))
        return peers
def parse_stats(path: Path):
    """
    Read a statistics file containing one JSON document per line.

    :param path: File whose lines each hold an object with a "utc" timestamp and a "data" object.
    :return: Tuple of (highest "num_nodes" reported by the crawler, list of timestamps as naive UTC datetimes,
             list of the "data" objects). A file without any entries yields ``(0, [], [])``.
    """
    timestamps = []
    data = []
    with path.open() as file:
        for line in file:  # stream the file instead of loading every line up front
            if not line.strip():
                continue  # tolerate blank lines, e.g. a trailing newline added by an editor
            stat = json.loads(line)
            timestamps.append(datetime.utcfromtimestamp(stat["utc"]))
            data.append(stat["data"])
    # default=0 keeps an empty statistics file from raising "max() arg is an empty sequence"
    return max((d["crawler"]["num_nodes"] for d in data), default=0), timestamps, data