diff --git a/.gitignore b/.gitignore
index b359132..7e75396 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,4 @@ out
tmp/
/test*py
data
+logs
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..c6f01c6
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,660 @@
+# GNU AFFERO GENERAL PUBLIC LICENSE
+
+Version 3, 19 November 2007
+
+Copyright (C) 2007 Free Software Foundation, Inc.
+<https://fsf.org/>
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+
+## Preamble
+
+The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+
+The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains
+free software for all its users.
+
+When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+
+A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate. Many developers of free software are heartened and
+encouraged by the resulting cooperation. However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
+
+The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community. It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server. Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.
+
+An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals. This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing
+under this license.
+
+The precise terms and conditions for copying, distribution and
+modification follow.
+
+## TERMS AND CONDITIONS
+
+### 0. Definitions.
+
+"This License" refers to version 3 of the GNU Affero General Public
+License.
+
+"Copyright" also means copyright-like laws that apply to other kinds
+of works, such as semiconductor masks.
+
+"The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of
+an exact copy. The resulting work is called a "modified version" of
+the earlier work or a work "based on" the earlier work.
+
+A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user
+through a computer network, with no transfer of a copy, is not
+conveying.
+
+An interactive user interface displays "Appropriate Legal Notices" to
+the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+### 1. Source Code.
+
+The "source code" for a work means the preferred form of the work for
+making modifications to it. "Object code" means any non-source form of
+a work.
+
+A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+The Corresponding Source need not include anything that users can
+regenerate automatically from other parts of the Corresponding Source.
+
+The Corresponding Source for a work in source code form is that same
+work.
+
+### 2. Basic Permissions.
+
+All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+You may make, run and propagate covered works that you do not convey,
+without conditions so long as your license otherwise remains in force.
+You may convey covered works to others for the sole purpose of having
+them make modifications exclusively for you, or provide you with
+facilities for running those works, provided that you comply with the
+terms of this License in conveying all material for which you do not
+control copyright. Those thus making or running the covered works for
+you must do so exclusively on your behalf, under your direction and
+control, on terms that prohibit them from making any copies of your
+copyrighted material outside their relationship with you.
+
+Conveying under any other circumstances is permitted solely under the
+conditions stated below. Sublicensing is not allowed; section 10 makes
+it unnecessary.
+
+### 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such
+circumvention is effected by exercising rights under this License with
+respect to the covered work, and you disclaim any intention to limit
+operation or modification of the work as a means of enforcing, against
+the work's users, your or third parties' legal rights to forbid
+circumvention of technological measures.
+
+### 4. Conveying Verbatim Copies.
+
+You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+### 5. Conveying Modified Source Versions.
+
+You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these
+conditions:
+
+- a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+- b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under
+ section 7. This requirement modifies the requirement in section 4
+ to "keep intact all notices".
+- c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+- d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+### 6. Conveying Non-Source Forms.
+
+You may convey a covered work in object code form under the terms of
+sections 4 and 5, provided that you also convey the machine-readable
+Corresponding Source under the terms of this License, in one of these
+ways:
+
+- a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+- b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the Corresponding
+ Source from a network server at no charge.
+- c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+- d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+- e) Convey the object code using peer-to-peer transmission,
+ provided you inform other peers where the object code and
+ Corresponding Source of the work are being offered to the general
+ public at no charge under subsection 6d.
+
+A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal,
+family, or household purposes, or (2) anything designed or sold for
+incorporation into a dwelling. In determining whether a product is a
+consumer product, doubtful cases shall be resolved in favor of
+coverage. For a particular product received by a particular user,
+"normally used" refers to a typical or common use of that class of
+product, regardless of the status of the particular user or of the way
+in which the particular user actually uses, or expects or is expected
+to use, the product. A product is a consumer product regardless of
+whether the product has substantial commercial, industrial or
+non-consumer uses, unless such uses represent the only significant
+mode of use of the product.
+
+"Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to
+install and execute modified versions of a covered work in that User
+Product from a modified version of its Corresponding Source. The
+information must suffice to ensure that the continued functioning of
+the modified object code is in no case prevented or interfered with
+solely because modification has been made.
+
+If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or
+updates for a work that has been modified or installed by the
+recipient, or for the User Product in which it has been modified or
+installed. Access to a network may be denied when the modification
+itself materially and adversely affects the operation of the network
+or violates the rules and protocols for communication across the
+network.
+
+Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+### 7. Additional Terms.
+
+"Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders
+of that material) supplement the terms of this License with terms:
+
+- a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+- b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+- c) Prohibiting misrepresentation of the origin of that material,
+ or requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+- d) Limiting the use for publicity purposes of names of licensors
+ or authors of the material; or
+- e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+- f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions
+ of it) with contractual assumptions of liability to the recipient,
+ for any liability that these contractual assumptions directly
+ impose on those licensors and authors.
+
+All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions; the
+above requirements apply either way.
+
+### 8. Termination.
+
+You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+However, if you cease all violation of this License, then your license
+from a particular copyright holder is reinstated (a) provisionally,
+unless and until the copyright holder explicitly and finally
+terminates your license, and (b) permanently, if the copyright holder
+fails to notify you of the violation by some reasonable means prior to
+60 days after the cessation.
+
+Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+### 9. Acceptance Not Required for Having Copies.
+
+You are not required to accept this License in order to receive or run
+a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+### 10. Automatic Licensing of Downstream Recipients.
+
+Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+### 11. Patents.
+
+A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+A contributor's "essential patent claims" are all patent claims owned
+or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+A patent license is "discriminatory" if it does not include within the
+scope of its coverage, prohibits the exercise of, or is conditioned on
+the non-exercise of one or more of the rights that are specifically
+granted under this License. You may not convey a covered work if you
+are a party to an arrangement with a third party that is in the
+business of distributing software, under which you make payment to the
+third party based on the extent of your activity of conveying the
+work, and under which the third party grants, to any of the parties
+who would receive the covered work from you, a discriminatory patent
+license (a) in connection with copies of the covered work conveyed by
+you (or copies made from those copies), or (b) primarily for and in
+connection with specific products or compilations that contain the
+covered work, unless you entered into that arrangement, or that patent
+license was granted, prior to 28 March 2007.
+
+Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+### 12. No Surrender of Others' Freedom.
+
+If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under
+this License and any other pertinent obligations, then as a
+consequence you may not convey it at all. For example, if you agree to
+terms that obligate you to collect a royalty for further conveying
+from those to whom you convey the Program, the only way you could
+satisfy both those terms and this License would be to refrain entirely
+from conveying the Program.
+
+### 13. Remote Network Interaction; Use with the GNU General Public License.
+
+Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your
+version supports such interaction) an opportunity to receive the
+Corresponding Source of your version by providing access to the
+Corresponding Source from a network server at no charge, through some
+standard or customary means of facilitating copying of software. This
+Corresponding Source shall include the Corresponding Source for any
+work covered by version 3 of the GNU General Public License that is
+incorporated pursuant to the following paragraph.
+
+Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.
+
+### 14. Revised Versions of this License.
+
+The Free Software Foundation may publish revised and/or new versions
+of the GNU Affero General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies that a certain numbered version of the GNU Affero General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU Affero General Public License, you may choose any version ever
+published by the Free Software Foundation.
+
+If the Program specifies that a proxy can decide which future versions
+of the GNU Affero General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+### 15. Disclaimer of Warranty.
+
+THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT
+WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND
+PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
+DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
+CORRECTION.
+
+### 16. Limitation of Liability.
+
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR
+CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES
+ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT
+NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR
+LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM
+TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER
+PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+### 17. Interpretation of Sections 15 and 16.
+
+If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+END OF TERMS AND CONDITIONS
+
+## How to Apply These Terms to Your New Programs
+
+If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these
+terms.
+
+To do so, attach the following notices to the program. It is safest to
+attach them to the start of each source file to most effectively state
+the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year>  <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as
+ published by the Free Software Foundation, either version 3 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper
+mail.
+
+If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source. For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code. There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for
+the specific requirements.
+
+You should also get your employer (if you work as a programmer) or
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. For more information on this, and how to apply and follow
+the GNU AGPL, see <https://www.gnu.org/licenses/>.
diff --git a/README.md b/README.md
index e69de29..3d0a0bc 100644
--- a/README.md
+++ b/README.md
@@ -0,0 +1,27 @@
+# republisher-redux
+
+``` shell
+mkdir logs out
+poetry install
+poetry run repub
+```
+
+
+## License
+
+republisher-redux, a tool to mirror RSS/ATOM feeds completely offline
+
+Copyright (C) 2024 Abel Luck
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
diff --git a/poetry.lock b/poetry.lock
index 80da2e0..735c2e1 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -539,13 +539,13 @@ files = [
[[package]]
name = "itemloaders"
-version = "1.1.0"
+version = "1.2.0"
description = "Base library for scrapy's ItemLoader"
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
files = [
- {file = "itemloaders-1.1.0-py3-none-any.whl", hash = "sha256:c8c82fe0c11fc4cdd08ec04df0b3c43f3cb7190002edb517e02d55de8efc2aeb"},
- {file = "itemloaders-1.1.0.tar.gz", hash = "sha256:21d81c61da6a08b48e5996288cdf3031c0f92e5d0075920a0242527523e14a48"},
+ {file = "itemloaders-1.2.0-py3-none-any.whl", hash = "sha256:6ec5753dafdc69262774694c78c9ec44605672586b40a7134a097a5df601a442"},
+ {file = "itemloaders-1.2.0.tar.gz", hash = "sha256:fc2307f984116b010d6101a68a6a133ac8de927320b0ab696f31ad710a8d8d98"},
]
[package.dependencies]
@@ -890,6 +890,92 @@ files = [
{file = "pbr-6.0.0.tar.gz", hash = "sha256:d1377122a5a00e2f940ee482999518efe16d745d423a670c27773dfbc3c9a7d9"},
]
+[[package]]
+name = "pillow"
+version = "10.3.0"
+description = "Python Imaging Library (Fork)"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "pillow-10.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:90b9e29824800e90c84e4022dd5cc16eb2d9605ee13f05d47641eb183cd73d45"},
+ {file = "pillow-10.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a2c405445c79c3f5a124573a051062300936b0281fee57637e706453e452746c"},
+ {file = "pillow-10.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78618cdbccaa74d3f88d0ad6cb8ac3007f1a6fa5c6f19af64b55ca170bfa1edf"},
+ {file = "pillow-10.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261ddb7ca91fcf71757979534fb4c128448b5b4c55cb6152d280312062f69599"},
+ {file = "pillow-10.3.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:ce49c67f4ea0609933d01c0731b34b8695a7a748d6c8d186f95e7d085d2fe475"},
+ {file = "pillow-10.3.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:b14f16f94cbc61215115b9b1236f9c18403c15dd3c52cf629072afa9d54c1cbf"},
+ {file = "pillow-10.3.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d33891be6df59d93df4d846640f0e46f1a807339f09e79a8040bc887bdcd7ed3"},
+ {file = "pillow-10.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b50811d664d392f02f7761621303eba9d1b056fb1868c8cdf4231279645c25f5"},
+ {file = "pillow-10.3.0-cp310-cp310-win32.whl", hash = "sha256:ca2870d5d10d8726a27396d3ca4cf7976cec0f3cb706debe88e3a5bd4610f7d2"},
+ {file = "pillow-10.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:f0d0591a0aeaefdaf9a5e545e7485f89910c977087e7de2b6c388aec32011e9f"},
+ {file = "pillow-10.3.0-cp310-cp310-win_arm64.whl", hash = "sha256:ccce24b7ad89adb5a1e34a6ba96ac2530046763912806ad4c247356a8f33a67b"},
+ {file = "pillow-10.3.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:5f77cf66e96ae734717d341c145c5949c63180842a545c47a0ce7ae52ca83795"},
+ {file = "pillow-10.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e4b878386c4bf293578b48fc570b84ecfe477d3b77ba39a6e87150af77f40c57"},
+ {file = "pillow-10.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdcbb4068117dfd9ce0138d068ac512843c52295ed996ae6dd1faf537b6dbc27"},
+ {file = "pillow-10.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9797a6c8fe16f25749b371c02e2ade0efb51155e767a971c61734b1bf6293994"},
+ {file = "pillow-10.3.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:9e91179a242bbc99be65e139e30690e081fe6cb91a8e77faf4c409653de39451"},
+ {file = "pillow-10.3.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1b87bd9d81d179bd8ab871603bd80d8645729939f90b71e62914e816a76fc6bd"},
+ {file = "pillow-10.3.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:81d09caa7b27ef4e61cb7d8fbf1714f5aec1c6b6c5270ee53504981e6e9121ad"},
+ {file = "pillow-10.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:048ad577748b9fa4a99a0548c64f2cb8d672d5bf2e643a739ac8faff1164238c"},
+ {file = "pillow-10.3.0-cp311-cp311-win32.whl", hash = "sha256:7161ec49ef0800947dc5570f86568a7bb36fa97dd09e9827dc02b718c5643f09"},
+ {file = "pillow-10.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:8eb0908e954d093b02a543dc963984d6e99ad2b5e36503d8a0aaf040505f747d"},
+ {file = "pillow-10.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:4e6f7d1c414191c1199f8996d3f2282b9ebea0945693fb67392c75a3a320941f"},
+ {file = "pillow-10.3.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:e46f38133e5a060d46bd630faa4d9fa0202377495df1f068a8299fd78c84de84"},
+ {file = "pillow-10.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:50b8eae8f7334ec826d6eeffaeeb00e36b5e24aa0b9df322c247539714c6df19"},
+ {file = "pillow-10.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d3bea1c75f8c53ee4d505c3e67d8c158ad4df0d83170605b50b64025917f338"},
+ {file = "pillow-10.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:19aeb96d43902f0a783946a0a87dbdad5c84c936025b8419da0a0cd7724356b1"},
+ {file = "pillow-10.3.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:74d28c17412d9caa1066f7a31df8403ec23d5268ba46cd0ad2c50fb82ae40462"},
+ {file = "pillow-10.3.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ff61bfd9253c3915e6d41c651d5f962da23eda633cf02262990094a18a55371a"},
+ {file = "pillow-10.3.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d886f5d353333b4771d21267c7ecc75b710f1a73d72d03ca06df49b09015a9ef"},
+ {file = "pillow-10.3.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b5ec25d8b17217d635f8935dbc1b9aa5907962fae29dff220f2659487891cd3"},
+ {file = "pillow-10.3.0-cp312-cp312-win32.whl", hash = "sha256:51243f1ed5161b9945011a7360e997729776f6e5d7005ba0c6879267d4c5139d"},
+ {file = "pillow-10.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:412444afb8c4c7a6cc11a47dade32982439925537e483be7c0ae0cf96c4f6a0b"},
+ {file = "pillow-10.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:798232c92e7665fe82ac085f9d8e8ca98826f8e27859d9a96b41d519ecd2e49a"},
+ {file = "pillow-10.3.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:4eaa22f0d22b1a7e93ff0a596d57fdede2e550aecffb5a1ef1106aaece48e96b"},
+ {file = "pillow-10.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cd5e14fbf22a87321b24c88669aad3a51ec052eb145315b3da3b7e3cc105b9a2"},
+ {file = "pillow-10.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1530e8f3a4b965eb6a7785cf17a426c779333eb62c9a7d1bbcf3ffd5bf77a4aa"},
+ {file = "pillow-10.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d512aafa1d32efa014fa041d38868fda85028e3f930a96f85d49c7d8ddc0383"},
+ {file = "pillow-10.3.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:339894035d0ede518b16073bdc2feef4c991ee991a29774b33e515f1d308e08d"},
+ {file = "pillow-10.3.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:aa7e402ce11f0885305bfb6afb3434b3cd8f53b563ac065452d9d5654c7b86fd"},
+ {file = "pillow-10.3.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0ea2a783a2bdf2a561808fe4a7a12e9aa3799b701ba305de596bc48b8bdfce9d"},
+ {file = "pillow-10.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c78e1b00a87ce43bb37642c0812315b411e856a905d58d597750eb79802aaaa3"},
+ {file = "pillow-10.3.0-cp38-cp38-win32.whl", hash = "sha256:72d622d262e463dfb7595202d229f5f3ab4b852289a1cd09650362db23b9eb0b"},
+ {file = "pillow-10.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:2034f6759a722da3a3dbd91a81148cf884e91d1b747992ca288ab88c1de15999"},
+ {file = "pillow-10.3.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:2ed854e716a89b1afcedea551cd85f2eb2a807613752ab997b9974aaa0d56936"},
+ {file = "pillow-10.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:dc1a390a82755a8c26c9964d457d4c9cbec5405896cba94cf51f36ea0d855002"},
+ {file = "pillow-10.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4203efca580f0dd6f882ca211f923168548f7ba334c189e9eab1178ab840bf60"},
+ {file = "pillow-10.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3102045a10945173d38336f6e71a8dc71bcaeed55c3123ad4af82c52807b9375"},
+ {file = "pillow-10.3.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:6fb1b30043271ec92dc65f6d9f0b7a830c210b8a96423074b15c7bc999975f57"},
+ {file = "pillow-10.3.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:1dfc94946bc60ea375cc39cff0b8da6c7e5f8fcdc1d946beb8da5c216156ddd8"},
+ {file = "pillow-10.3.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b09b86b27a064c9624d0a6c54da01c1beaf5b6cadfa609cf63789b1d08a797b9"},
+ {file = "pillow-10.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d3b2348a78bc939b4fed6552abfd2e7988e0f81443ef3911a4b8498ca084f6eb"},
+ {file = "pillow-10.3.0-cp39-cp39-win32.whl", hash = "sha256:45ebc7b45406febf07fef35d856f0293a92e7417ae7933207e90bf9090b70572"},
+ {file = "pillow-10.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:0ba26351b137ca4e0db0342d5d00d2e355eb29372c05afd544ebf47c0956ffeb"},
+ {file = "pillow-10.3.0-cp39-cp39-win_arm64.whl", hash = "sha256:50fd3f6b26e3441ae07b7c979309638b72abc1a25da31a81a7fbd9495713ef4f"},
+ {file = "pillow-10.3.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:6b02471b72526ab8a18c39cb7967b72d194ec53c1fd0a70b050565a0f366d355"},
+ {file = "pillow-10.3.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:8ab74c06ffdab957d7670c2a5a6e1a70181cd10b727cd788c4dd9005b6a8acd9"},
+ {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:048eeade4c33fdf7e08da40ef402e748df113fd0b4584e32c4af74fe78baaeb2"},
+ {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e2ec1e921fd07c7cda7962bad283acc2f2a9ccc1b971ee4b216b75fad6f0463"},
+ {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:4c8e73e99da7db1b4cad7f8d682cf6abad7844da39834c288fbfa394a47bbced"},
+ {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:16563993329b79513f59142a6b02055e10514c1a8e86dca8b48a893e33cf91e3"},
+ {file = "pillow-10.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:dd78700f5788ae180b5ee8902c6aea5a5726bac7c364b202b4b3e3ba2d293170"},
+ {file = "pillow-10.3.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:aff76a55a8aa8364d25400a210a65ff59d0168e0b4285ba6bf2bd83cf675ba32"},
+ {file = "pillow-10.3.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:b7bc2176354defba3edc2b9a777744462da2f8e921fbaf61e52acb95bafa9828"},
+ {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:793b4e24db2e8742ca6423d3fde8396db336698c55cd34b660663ee9e45ed37f"},
+ {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d93480005693d247f8346bc8ee28c72a2191bdf1f6b5db469c096c0c867ac015"},
+ {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c83341b89884e2b2e55886e8fbbf37c3fa5efd6c8907124aeb72f285ae5696e5"},
+ {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1a1d1915db1a4fdb2754b9de292642a39a7fb28f1736699527bb649484fb966a"},
+ {file = "pillow-10.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a0eaa93d054751ee9964afa21c06247779b90440ca41d184aeb5d410f20ff591"},
+ {file = "pillow-10.3.0.tar.gz", hash = "sha256:9d2455fbf44c914840c793e89aa82d0e1763a14253a000743719ae5946814b2d"},
+]
+
+[package.extras]
+docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-removed-in", "sphinxext-opengraph"]
+fpx = ["olefile"]
+mic = ["olefile"]
+tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"]
+typing = ["typing-extensions"]
+xmp = ["defusedxml"]
+
[[package]]
name = "platformdirs"
version = "4.2.0"
@@ -1514,4 +1600,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
-content-hash = "c7cada0d348ebdcb48a3468d0b45aa8509b57ab3cd4d3c4065421bb0c0f1f57b"
+content-hash = "8b12b19145242fe86f09024453bca29792f6e22b4e63cfc72e2c6e480f38f043"
diff --git a/pyproject.toml b/pyproject.toml
index f819538..cc9cc4a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,7 @@ python-dateutil = "^2.9.0.post0"
colorlog = "^6.8.2"
feedparser = "^6.0.11"
lxml = "^5.2.1"
+pillow = "^10.3.0"
[build-system]
diff --git a/repub/entrypoint.py b/repub/entrypoint.py
index 71205cb..7a6357d 100644
--- a/repub/entrypoint.py
+++ b/repub/entrypoint.py
@@ -1,30 +1,88 @@
-from scrapy.crawler import CrawlerProcess
-from scrapy.utils.project import get_project_settings
+import logging
+import multiprocessing as mp
+import multiprocessing.connection as mpc
-from . import colorlog
-from .postprocessing import SortRssItems
-from .spiders.rss_spider import RssFeedSpider
-
-base_settings = get_project_settings()
-
-settings = {
- **base_settings,
- "FEEDS": {
- "out/feed.rss": {
- "format": "rss",
- "postprocessing": [],
- },
- },
+# Feeds to mirror, keyed by a short feed name. entrypoint() hands each name and
+# its "url" to execute_spider(), which uses the name in output and log paths.
+feeds = {
+ "gp-pod": {"url": "https://guardianproject.info/podcast/podcast.xml"},
+ "nasa": {"url": "https://www.nasa.gov/rss/dyn/breaking_news.rss"},
}
-colorlog.load_colorlog()
+# Module-level logger configured at import time: DEBUG and above are sent to a
+# StreamHandler using a "time - name - level - message" format.
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+ch = logging.StreamHandler()
+ch.setLevel(logging.DEBUG)
+formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+ch.setFormatter(formatter)
+logger.addHandler(ch)
-urls = ["https://www.nasa.gov/rss/dyn/breaking_news.rss"]
+class FeedNameFilter:
+    """Item filter that accepts only items belonging to one named feed.
+
+    The feed is identified by the ``"feed_name"`` key of ``feed_options``.
+    """
+
+    def __init__(self, feed_options):
+        # Keep the whole options mapping; "feed_name" is looked up on each call.
+        self.feed_options = feed_options
+
+    def accepts(self, item):
+        """Return True when ``item.feed_name`` equals the configured feed name."""
+        actual = item.feed_name
+        wanted = self.feed_options["feed_name"]
+        return actual == wanted
+
+
+def execute_spider(queue, name, url):
+    """Crawl a single feed with ``RssFeedSpider`` and report the outcome on ``queue``.
+
+    ``entrypoint`` uses this function as the target of one child process per
+    feed. Scrapy and the spider are imported inside the function body.
+
+    Args:
+        queue: Receives ``None`` after a successful crawl, or the exception
+            instance if anything raised.
+        name: Feed name; determines the feed file (``out/<name>.rss``), the
+            log file (``logs/<name>.log``) and the per-feed media stores.
+        url: URL of the feed to crawl.
+    """
+    from scrapy.crawler import CrawlerProcess
+    from scrapy.utils.project import get_project_settings
+
+    from .spiders.rss_spider import RssFeedSpider
+
+    try:
+        # Project settings overlaid with the per-feed overrides below.
+        settings = {
+            **get_project_settings(),
+            "REPUBLISHER_OUT_DIR": "out",
+            "FEEDS": {
+                f"out/{name}.rss": {
+                    "format": "rss",
+                    "postprocessing": [],
+                    # "item_filter": FeedNameFilter,
+                    "feed_name": name,
+                }
+            },
+            "ITEM_PIPELINES": {
+                "repub.pipelines.ImagePipeline": 1,
+                "repub.pipelines.AudioPipeline": 2,
+                "repub.pipelines.VideoPipeline": 3,
+                "repub.pipelines.FilePipeline": 4,
+            },
+            "LOG_FILE": f"logs/{name}.log",
+            # Directory segments used when the spider rewrites media URLs.
+            "REPUBLISHER_IMAGE_DIR": "images",
+            "REPUBLISHER_VIDEO_DIR": "video",
+            "REPUBLISHER_AUDIO_DIR": "audio",
+            "REPUBLISHER_FILE_DIR": "files",
+            # On-disk stores; each must line up with the matching
+            # REPUBLISHER_*_DIR above so rewritten URLs resolve.
+            "IMAGES_STORE": f"out/{name}/images",
+            "AUDIO_STORE": f"out/{name}/audio",
+            "VIDEO_STORE": f"out/{name}/video",
+            "FILES_STORE": f"out/{name}/files",
+        }
+        process = CrawlerProcess(settings)
+        # colorlog.load_colorlog()
+        process.crawl(RssFeedSpider, feed_name=name, urls=[url])
+        process.start()
+        queue.put(None)
+    except Exception as e:
+        # Hand the failure to the caller instead of raising in this process.
+        queue.put(e)
def entrypoint():
- process = CrawlerProcess(settings)
-
- process.crawl(RssFeedSpider, urls=urls)
- process.start() # the script will block here until the crawling is finished
+    """Crawl every configured feed in its own child process and log the outcome.
+
+    One process per entry in ``feeds`` runs ``execute_spider``. Each child
+    reports ``None`` (success) or an exception instance (failure) on its own
+    queue; a child that exits without reporting is logged as an error
+    together with its exit code.
+    """
+    from queue import Empty
+
+    pool = []
+    for name, data in feeds.items():
+        logger.info(f"Starting feed {name}")
+        result_queue = mp.Queue()
+        process = mp.Process(
+            target=execute_spider, args=(result_queue, name, data["url"])
+        )
+        pool.append((name, process, result_queue))
+    for _, proc, _ in pool:
+        proc.start()
+    for name, proc, q in pool:
+        # Read the result before joining (a child may not exit until its
+        # queued data is consumed), and poll with a timeout so a child that
+        # died without reporting cannot block this loop forever.
+        reported = False
+        result = None
+        while not reported:
+            was_alive = proc.is_alive()
+            try:
+                result = q.get(timeout=1)
+                reported = True
+            except Empty:
+                if not was_alive:
+                    # Already dead before this last wait: nothing will arrive.
+                    break
+        proc.join()
+        if not reported:
+            logger.error(
+                f"Feed {name} exited with code {proc.exitcode} without reporting a result"
+            )
+        elif result is not None:
+            logger.error(f"Feed {name} encountered error")
+            # Pass the exception itself: exc_info=True only works inside an
+            # ``except`` block and would log "NoneType: None" here.
+            logger.critical(result, exc_info=result)
+        else:
+            logger.info(f"Feed {name} completed successfully")
diff --git a/repub/exporters.py b/repub/exporters.py
index ee28b53..21d6fcb 100644
--- a/repub/exporters.py
+++ b/repub/exporters.py
@@ -28,7 +28,7 @@ class RssExporter(BaseItemExporter):
self.flush_buffer()
return
- if not self.channel:
+ if self.channel is None:
self.item_buffer.append(item)
else:
self.export_rss_item(item)
diff --git a/repub/items.py b/repub/items.py
index 4cb36f2..748858e 100644
--- a/repub/items.py
+++ b/repub/items.py
@@ -1,12 +1,24 @@
from dataclasses import dataclass
-from typing import Any
+from typing import Any, List
@dataclass
class ElementItem:
+ # One feed entry: the XML element built by the spider plus the media URLs
+ # collected while building it. Every field is required (no defaults).
+ # Name of the feed this entry was crawled from.
+ feed_name: str
# The entry's XML element as built by the spider.
el: Any
+ # Each *_urls list holds the original remote URLs of that media kind. The
+ # paired result list is created empty by the spider; presumably filled in by
+ # the matching download pipeline — verify against repub.pipelines.
+ image_urls: List[str]
+ images: List[Any]
+ file_urls: List[str]
+ files: List[Any]
+ audio_urls: List[str]
+ audios: List[Any]
+ video_urls: List[str]
+ videos: List[Any]
@dataclass
class ChannelElementItem:
+ # The feed's channel element plus the channel image URLs collected by the
+ # spider. Every field is required (no defaults).
+ # Name of the feed this channel belongs to.
+ feed_name: str
# The channel's XML element as built by the spider.
el: Any
+ # Original remote URLs of the channel image(s); "images" is created empty by
+ # the spider — presumably filled in by the image pipeline, verify.
+ image_urls: List[str]
+ images: List[Any]
diff --git a/repub/pipelines.py b/repub/pipelines.py
index 9b9d0f2..ccfa57e 100644
--- a/repub/pipelines.py
+++ b/repub/pipelines.py
@@ -1,83 +1,44 @@
-# Define your item pipelines here
-#
-# Don't forget to add your pipeline to the ITEM_PIPELINES setting
-# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
+from os import PathLike
+from pathlib import PurePosixPath
+from typing import IO, DefaultDict, Optional, Set, Union
+from urllib.parse import urlparse
+
+import repub.utils
+from repub.exporters import RssExporter
+from scrapy.pipelines.images import FilesPipeline as BaseFilesPipeline
+from scrapy.pipelines.images import ImagesPipeline as BaseImagesPipeline
-# useful for handling different item types with a single interface
-# from itemadapter import ItemAdapter
-import six
-from scrapy import signals
-from scrapy.exceptions import CloseSpider, NotConfigured
-from scrapy.utils.misc import load_object
+class ImagePipeline(BaseImagesPipeline):
+ """Image pipeline whose storage paths come from repub.utils.local_image_path."""
+ def file_path(self, request, response=None, info=None, *, item=None):
+ # The path depends only on the request URL; response, info and item are unused.
+ return repub.utils.local_image_path(request.url)
-from .exporters import RssItemExporter
-from .items import RssItem
-from .signals import feed_channel_discovered
+ def thumb_path(self, request, thumb_id, response=None, info=None, *, item=None):
+ # Thumbnails are not supported by this pipeline.
+ raise NotImplementedError()
-class RssExportPipeline(object):
- def __init__(self):
- self.files = {}
- self.exporters = {}
-
- @classmethod
- def from_crawler(cls, crawler):
- pipeline = cls()
- crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
- crawler.signals.connect(
- pipeline.feed_channel_discovered, feed_channel_discovered
- )
- return pipeline
-
- def feed_channel_discovered(self, spider, feed, channel):
- try:
- file = open(spider.settings.get("FEED_FILE"), "wb")
- except TypeError:
- raise NotConfigured("FEED_FILE parameter does not string or does not exist")
- except (IOError, OSError) as e:
- raise CloseSpider(
- "Cannot open file {}: {}".format(
- spider.settings.get("FEED_FILE", None), e
- )
- )
- self.files[spider] = file
-
- item_cls = spider.settings.get(
- "FEED_ITEM_CLASS", spider.settings.get("FEED_ITEM_CLS", RssItem)
- )
- if isinstance(item_cls, six.string_types):
- item_cls = load_object(item_cls)
-
- namespaces = spider.settings.get("FEED_NAMESPACES", {})
-
- feed_exporter = spider.settings.get("FEED_EXPORTER", RssItemExporter)
- if isinstance(feed_exporter, six.string_types):
- feed_exporter = load_object(feed_exporter)
- if not issubclass(feed_exporter, RssItemExporter):
- raise TypeError(
- "FEED_EXPORTER must be RssItemExporter or its subclass, not '{}'".format(
- feed_exporter
- )
- )
- self.exporters[spider] = feed_exporter(
- file,
- channel,
- namespaces=namespaces,
- item_cls=item_cls,
- )
- self.exporters[spider].start_exporting()
-
- def spider_closed(self, spider):
- self.exporters[spider].finish_exporting()
- file = self.files.pop(spider)
- file.close()
-
- def process_item(self, item, spider):
- self.exporters[spider].export_item(item)
- return item
+class FilePipeline(BaseFilesPipeline):
+ """Files pipeline whose storage paths come from repub.utils.local_file_path."""
+ def file_path(self, request, response=None, info=None, *, item=None):
+ # The path depends only on the request URL; response, info and item are unused.
+ return repub.utils.local_file_path(request.url)
-class RepubPipeline:
- def process_item(self, item, spider):
- return item
+class AudioPipeline(BaseFilesPipeline):
+ """Files pipeline for audio, using the "audio_urls" / "audios" item fields."""
+ def __init__(self, store_uri: Union[str, PathLike], **kwargs):
+ # Set before super().__init__() runs. NOTE(review): presumably the base
+ # constructor picks these up as its default field names — confirm against Scrapy.
+ self.FILES_URLS_FIELD = "audio_urls"
+ self.FILES_RESULT_FIELD = "audios"
+ # The store_uri argument is ignored in favour of the AUDIO_STORE setting,
+ # so a "settings" keyword argument is required (KeyError otherwise).
+ store_uri = kwargs["settings"]["AUDIO_STORE"]
+ super().__init__(store_uri, **kwargs)
+
+ def file_path(self, request, response=None, info=None, *, item=None):
+ # The path depends only on the request URL; response, info and item are unused.
+ return repub.utils.local_audio_path(request.url)
+
+
+class VideoPipeline(BaseFilesPipeline):
+ """Files pipeline for video, using the "video_urls" / "videos" item fields."""
+ def __init__(self, store_uri: Union[str, PathLike], **kwargs):
+ # Set before super().__init__() runs. NOTE(review): presumably the base
+ # constructor picks these up as its default field names — confirm against Scrapy.
+ self.FILES_URLS_FIELD = "video_urls"
+ self.FILES_RESULT_FIELD = "videos"
+ # The store_uri argument is ignored in favour of the VIDEO_STORE setting,
+ # so a "settings" keyword argument is required (KeyError otherwise).
+ store_uri = kwargs["settings"]["VIDEO_STORE"]
+ super().__init__(store_uri, **kwargs)
+
+ def file_path(self, request, response=None, info=None, *, item=None):
+ # The path depends only on the request URL; response, info and item are unused.
+ return repub.utils.local_video_path(request.url)
diff --git a/repub/postprocessing.py b/repub/postprocessing.py
index 8b8b3c1..e69de29 100644
--- a/repub/postprocessing.py
+++ b/repub/postprocessing.py
@@ -1,11 +0,0 @@
-class SortRssItems:
- def __init__(self, file, feed_options):
- self.file = file
- self.feed_options = feed_options
- self.buffer = ""
-
- def write(self, data):
- self.buffer += data.decode("utf-8")
-
- def close(self):
- self.file.write(sorted)
diff --git a/repub/rss.py b/repub/rss.py
index 0e0af5c..0a6ab18 100644
--- a/repub/rss.py
+++ b/repub/rss.py
@@ -78,7 +78,7 @@ def sort_rss(root):
def serialize(root):
- root = sort_rss(root)
+ # root = sort_rss(root)
return etree.tostring(
root, encoding="utf-8", xml_declaration=True, pretty_print=True
)
diff --git a/repub/settings.py b/repub/settings.py
index b6e3f5e..076ed7e 100644
--- a/repub/settings.py
+++ b/repub/settings.py
@@ -93,4 +93,9 @@ FEED_EXPORTERS = {
"rss": "repub.exporters.RssExporter",
}
-LOG_LEVEL = "ERROR"
+TELNETCONSOLE_ENABLED = False
+
+LOG_LEVEL = "INFO"
+# LOG_LEVEL = "ERROR"
+
+MEDIA_ALLOW_REDIRECTS = True
diff --git a/repub/spiders/rss_spider.py b/repub/spiders/rss_spider.py
index 00ffd83..ab4b7b9 100644
--- a/repub/spiders/rss_spider.py
+++ b/repub/spiders/rss_spider.py
@@ -3,6 +3,8 @@ import logging
import feedparser
from repub.items import ChannelElementItem, ElementItem
from repub.rss import CDATA, CONTENT, ITUNES, MEDIA, E, normalize_date
+from repub.utils import FileType, determine_file_type, local_file_path, local_image_path
+from scrapy.crawler import Crawler
from scrapy.spiders import Spider
from scrapy.utils.spider import iterate_spider_output
@@ -13,6 +15,34 @@ class BaseRssFeedSpider(Spider):
from RSS feeds.
"""
+ def __init__(self, feed_name, **kwargs):
+ """Create the spider and remember the name of the feed it crawls.
+
+ All remaining keyword arguments are forwarded to the parent constructor.
+ """
+ super().__init__(**kwargs)
+ self.feed_name = feed_name
+
+    def _set_crawler(self, crawler: Crawler) -> None:
+        """Attach the crawler, then fail fast if a required setting is unset.
+
+        Raises:
+            RuntimeError: naming the first REPUBLISHER_*_DIR setting whose
+                value is ``None``.
+        """
+        super()._set_crawler(crawler)
+        required = (
+            "REPUBLISHER_IMAGE_DIR",
+            "REPUBLISHER_FILE_DIR",
+            "REPUBLISHER_AUDIO_DIR",
+            "REPUBLISHER_VIDEO_DIR",
+        )
+        # Checked in declaration order; stops at the first missing one.
+        missing = next(
+            (key for key in required if self.settings.get(key) is None), None
+        )
+        if missing is not None:
+            raise RuntimeError(f"Missing setting: {missing}")
+
+    def rewrite_file_url(self, file_type: FileType, url):
+        """Return the republished, site-relative URL for a downloaded media file.
+
+        Args:
+            file_type: Kind of media; selects which REPUBLISHER_*_DIR setting
+                supplies the directory segment (REPUBLISHER_FILE_DIR for
+                anything that is not image, video or audio).
+            url: Original remote URL of the file.
+
+        Returns:
+            ``/<dir>/<local path>``, where the local path is derived from
+            ``url`` by the same helper the matching download pipeline uses.
+        """
+        if file_type == FileType.IMAGE:
+            # ImagePipeline stores images under local_image_path()
+            # ("full/<sha1>.jpg"), so the rewritten URL has to use it as well;
+            # local_file_path() would point at a file that is never written.
+            file_dir = self.settings["REPUBLISHER_IMAGE_DIR"]
+            return f"/{file_dir}/{local_image_path(url)}"
+        if file_type == FileType.VIDEO:
+            file_dir = self.settings["REPUBLISHER_VIDEO_DIR"]
+        elif file_type == FileType.AUDIO:
+            file_dir = self.settings["REPUBLISHER_AUDIO_DIR"]
+        else:
+            file_dir = self.settings["REPUBLISHER_FILE_DIR"]
+        return f"/{file_dir}/{local_file_path(url)}"
+
+ def rewrite_image_url(self, url):
+ """Shorthand for rewrite_file_url(FileType.IMAGE, url)."""
+ return self.rewrite_file_url(FileType.IMAGE, url)
+
def parse_feed(self, feed_text):
parsed = feedparser.parse(feed_text, sanitize_html=False)
if parsed.bozo:
@@ -48,25 +78,30 @@ class BaseRssFeedSpider(Spider):
for tag in f.get("tags", []):
channel.append(E.category(tag.term))
+ image_urls = []
if "image" in f:
if "href" in f.image:
image = E.image(
E.title(f.get("title")),
E.link(f.get("link")),
- E.url(f.image.get("href")),
+ E.url(self.rewrite_image_url(f.image.get("href"))),
E.description(f.get("description")),
)
+ image_urls.append(f.image.get("href"))
else:
image = E.image(
E.title(f.image.get("title")),
E.link(f.image.get("link")),
- E.url(f.image.get("url")),
+ E.url(self.rewrite_image_url(f.image.get("url"))),
E.description(f.image.get("description")),
E.width(f.image.get("width")),
E.height(f.image.get("height")),
)
+ image_urls.append(f.image.get("url"))
channel.append(image)
- return ChannelElementItem(el=channel)
+ return ChannelElementItem(
+ feed_name=self.feed_name, el=channel, image_urls=image_urls, images=[]
+ )
def _parse(self, response, **kwargs):
response = self.adapt_response(response)
@@ -113,6 +148,21 @@ class RssFeedSpider(BaseRssFeedSpider):
super().__init__(**kwargs)
def parse_entry(self, response, feed, entry):
+ image_urls = []
+ file_urls = []
+ audio_urls = []
+ video_urls = []
+
+ def add_url(file_type, url):
+ if file_type == FileType.IMAGE:
+ image_urls.append(url)
+ elif file_type == FileType.AUDIO:
+ audio_urls.append(url)
+ elif file_type == FileType.VIDEO:
+ video_urls.append(url)
+ elif file_type == FileType.FILE:
+ file_urls.append(url)
+
item = E.item(
E.title(entry.get("title")),
E.link(entry.get("link")),
@@ -125,15 +175,29 @@ class RssFeedSpider(BaseRssFeedSpider):
E.author(entry.get("author")),
ITUNES.summary(entry.get("summary")),
ITUNES.duration(entry.get("itunes_duration")),
+ ITUNES.image(
+ None,
+ (
+ {"href": self.rewrite_image_url(entry.get("image").href)}
+ if "image" in entry
+ else None
+ ),
+ ),
)
+ if entry.get("image"):
+ image_urls.append(entry.get("image").href)
for enc in entry.enclosures:
+ file_type = determine_file_type(
+ url=enc.get("href"), mimetype=enc.get("type")
+ )
item.append(
E.enclosure(
- E.url(enc.get("href")),
+ E.url(self.rewrite_file_url(file_type, enc.get("href"))),
E.length(enc.get("length")),
E.type(enc.get("type")),
)
)
+ add_url(file_type, enc.get("href"))
if "content" in entry:
for c in entry.content:
@@ -144,9 +208,14 @@ class RssFeedSpider(BaseRssFeedSpider):
for media in (
media for media in entry["media_content"] if media.get("url")
):
+ file_type = determine_file_type(
+ url=media.get("url"),
+ medium=media.get("medium"),
+ mimetype=media.get("type"),
+ )
item.append(
MEDIA.content(
- E.url(media.get("url")),
+ E.url(self.rewrite_file_url(file_type, media.get("url"))),
E.type(media.get("type")),
E.medium(media.get("medium")),
E.isDefault(media.get("isDefault")),
@@ -161,4 +230,16 @@ class RssFeedSpider(BaseRssFeedSpider):
E.lang(media.get("lang")),
)
)
- return ElementItem(el=item)
+ add_url(file_type, media.get("url"))
+ return ElementItem(
+ feed_name=self.feed_name,
+ el=item,
+ images=[],
+ image_urls=image_urls,
+ files=[],
+ file_urls=file_urls,
+ audio_urls=audio_urls,
+ audios=[],
+ video_urls=video_urls,
+ videos=[],
+ )
diff --git a/repub/utils.py b/repub/utils.py
new file mode 100644
index 0000000..6c827c7
--- /dev/null
+++ b/repub/utils.py
@@ -0,0 +1,74 @@
+import hashlib
+import mimetypes
+from enum import Enum
+from pathlib import Path
+from typing import Any, List, Optional
+
+from scrapy.utils.python import to_bytes
+
+
+class FileType(Enum):
+ """File types that the republisher can handle"""
+
+ VIDEO = "video"
+ IMAGE = "image"
+ AUDIO = "audio"
+ # Catch-all: determine_file_type() returns FILE when nothing more specific matches.
+ FILE = "file"
+
+
+def local_image_path(name: str) -> str:
+    """Return the relative storage path for the image identified by ``name``.
+
+    The result is ``full/<sha1 hex digest of name>.jpg``.
+    """
+    digest = hashlib.sha1(to_bytes(name))  # nosec
+    return "full/" + digest.hexdigest() + ".jpg"
+
+
+def local_file_path(s: str) -> str:
+    """Return a stable local file name for the file at path/URL ``s``.
+
+    The name is the SHA-1 hex digest of ``s`` followed by an extension. The
+    suffix of ``s`` is kept when ``mimetypes`` knows it; otherwise the
+    extension is guessed from the MIME type of ``s``, and left empty when
+    nothing can be guessed.
+    """
+    media_guid = hashlib.sha1(to_bytes(s)).hexdigest()  # nosec
+    media_ext = Path(s).suffix
+    # Handles empty and wild extensions by trying to guess the
+    # mime type then extension or default to empty string otherwise
+    if media_ext not in mimetypes.types_map:
+        media_ext = ""
+        media_type = mimetypes.guess_type(s)[0]
+        if media_type:
+            # guess_extension() returns None for types it has no extension
+            # for; without the fallback the name would end in "None".
+            media_ext = mimetypes.guess_extension(media_type) or ""
+    return f"{media_guid}{media_ext}"
+
+
+def local_video_path(s: str) -> str:
+ """Return the local file name for a video; delegates to local_file_path."""
+ return local_file_path(s)
+
+
+def local_audio_path(s: str) -> str:
+ """Return the local file name for an audio file; delegates to local_file_path."""
+ return local_file_path(s)
+
+
+def determine_file_type(
+    url: str, medium: Optional[str] = None, mimetype: Optional[str] = None
+):
+    """
+    Classify a file as video, image, audio or generic file.
+
+    A recognised ``medium`` wins. Otherwise the MIME type decides, using
+    ``mimetype`` when given and a guess from ``url`` when not. Anything
+    that matches neither is ``FileType.FILE``.
+    """
+    by_medium = {
+        "video": FileType.VIDEO,
+        "audio": FileType.AUDIO,
+        "image": FileType.IMAGE,
+        "document": FileType.FILE,
+        "executable": FileType.FILE,
+    }
+    if medium and medium in by_medium:
+        return by_medium[medium]
+
+    # An unrecognised medium falls through to MIME-type detection.
+    effective_type = mimetype or mimetypes.guess_type(url)[0]
+    if effective_type:
+        by_prefix = (
+            ("image", FileType.IMAGE),
+            ("audio", FileType.AUDIO),
+            ("video", FileType.VIDEO),
+        )
+        for prefix, kind in by_prefix:
+            if effective_type.startswith(prefix):
+                return kind
+
+    return FileType.FILE