commit a965b85bcbc81b6dd59c389be30d823fff9348cf
parent 752f58a94941d1a4cba760456f010e22accb921d
Author: Yuval Langer <yuval.langer@gmail.com>
Date: Wed, 27 Jun 2018 14:21:05 +0300
Simplify the code and add a user agent header to the pandoc call
Diffstat:
1 file changed, 19 insertions(+), 48 deletions(-)
diff --git a/bin/url2pandoc2calibre b/bin/url2pandoc2calibre
@@ -1,57 +1,28 @@
-#!/usr/bin/env python3
-
-"""url2pandoc2calibre.
-
-Usage:
- url2pandoc2calibre [URL]
-"""
-
+#!/usr/bin/python3
import pathlib
-import string
-from subprocess import Popen
+import subprocess
import sys
-
-from bs4 import BeautifulSoup
-
-import docopt
-
-import requests
-
-
-LEGAL_CHARACTERS = string.ascii_letters + string.digits
-
-
-def clean_title(title):
- for c in title:
- if c not in LEGAL_CHARACTERS:
- yield "-"
- continue
-
- yield c.lower()
+import uuid
def main():
- arguments = docopt.docopt(__doc__)
- try:
- url = arguments["URL"]
- except KeyError:
- print(docopt.printable_usage(__doc__))
- print(arguments)
- sys.exit(-1)
-
- response = requests.get(url)
-
- html = BeautifulSoup(response.text, "lxml")
-
- title = "".join(clean_title(html.find("title").text))
-
- output_filename = f"{title}.epub"
- output_directory = pathlib.Path("~").expanduser() / "calibre-inbox"
- output_path = output_directory / output_filename
-
- process = Popen(["pandoc", url, "-o", str(output_path)])
- process.wait()
+    """Run pandoc on sys.argv[1:], writing a randomly named EPUB to ~/calibre-inbox."""
+    import shlex  # local import: only needed to echo the command below
+    output_path = pathlib.Path.home() / "calibre-inbox" / f"{uuid.uuid4()}.epub"
+    user_agent = "User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0"
+    # No shell parses this list, so quote characters inside an argument would reach
+    # pandoc literally and corrupt the header name; pass the NAME:VAL pair bare.
+    command_list = [
+        "pandoc",
+        f"--output={output_path}",
+        f"--request-header={user_agent}",
+        "--self-contained",
+        "--verbose",
+    ] + sys.argv[1:]
+    # Echo a shell-quoted rendering so the exact command can be copied and re-run.
+    print(" ".join(shlex.quote(argument) for argument in command_list))
+    subprocess.run(command_list, check=True)  # raises CalledProcessError on failure
if __name__ == "__main__":