dotfiles

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

commit a965b85bcbc81b6dd59c389be30d823fff9348cf
parent 752f58a94941d1a4cba760456f010e22accb921d
Author: Yuval Langer <yuval.langer@gmail.com>
Date:   Wed, 27 Jun 2018 14:21:05 +0300

Simplify the code and add a user agent header to the pandoc call

Diffstat:
M bin/url2pandoc2calibre | 67 +++++++++++++++++++------------------------------------------------
1 file changed, 19 insertions(+), 48 deletions(-)

diff --git a/bin/url2pandoc2calibre b/bin/url2pandoc2calibre
@@ -1,57 +1,28 @@
-#!/usr/bin/env python3
-
-"""url2pandoc2calibre.
-
-Usage:
-    url2pandoc2calibre [URL]
-"""
-
+#!/usr/bin/python3
 import pathlib
-import string
-from subprocess import Popen
+import subprocess
 import sys
-
-from bs4 import BeautifulSoup
-
-import docopt
-
-import requests
-
-
-LEGAL_CHARACTERS = string.ascii_letters + string.digits
-
-
-def clean_title(title):
-    for c in title:
-        if c not in LEGAL_CHARACTERS:
-            yield "-"
-            continue
-
-        yield c.lower()
+import uuid
 
 
 def main():
-    arguments = docopt.docopt(__doc__)
-    try:
-        url = arguments["URL"]
-    except KeyError:
-        print(docopt.printable_usage(__doc__))
-        print(arguments)
-        sys.exit(-1)
-
-    response = requests.get(url)
-
-    html = BeautifulSoup(response.text, "lxml")
-
-    title = "".join(clean_title(html.find("title").text))
-
-    output_filename = f"{title}.epub"
-    output_directory = pathlib.Path("~").expanduser() / "calibre-inbox"
-    output_path = output_directory / output_filename
-
-    process = Popen(["pandoc", url, "-o", str(output_path)])
-    process.wait()
+    calibre_inbox_dirpath = pathlib.Path.home() / "calibre-inbox"
+    a_uuid = uuid.uuid4()
+    output_path = calibre_inbox_dirpath / f"{a_uuid}.epub"
+    user_agent = "User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0"
+
+    command_list = [
+        "pandoc",
+        f"--output={output_path}",
+        f"--request-header='{user_agent}'",
+        "--self-contained",
+        "--verbose",
+    ] + sys.argv[1:]
+
+    print(" ".join(command_list))
+
+    subprocess.run(command_list, check=True)
 
 
 if __name__ == "__main__":