class Repo(object):
    """Repository state shared between the CLI commands.

    Attributes:
        home: Repository folder location, as passed to the constructor.
        config: Dictionary of the config key/value pairs set so far.
        verbose: When true, ``set_config`` echoes each change to stderr.
    """

    def __init__(self, home):
        self.home = home
        self.config = {}
        self.verbose = False

    def set_config(self, key, value):
        """Store ``value`` under ``key`` and report it when verbose."""
        self.config[key] = value
        if self.verbose:
            click.echo(' config[%s] = %s' % (key, value), file=sys.stderr)

    def __repr__(self):
        # The format string must contain a placeholder: the previous
        # `'' % self.home` raised TypeError ("not all arguments converted")
        # for every string home.
        return '<Repo %r>' % self.home
@cli.command()
@click.option('--message', '-m', multiple=True,
              help='The commit message. If provided multiple times each '
                   'argument gets converted into a new line.')
@click.argument('files', nargs=-1, type=click.Path())
@pass_repo
def commit(repo, files, message):
    """Commits outstanding changes.

    Commit changes to the given files into the repository. You will need to
    "repo push" to push up your changes to other repositories.

    If a list of files is omitted, all changes reported by "repo status"
    will be committed.
    """
    # NOTE: the docstring above doubles as the command's --help text.
    if message:
        # Every -m occurrence becomes its own line of the commit message.
        msg = '\n'.join(message)
    else:
        # No -m given: open an editor pre-filled with the file list. Only
        # the text the user writes above the marker line is kept.
        marker = '# Files to be committed:'
        template = ['', '', marker, '#'] + ['# U %s' % path for path in files]
        edited = click.edit('\n'.join(template))
        if edited is None:
            click.echo('Aborted!')
            return
        msg = edited.split(marker)[0].rstrip()
        if not msg:
            click.echo('Aborted! Empty commit message')
            return

    click.echo('Files to be committed: %s' % (files,))
    click.echo('Commit message:\n' + msg)
@cli.command()
@click.argument("search_query")
@click.option("-a", "--author")
@click.option("-t", "--title")
def add(search_query, author, title):
    """Search ADS for a paper, store its metadata and download its arXiv PDF."""
    # Function-scope import: only this command needs the os module.
    import os

    fields = ['id', 'author', 'first_author', 'bibcode', 'year', 'title',
              'abstract', 'doi', 'pubdate', 'pub', 'doctype', 'identifier']

    # Query terms must be whitespace separated; without the space the
    # fielded term would be glued onto the last word of the free-text query.
    if author:
        search_query += " author:" + author
    if title:
        search_query += " title:" + title

    papers = list(ads.SearchQuery(q=search_query, fl=fields))
    if not papers:
        raise click.ClickException("no papers found for query: " + search_query)
    if len(papers) == 1:
        selection = papers[0]  # type:ads.search.Article
    else:
        # Only offer what is actually listed, and let click re-prompt on an
        # out-of-range answer instead of failing with an IndexError.
        candidates = papers[:10]
        for index, candidate in enumerate(candidates):
            print(index, candidate.title[0] if candidate.title else "<untitled>")
        selected_index = click.prompt(
            'select paper', type=click.IntRange(0, len(candidates) - 1))
        selection = candidates[selected_index]  # type:ads.search.Article

    # Validate the record explicitly: asserts vanish under `python -O`, and
    # these values come from an external service.
    if not selection.doi or len(selection.doi) != 1:
        raise click.ClickException("expected exactly one DOI for the selected paper")
    doi = selection.doi[0]
    if not selection.title or len(selection.title) != 1:
        raise click.ClickException("expected exactly one title for the selected paper")

    # The arXiv id is both stored and used to build the PDF URL, so resolve
    # it before any further network or database work.
    arxiv_ids = [ident.split("arXiv:")[-1]
                 for ident in (selection.identifier or [])
                 if "arXiv:" in ident]
    if not arxiv_ids:
        raise click.ClickException("the selected paper has no arXiv identifier")

    try:
        Paper.get(Paper.doi == doi)
    except peewee.DoesNotExist:
        pass
    else:
        raise click.ClickException("this paper has already been added")

    print("fetching bibcode")
    bibtex = ads.ExportQuery([selection.bibcode]).execute()

    print("saving in db")
    paper = Paper()
    paper.doi = doi
    paper.title = selection.title[0]
    paper.abstract = selection.abstract
    paper.bibcode = selection.bibcode
    paper.year = selection.year
    paper.pubdate = selection.pubdate
    paper.pdf_downloaded = False
    authors = [Author.get_or_create(name=name)[0] for name in selection.author]
    paper.first_author = Author.get_or_create(name=selection.first_author)[0]
    paper.publication = Publication.get_or_create(name=selection.pub)[0]
    paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]
    paper.arxiv_identifier = arxiv_ids[0]
    paper.bibtex = bibtex
    paper.save()
    # Distinct loop name so the `author` option value is not shadowed.
    for paper_author in db.batch_commit(authors, 100):
        PaperAuthors.create(author=paper_author, paper=paper)

    print("fetching PDF")
    arxiv_url = "https://arxiv.org/pdf/{id}".format(id=paper.arxiv_identifier)
    print(arxiv_url)
    os.makedirs('library', exist_ok=True)
    chunk_size = 1024  # bytes
    try:
        with requests.get(arxiv_url, stream=True) as response:
            # Fail instead of saving an HTML error page as a "PDF".
            response.raise_for_status()
            file_size = int(response.headers.get('content-length', 0))
            # Progress is counted in chunks, so the bar length must use the
            # same chunk size that iter_content() is asked for. An unknown
            # size (0) becomes None so click shows an open-ended bar.
            progress_length = math.ceil(file_size / chunk_size) or None
            with open('library/{filename}.pdf'.format(filename=paper.id), 'wb') as f:
                chunks = response.iter_content(chunk_size=chunk_size)
                with click.progressbar(chunks, length=progress_length) as progress_chunks:
                    for chunk in progress_chunks:
                        f.write(chunk)
    except requests.RequestException as err:
        raise click.ClickException("could not download PDF: {}".format(err))

    paper.pdf_downloaded = True
    paper.save()
from setuptools import setup

# Packaging for the two console scripts declared below.
setup(
    name='click-example-repo',
    version='0.1',
    # Must list the modules the entry points import: `example` and `main`,
    # plus `models`, which main imports. The previous value ['repo'] named a
    # module that does not exist, so the installed scripts could not be
    # imported. main.py also imports a local, git-ignored `config` module
    # that the user has to provide.
    py_modules=['example', 'main', 'models'],
    include_package_data=True,
    # Third-party packages imported by main.py and models.py.
    install_requires=[
        'ads',
        'click',
        'peewee',
        'requests',
    ],
    entry_points='''
        [console_scripts]
        repo=example:cli
        paperlibrary=main:cli
    ''',
)