1
0
Fork 0

first working version

This commit is contained in:
Lukas Winkler 2018-12-19 22:05:58 +01:00
commit 61a247fa56
6 changed files with 365 additions and 0 deletions

6
.gitignore vendored Normal file
View file

@ -0,0 +1,6 @@
config.py
library/
storage.*
.idea/
*.egg-info
__pycache__/

151
example.py Normal file
View file

@ -0,0 +1,151 @@
import os
import sys
import posixpath
import click
class Repo:
    """State shared by the CLI commands: repository location plus config.

    The instance carries three plain attributes: ``home`` (the path given to
    the constructor), ``config`` (key/value pairs recorded so far) and
    ``verbose`` (whether config changes are echoed to stderr).
    """

    def __init__(self, home):
        self.home = home
        self.config = {}
        self.verbose = False

    def set_config(self, key, value):
        """Record ``value`` under ``key``; echo the change to stderr if verbose."""
        self.config[key] = value
        if not self.verbose:
            return
        click.echo(' config[%s] = %s' % (key, value), file=sys.stderr)

    def __repr__(self):
        return '<Repo %r>' % self.home
# Decorator for commands that need the Repo object stored on the Click
# context; the decorated function receives it as its first argument.
pass_repo = click.make_pass_decorator(Repo)
@click.group()
@click.option('--repo-home', envvar='REPO_HOME', default='.repo',
              metavar='PATH', help='Changes the repository folder location.')
@click.option('--config', nargs=2, multiple=True,
              metavar='KEY VALUE', help='Overrides a config key/value pair.')
@click.option('--verbose', '-v', is_flag=True,
              help='Enables verbose mode.')
@click.version_option('1.0')
@click.pass_context
def cli(ctx, repo_home, config, verbose):
    """Repo is a command line tool that showcases how to build complex
    command line interfaces with Click.
    This tool is supposed to look like a distributed version control
    system to show how something like this can be structured.
    """
    # Build the Repo and remember it as the context object, so that every
    # subcommand decorated with @pass_repo receives this same instance.
    repo = Repo(os.path.abspath(repo_home))
    ctx.obj = repo
    repo.verbose = verbose
    # --config may be given several times; each occurrence is a (KEY, VALUE) pair.
    for name, setting in config:
        repo.set_config(name, setting)
@cli.command()
@click.argument('src')
@click.argument('dest', required=False)
@click.option('--shallow/--deep', default=False,
              help='Makes a checkout shallow or deep. Deep by default.')
@click.option('--rev', '-r', default='HEAD',
              help='Clone a specific revision instead of HEAD.')
@pass_repo
def clone(repo, src, dest, shallow, rev):
    """Clones a repository.
    This will clone the repository at SRC into the folder DEST. If DEST
    is not provided this will automatically use the last path component
    of SRC and create that folder.
    """
    # Without an explicit DEST, fall back to the last component of SRC, or to
    # the current directory when SRC ends in a slash.
    target = dest if dest is not None else (posixpath.basename(src) or '.')
    click.echo('Cloning repo %s to %s' % (src, os.path.abspath(target)))
    repo.home = target
    if shallow:
        click.echo('Making shallow checkout')
    click.echo('Checking out revision %s' % rev)
@cli.command()
@click.confirmation_option()
@pass_repo
def delete(repo):
    """Deletes a repository.
    This will throw away the current repository.
    """
    # Only messages are printed here; no files are touched by this command.
    for line in ('Destroying repo %s' % repo.home, 'Deleted!'):
        click.echo(line)
@cli.command()
@click.option('--username', prompt=True,
              help='The developer\'s shown username.')
@click.option('--email', prompt='E-Mail',
              help='The developer\'s email address')
@click.password_option(help='The login password.')
@pass_repo
def setuser(repo, username, email, password):
    """Sets the user credentials.
    This will override the current user config.
    """
    repo.set_config('username', username)
    repo.set_config('email', email)
    # Only a mask of the same length is stored, never the password itself.
    masked = '*' * len(password)
    repo.set_config('password', masked)
    click.echo('Changed credentials.')
@cli.command()
@click.option('--message', '-m', multiple=True,
              help='The commit message. If provided multiple times each '
                   'argument gets converted into a new line.')
@click.argument('files', nargs=-1, type=click.Path())
@pass_repo
def commit(repo, files, message):
    """Commits outstanding changes.
    Commit changes to the given files into the repository. You will need to
    "repo push" to push up your changes to other repositories.
    If a list of files is omitted, all changes reported by "repo status"
    will be committed.
    """
    if message:
        # Every -m occurrence becomes one line of the commit message.
        msg = '\n'.join(message)
    else:
        # No -m given: open an editor pre-filled with a commented file list.
        marker = '# Files to be committed:'
        hint = ['', '', marker, '#']
        hint.extend('# U %s' % path for path in files)
        message = click.edit('\n'.join(hint))
        if message is None:
            click.echo('Aborted!')
            return
        # Keep only what was typed above the marker line.
        msg = message.split(marker)[0].rstrip()
        if not msg:
            click.echo('Aborted! Empty commit message')
            return
    click.echo('Files to be committed: %s' % (files,))
    click.echo('Commit message:\n' + msg)
@cli.command(short_help='Copies files.')
@click.option('--force', is_flag=True,
              help='forcibly copy over an existing managed file')
@click.argument('src', nargs=-1, type=click.Path())
@click.argument('dst', type=click.Path())
@pass_repo
def copy(repo, src, dst, force):
    """Copies one or multiple files to a new location. This copies all
    files from SRC to DST.
    """
    # One line is printed per source path; `repo` and `force` are accepted
    # but not used by this body.
    for source in src:
        click.echo('Copy from %s -> %s' % (source, dst))

120
main.py Normal file
View file

@ -0,0 +1,120 @@
import math
import ads
import ads.config
import click
import peewee
import requests
from peewee import Model
import config
from models import Author, Keyword, Publication, Doctype, Paper, PaperAuthors, PaperKeywords, db
# Pass the API token from the local config module on to the ads client.
ads.config.token = config.ads_token
# Root command group of the paper library tool; the subcommands attach to it
# through @cli.command(). It deliberately has no docstring, because Click
# would show one as help text.
@click.group()
@click.version_option('1.0')
@click.pass_context
def cli(ctx):
    pass


# Re-bind the name with an explicit type so that editors know about the
# click.Group API (for example .command()).
cli: click.core.Group = cli
@cli.command()
def init():
    # Creates the database tables for all models of the library.
    print("initializing")
    tables = [
        Author,
        Keyword,
        Publication,
        Doctype,
        Paper,
        PaperAuthors,
        PaperKeywords,
    ]
    db.create_tables(tables)
# @cli.command()
# @click.argument('file', type=click.Path(exists=True, readable=True))
# @click.option('-p', '--python_file', is_flag=True)
# def add(file, python_file):
# fo = Files(filename=file, pythonfile=python_file)
# fo.save()
# print(file, python_file)
# pass
def _build_query(search_query, author, title):
    """Return the ADS query string: free text plus optional fielded terms.

    Terms are joined with spaces and the field values are quoted, so that a
    value containing spaces or commas stays attached to its field.
    """
    terms = [search_query]
    if author:
        terms.append('author:"{}"'.format(author))
    if title:
        terms.append('title:"{}"'.format(title))
    return " ".join(terms)


def _choose_paper(papers):
    """Return the single search hit, or let the user pick one of the first ten.

    Raises click.ClickException when the search returned nothing.
    """
    if not papers:
        raise click.ClickException("no papers found")
    if len(papers) == 1:
        return papers[0]
    first_ten = papers[:10]
    single_paper: ads.search.Article
    for index, single_paper in enumerate(first_ten):
        print(index, (single_paper.title or ["<untitled>"])[0])
    # Only indices that were actually listed are accepted.
    selected_index = click.prompt(
        'select paper', type=click.IntRange(0, len(first_ten) - 1)
    )
    return first_ten[selected_index]


def _find_arxiv_identifier(identifiers):
    """Return the id following "arXiv:" in the first matching entry, else None."""
    for ident in identifiers or []:
        if "arXiv:" in ident:
            return ident.split("arXiv:")[-1]
    return None


def _download_pdf(arxiv_identifier, target_path, chunk_size=1024):
    """Stream the arXiv PDF to ``target_path`` while showing a progress bar.

    Raises click.ClickException when the request fails or the server answers
    with an error status, so that an error page is never stored as the PDF.
    """
    import os

    arxiv_url = "https://arxiv.org/pdf/{id}".format(id=arxiv_identifier)
    print(arxiv_url)
    try:
        with requests.get(arxiv_url, stream=True) as r:
            r.raise_for_status()
            file_size = int(r.headers.get('content-length', 0))
            # Count the bar in the same chunk size that is iterated below;
            # pass None when the server did not announce a size.
            progress_length = math.ceil(file_size / chunk_size) or None
            os.makedirs(os.path.dirname(target_path) or '.', exist_ok=True)
            with open(target_path, 'wb') as f:
                chunks = r.iter_content(chunk_size=chunk_size)
                with click.progressbar(chunks, length=progress_length) as progress_chunks:
                    for chunk in progress_chunks:
                        f.write(chunk)
    except requests.RequestException as error:
        raise click.ClickException(
            "could not download {url}: {error}".format(url=arxiv_url, error=error)
        ) from error


@cli.command()
@click.argument("search_query")
@click.option("-a", "--author")
@click.option("-t", "--title")
def add(search_query, author, title):
    """Search ADS for SEARCH_QUERY and add the chosen paper to the library.

    The metadata and BibTeX entry of the paper are stored in the database
    and its arXiv PDF is downloaded into the library folder.
    """
    fl = ['id', 'author', 'first_author', 'bibcode', 'year', 'title', 'abstract', 'doi', 'pubdate', "pub",
          "doctype", "identifier"]
    papers = list(ads.SearchQuery(q=_build_query(search_query, author, title), fl=fl))
    selection = _choose_paper(papers)  # type:ads.search.Article

    # Validate everything the Paper row needs before any further request or
    # database write happens.
    if not selection.doi or len(selection.doi) != 1:
        raise click.ClickException("expected exactly one DOI for the selected paper")
    if not selection.title or len(selection.title) != 1:
        raise click.ClickException("expected exactly one title for the selected paper")
    arxiv_identifier = _find_arxiv_identifier(selection.identifier)
    if arxiv_identifier is None:
        raise click.ClickException("the selected paper has no arXiv identifier")

    doi = selection.doi[0]
    try:
        Paper.get(Paper.doi == doi)
    except peewee.DoesNotExist:
        pass
    else:
        raise click.ClickException("this paper has already been added")

    print("fetching bibcode")
    q = ads.ExportQuery([selection.bibcode])
    bibtex = q.execute()

    print("saving in db")
    paper = Paper()
    paper.doi = doi
    paper.title = selection.title[0]
    paper.abstract = selection.abstract
    paper.bibcode = selection.bibcode
    paper.year = selection.year
    paper.pubdate = selection.pubdate
    paper.pdf_downloaded = False
    authors = [Author.get_or_create(name=name)[0] for name in selection.author]
    paper.first_author = Author.get_or_create(name=selection.first_author)[0]
    paper.publication = Publication.get_or_create(name=selection.pub)[0]
    paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]
    paper.arxiv_identifier = arxiv_identifier
    paper.bibtex = bibtex
    paper.save()
    for author in db.batch_commit(authors, 100):
        PaperAuthors.create(author=author, paper=paper)

    print("fetching PDF")
    _download_pdf(paper.arxiv_identifier, 'library/{filename}.pdf'.format(filename=paper.id))
    # The flag is only set once the whole file has been written.
    paper.pdf_downloaded = True
    paper.save()
# Allow running the file directly as a script.
if __name__ == "__main__":
    cli()

58
models.py Normal file
View file

@ -0,0 +1,58 @@
from peewee import Model, CharField, SqliteDatabase, BooleanField, TextField, \
ForeignKeyField, DateField
# Single SQLite database shared by all models. The connection is opened as
# soon as this module is imported.
db = SqliteDatabase(
    database='storage.db',
    pragmas={
        'journal_mode': 'wal',
        'cache_size': -64000,  # 64MB
        'foreign_keys': 1,
        'ignore_check_constraints': 0,
    },
)
db.connect()
class BaseModel(Model):
    """Common base class that binds every model to the shared database."""

    class Meta:
        database = db
class Author(BaseModel):
    """An author, identified by a unique name."""

    name = CharField(unique=True)
    # Optional details; both columns may be NULL.
    affiliation = CharField(null=True)
    orcid_id = CharField(null=True)
class Keyword(BaseModel):
    """A keyword; each keyword string is stored only once."""

    keyword = CharField(unique=True)
class Publication(BaseModel):
    """A publication, identified by a unique name."""

    name = CharField(unique=True)
class Doctype(BaseModel):
    """A document type, identified by a unique name."""

    name = CharField(unique=True)
class Paper(BaseModel):
    """A stored paper: metadata, BibTeX entry and PDF download flag."""

    title = TextField()
    abstract = TextField()
    # DOI, arXiv identifier and bibcode must each be unique across papers.
    doi = CharField(unique=True)
    bibtex = TextField()
    first_author = ForeignKeyField(model=Author)
    publication = ForeignKeyField(model=Publication)
    doctype = ForeignKeyField(model=Doctype)
    arxiv_identifier = CharField(unique=True)
    bibcode = CharField(unique=True)
    year = CharField()
    pubdate = DateField()
    pdf_downloaded = BooleanField()
class PaperAuthors(BaseModel):
    """Link table: one row per (paper, author) pair."""

    paper = ForeignKeyField(model=Paper)
    author = ForeignKeyField(model=Author)
class PaperKeywords(BaseModel):
    """Link table: one row per (paper, keyword) pair."""

    paper = ForeignKeyField(model=Paper)
    keyword = ForeignKeyField(model=Keyword)

14
requirements.txt Normal file
View file

@ -0,0 +1,14 @@
ads==0.12.3
certifi==2018.11.29
chardet==3.0.4
Click==7.0
click-example-repo==0.1
httpretty==0.8.10
idna==2.8
mock==2.0.0
pbr==5.1.1
peewee==3.8.0
requests==2.21.0
six==1.12.0
urllib3==1.24.1
Werkzeug==0.14.1

16
setup.py Normal file
View file

@ -0,0 +1,16 @@
from setuptools import setup
# Packaging metadata. The two console scripts import `example` and `main`,
# and `main` imports `models`, so these modules must be installed; the
# previously listed `repo` module is not part of this commit.
# NOTE(review): `main` also imports `config`, which .gitignore excludes from
# the repository - it has to be provided locally.
setup(
    name='click-example-repo',
    version='0.1',
    py_modules=['example', 'main', 'models'],
    include_package_data=True,
    # Third-party packages imported by the modules above.
    install_requires=[
        'ads',
        'click',
        'peewee',
        'requests',
    ],
    entry_points={
        'console_scripts': [
            'repo=example:cli',
            'paperlibrary=main:cli',
        ],
    },
)