2018-12-20 12:55:48 +01:00
|
|
|
import json
|
2018-12-19 22:05:58 +01:00
|
|
|
import math
|
2018-12-20 12:55:48 +01:00
|
|
|
import pathlib
|
2018-12-19 22:05:58 +01:00
|
|
|
|
|
|
|
import ads
|
|
|
|
import ads.config
|
|
|
|
import click
|
|
|
|
import peewee
|
|
|
|
import requests
|
|
|
|
from peewee import Model
|
|
|
|
|
|
|
|
import config
|
2018-12-20 12:55:48 +01:00
|
|
|
from create_library import create_library
|
2018-12-19 22:05:58 +01:00
|
|
|
from models import Author, Keyword, Publication, Doctype, Paper, PaperAuthors, PaperKeywords, db
|
|
|
|
|
|
|
|
# Hand the API token from the local `config` module to the `ads` client.
# This runs at import time, before any command issues an ADS query.
ads.config.token = config.ads_token
|
|
|
|
|
|
|
|
|
|
|
|
@click.group()
@click.version_option('1.0')
@click.pass_context
def cli(ctx):
    """Search ADS for papers and keep them in a local library.

    This is the root command group; the actual work is done by the
    sub-commands registered on it with ``@cli.command()``.
    """
    # `ctx` is injected by `click.pass_context`; the group itself has no
    # work to do, so the docstring (which click shows as the --help text)
    # is the whole body.
|
|
|
|
|
|
|
|
|
|
|
|
# No-op self-assignment: it exists only to carry the type comment, so that
# editors treat the decorated `cli` as a click group (e.g. for `cli.command`).
cli = cli  # type:click.core.Group
|
|
|
|
|
|
|
|
|
|
|
|
@cli.command()
def init():
    # Documented with comments rather than a docstring: click would surface
    # a docstring as this command's --help text.
    #
    # Creates the database tables for every model this tool stores.
    print("initializing")
    tables = [
        Author,
        Keyword,
        Publication,
        Doctype,
        Paper,
        PaperAuthors,
        PaperKeywords,
    ]
    db.create_tables(tables)
|
|
|
|
|
|
|
|
|
|
|
|
# @cli.command()
|
|
|
|
# @click.argument('file', type=click.Path(exists=True, readable=True))
|
|
|
|
# @click.option('-p', '--python_file', is_flag=True)
|
|
|
|
# def add(file, python_file):
|
|
|
|
# fo = Files(filename=file, pythonfile=python_file)
|
|
|
|
# fo.save()
|
|
|
|
# print(file, python_file)
|
|
|
|
# pass
|
|
|
|
|
|
|
|
def _build_query(search_query, author=None, title=None):
    """Return the ADS query with the optional author/title filters appended.

    Terms are separated by a space so that a filter does not fuse with the
    preceding term (``"dark matter" + "author:x"`` must not become
    ``"dark matterauthor:x"``).
    """
    terms = [search_query]
    if author:
        terms.append("author:" + author)
    if title:
        terms.append("title:" + title)
    return " ".join(terms)


def _choose_paper(papers):
    """Return the article to add, prompting the user when there are several.

    Only the first ten results are listed, and the prompt only accepts an
    index that was actually listed.
    """
    if len(papers) == 1:
        return papers[0]

    first_ten = papers[:10]
    for index, single_paper in enumerate(first_ten):
        # A result may come back without a title; do not crash while listing.
        shown_title = single_paper.title[0] if single_paper.title else "<no title>"
        print(index, shown_title, single_paper.first_author)

    # IntRange makes click re-prompt instead of letting an out-of-range or
    # negative index through to the list lookup.
    selected_index = click.prompt(
        'select paper', type=click.IntRange(0, len(first_ten) - 1)
    )
    return first_ten[selected_index]


def _arxiv_identifier(identifiers):
    """Return the bare arXiv id from *identifiers*, or ``None`` if absent."""
    for ident in identifiers or []:
        if "arXiv:" in ident:
            return ident.split("arXiv:")[-1]
    return None


def _download_pdf(url, target, chunk_size=1024):
    """Stream *url* into the file *target* while showing a progress bar.

    Raises ``requests.RequestException`` if the request fails or the server
    answers with an error status, so an error page is never saved as a PDF.
    """
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        file_size = int(response.headers.get('content-length', 0))
        # True division: `ceil(a // b)` would just floor. The same chunk size
        # is used for the bar length and for reading, so the bar ends at 100%.
        # With no content-length the length is None (indeterminate bar).
        progress_length = math.ceil(file_size / chunk_size) or None
        with open(target, 'wb') as f:
            chunks = response.iter_content(chunk_size=chunk_size)
            with click.progressbar(chunks, length=progress_length) as progress_chunks:
                for chunk in progress_chunks:
                    f.write(chunk)


@cli.command()
@click.argument("search_query")
@click.option("-a", "--author")
@click.option("-t", "--title")
def add(search_query, author, title):
    """Search ADS for a paper, store its metadata and download its arXiv PDF.

    SEARCH_QUERY is passed to ADS; --author and --title add field filters.
    If several papers match, the first ten are listed and one must be picked.
    The command exits with status 1 when nothing matches, when the paper is
    already stored, or when it lacks the DOI, title or arXiv identifier this
    tool relies on.
    """
    fl = ['id', 'author', 'first_author', 'bibcode', 'year', 'title', 'abstract', 'doi', 'pubdate', "pub",
          "keyword", "doctype", "identifier", "links_data"]

    papers = list(ads.SearchQuery(q=_build_query(search_query, author, title), fl=fl))
    if not papers:
        print("no papers found")
        raise SystemExit(1)
    selection = _choose_paper(papers)  # type:ads.search.Article

    # Explicit checks instead of `assert`: asserts vanish under `python -O`
    # and would raise TypeError here if the field is missing (None).
    if not selection.doi or len(selection.doi) != 1:
        print("expected exactly one DOI for this paper, got:", selection.doi)
        raise SystemExit(1)
    doi = selection.doi[0]

    try:
        Paper.get(Paper.doi == doi)
    except peewee.DoesNotExist:
        pass
    else:
        print("this paper has already been added")
        raise SystemExit(1)

    if not selection.title or len(selection.title) != 1:
        print("expected exactly one title for this paper, got:", selection.title)
        raise SystemExit(1)

    # Resolve the arXiv id before touching the database so that a paper
    # without one is rejected cleanly instead of failing half-way through.
    arxiv_identifier = _arxiv_identifier(selection.identifier)
    if arxiv_identifier is None:
        print("this paper has no arXiv identifier")
        raise SystemExit(1)

    print("fetching bibcode")
    q = ads.ExportQuery([selection.bibcode])
    bibtex = q.execute()

    print("saving in db")

    paper = Paper()
    paper.doi = doi
    paper.title = selection.title[0]
    paper.abstract = selection.abstract
    paper.bibcode = selection.bibcode
    paper.year = selection.year
    paper.pubdate = selection.pubdate
    paper.pdf_downloaded = False
    paper.first_author = Author.get_or_create(name=selection.first_author)[0]
    paper.publication = Publication.get_or_create(name=selection.pub)[0]
    paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]
    paper.arxiv_identifier = arxiv_identifier
    paper.bibtex = bibtex
    # `or []` below: list-valued fields are treated as possibly missing
    # (None) rather than assumed to be present on every record.
    links = [json.loads(string) for string in selection.links_data or []]
    print(links)
    paper.save()

    # Loop variables are named so they do not shadow the `author` option.
    authors = [Author.get_or_create(name=name)[0] for name in selection.author or []]
    for paper_author in db.batch_commit(authors, 100):
        PaperAuthors.create(author=paper_author, paper=paper)
    keywords = [Keyword.get_or_create(keyword=keyword)[0] for keyword in selection.keyword or []]
    for paper_keyword in db.batch_commit(keywords, 100):
        PaperKeywords.create(keyword=paper_keyword, paper=paper)

    print("fetching PDF")
    arxiv_url = "https://arxiv.org/pdf/{id}".format(id=paper.arxiv_identifier)
    print(arxiv_url)
    library_dir = pathlib.Path('library')
    library_dir.mkdir(exist_ok=True)
    pdf_path = library_dir / '{filename}.pdf'.format(filename=paper.id)
    try:
        _download_pdf(arxiv_url, pdf_path)
    except requests.RequestException as error:
        # Leave `pdf_downloaded` False and remove any partial file so the
        # library never contains a truncated PDF.
        if pdf_path.exists():
            pdf_path.unlink()
        print("could not download PDF:", error)
        raise SystemExit(1)
    paper.pdf_downloaded = True
    paper.save()
|
|
|
|
|
|
|
|
|
2018-12-20 12:55:48 +01:00
|
|
|
@cli.command()
def update():
    # Documented with comments rather than a docstring: click would surface
    # a docstring as this command's --help text.
    #
    # Both directories are resolved to absolute paths (relative to the
    # current working directory) before being handed to create_library.
    library_dir = pathlib.Path('./library').resolve()
    browse_dir = pathlib.Path('./browse').resolve()
    create_library(library_dir, browse_dir)
|
|
|
|
|
|
|
|
|
2018-12-19 22:05:58 +01:00
|
|
|
# Run the click command group only when executed as a script, not on import.
if "__main__" == __name__:
    cli()
|