1
0
Fork 0
This repository has been archived on 2024-06-28. You can view files and clone it, but cannot push or open issues or pull requests.
PaperLibrary-old/main.py

120 lines
3.7 KiB
Python

import math
import os

import ads
import ads.config
import click
import peewee
import requests
from peewee import Model

import config
from models import Author, Keyword, Publication, Doctype, Paper, PaperAuthors, PaperKeywords, db
# Hand the API token stored in the local ``config`` module to the ``ads`` client.
ads.config.token = config.ads_token
# Root command group of the tool; sub-commands attach themselves to it with
# ``@cli.command()``.  The group callback itself has nothing to do.
# NOTE: documented with comments instead of a docstring on purpose, because
# click would print a docstring as the group's ``--help`` text.
@click.group()
@click.version_option('1.0')
@click.pass_context
def cli(ctx):
    return None


# No-op re-binding that only carries a type comment, so editors and type
# checkers treat ``cli`` as a click group rather than a plain function.
cli = cli  # type: click.core.Group
# ``init`` sub-command: announce itself, then ask the database to create the
# tables for every model this tool uses.
# NOTE: comments rather than a docstring, so the ``--help`` output is unchanged.
@cli.command()
def init():
    print("initializing")
    all_models = [
        Author,
        Keyword,
        Publication,
        Doctype,
        Paper,
        PaperAuthors,
        PaperKeywords,
    ]
    db.create_tables(all_models)
# @cli.command()
# @click.argument('file', type=click.Path(exists=True, readable=True))
# @click.option('-p', '--python_file', is_flag=True)
# def add(file, python_file):
# fo = Files(filename=file, pythonfile=python_file)
# fo.save()
# print(file, python_file)
# pass
def _build_query(search_query, author, title):
    """Return *search_query* extended with the optional fielded terms.

    The parts are joined with single spaces; plain concatenation would glue
    them together (``fooauthor:bar`` instead of ``foo author:bar``).
    """
    terms = [search_query]
    if author:
        terms.append("author:" + author)
    if title:
        terms.append("title:" + title)
    return " ".join(term for term in terms if term)


def _choose_paper(papers):
    """Return the only hit, or let the user pick one of the first ten hits."""
    if len(papers) == 1:
        return papers[0]
    candidates = papers[:10]
    for index, candidate in enumerate(candidates):
        # A hit may come back without a title; do not crash while listing.
        print(index, candidate.title[0] if candidate.title else "<untitled>")
    # IntRange makes click re-prompt on anything that is not a listed index,
    # instead of failing later with IndexError or picking an unlisted paper.
    selected_index = click.prompt(
        'select paper', type=click.IntRange(0, len(candidates) - 1))
    return candidates[selected_index]


def _arxiv_id(identifiers):
    """Return the arXiv id contained in *identifiers*, or ``None`` if absent."""
    for ident in identifiers or ():
        if "arXiv:" in ident:
            return ident.split("arXiv:")[-1]
    return None


def _download_pdf(paper):
    """Stream the arXiv PDF of *paper* into ``library/<paper.id>.pdf``.

    Raises ``requests.RequestException`` on network/HTTP errors and ``OSError``
    when the target file cannot be written.
    """
    arxiv_url = "https://arxiv.org/pdf/{id}".format(id=paper.arxiv_identifier)
    print(arxiv_url)
    chunk_size = 1024  # bytes
    os.makedirs('library', exist_ok=True)
    with requests.get(arxiv_url, stream=True, timeout=30) as response:
        # Without this check an HTML error page would be stored as the "PDF".
        response.raise_for_status()
        file_size = int(response.headers.get('content-length', 0))
        # True division: ``math.ceil(a // b)`` would silently round down.
        expected_chunks = math.ceil(file_size / chunk_size)
        with open('library/{filename}.pdf'.format(filename=paper.id), 'wb') as f:
            # Iterate with the same chunk size the bar length was computed for.
            chunks = response.iter_content(chunk_size=chunk_size)
            # ``None`` lets click draw an open-ended bar when the server sent
            # no content-length header.
            with click.progressbar(chunks, length=expected_chunks or None) as progress_chunks:
                for chunk in progress_chunks:
                    f.write(chunk)


@cli.command()
@click.argument("search_query")
@click.option("-a", "--author")
@click.option("-t", "--title")
def add(search_query, author, title):
    """Search ADS, store the chosen paper in the database and fetch its arXiv PDF."""
    # Fields requested from ADS for every hit.
    fl = ['id', 'author', 'first_author', 'bibcode', 'year', 'title', 'abstract', 'doi', 'pubdate', "pub",
          "doctype", "identifier"]
    papers = list(ads.SearchQuery(q=_build_query(search_query, author, title), fl=fl))
    if not papers:
        raise click.ClickException("no papers found for this query")
    selection = _choose_paper(papers)  # type: ads.search.Article

    # Validate everything needed up front, before any network or DB write, so
    # that an unusable record cannot leave half-created rows behind.
    if not selection.doi or len(selection.doi) != 1:
        raise click.ClickException("the selected paper does not have exactly one DOI")
    doi = selection.doi[0]
    if not selection.title or len(selection.title) != 1:
        raise click.ClickException("the selected paper does not have exactly one title")
    arxiv_identifier = _arxiv_id(selection.identifier)
    if arxiv_identifier is None:
        raise click.ClickException("the selected paper has no arXiv identifier")

    try:
        Paper.get(Paper.doi == doi)
    except peewee.DoesNotExist:
        pass
    else:
        print("this paper has already been added")
        raise SystemExit(1)

    print("fetching bibcode")
    bibtex = ads.ExportQuery([selection.bibcode]).execute()

    print("saving in db")
    paper = Paper()
    paper.doi = doi
    paper.title = selection.title[0]
    paper.abstract = selection.abstract
    paper.bibcode = selection.bibcode
    paper.year = selection.year
    paper.pubdate = selection.pubdate
    paper.pdf_downloaded = False
    authors = [Author.get_or_create(name=name)[0] for name in selection.author]
    paper.first_author = Author.get_or_create(name=selection.first_author)[0]
    paper.publication = Publication.get_or_create(name=selection.pub)[0]
    paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]
    paper.arxiv_identifier = arxiv_identifier
    paper.bibtex = bibtex
    paper.save()
    # Distinct loop name: ``author`` is the CLI option of this command.
    for paper_author in db.batch_commit(authors, 100):
        PaperAuthors.create(author=paper_author, paper=paper)

    print("fetching PDF")
    try:
        _download_pdf(paper)
    except (requests.RequestException, OSError) as err:
        # The paper row stays in the database with pdf_downloaded == False.
        raise click.ClickException(
            "paper saved, but the PDF download failed: {err}".format(err=err)) from err
    paper.pdf_downloaded = True
    paper.save()
# Script entry point: hand control to the click group when this file is run
# directly (nothing happens on import).
if __name__ == '__main__':
    cli()