PaperLibrary-old/main.py

import json
import math
import pathlib

import ads
import ads.config
import click
import peewee
import requests
from peewee import Model

import config
from create_library import create_library
from models import Author, Keyword, Publication, Doctype, Paper, PaperAuthors, PaperKeywords, db

# Hand the API token from the local `config` module to the ads client before
# any query is issued.
ads.config.token = config.ads_token


@click.group()
@click.version_option('1.0')
@click.pass_context
def cli(ctx):
    # Root command group. It does no work of its own: the sub-commands
    # registered through `@cli.command()` carry all of the behaviour.
    # (Kept as a comment rather than a docstring so the `--help` output
    # stays exactly as it was.)
    return None


cli = cli  # type:click.core.Group


@cli.command()
def init():
    # Announce the run, then create one table for every model of the library.
    # (Comment instead of docstring so the command's `--help` text is unchanged.)
    print("initializing")
    tables = [Author, Keyword, Publication, Doctype, Paper, PaperAuthors, PaperKeywords]
    db.create_tables(tables)


# @cli.command()
# @click.argument('file', type=click.Path(exists=True, readable=True))
# @click.option('-p', '--python_file', is_flag=True)
# def add(file, python_file):
#     fo = Files(filename=file, pythonfile=python_file)
#     fo.save()
#     print(file, python_file)
#     pass

def _build_query(search_query, author, title):
    # Join the free-text query and the optional field filters with spaces so
    # that each "author:" / "title:" filter is a separate query term.
    terms = [search_query]
    if author:
        terms.append("author:" + author)
    if title:
        terms.append("title:" + title)
    return " ".join(terms)


def _choose_paper(papers):
    # Return the only hit, or let the user pick one of the first ten hits.
    if not papers:
        raise click.ClickException("no papers found for this query")
    if len(papers) == 1:
        return papers[0]  # type:ads.search.Article

    first_ten = papers[:10]
    for index, single_paper in enumerate(first_ten):
        print(index, single_paper.title[0], single_paper.first_author)
    # Only indices that were actually listed are accepted; click re-prompts
    # on anything outside the range.
    selected_index = click.prompt('select paper', type=click.IntRange(0, len(first_ten) - 1))
    return first_ten[selected_index]  # type:ads.search.Article


def _single_value(values, field):
    # Unwrap a field that is expected to be a one-element list.
    if not values or len(values) != 1:
        raise click.ClickException(
            "expected exactly one {} for the selected paper, got {!r}".format(field, values))
    return values[0]


def _arxiv_id(identifiers):
    # Return the part after "arXiv:" of the first matching identifier, or None.
    for ident in identifiers or ():
        if "arXiv:" in ident:
            return ident.split("arXiv:")[-1]
    return None


def _download_pdf(url, target, chunk_size=1024):
    # Stream `url` into the file `target`, showing a click progress bar.
    with requests.get(url, stream=True, timeout=30) as response:
        # Fail before writing anything so an HTTP error page is never stored
        # as if it were the PDF.
        response.raise_for_status()
        file_size = int(response.headers.get('content-length', 0))
        # The bar length is counted in chunks of the same size that is
        # iterated below; without a content-length the length is left unknown.
        progress_length = math.ceil(file_size / chunk_size) if file_size else None
        with open(target, 'wb') as f:
            chunks = response.iter_content(chunk_size=chunk_size)
            with click.progressbar(chunks, length=progress_length) as progress_chunks:
                for chunk in progress_chunks:
                    f.write(chunk)


@cli.command()
@click.argument("search_query")
@click.option("-a", "--author")
@click.option("-t", "--title")
def add(search_query, author, title):
    """Search ADS for a paper, store it in the library and fetch its arXiv PDF.

    SEARCH_QUERY is sent to ADS together with the optional --author and
    --title values, which are appended as "author:" and "title:" terms.
    If several papers match, the first ten are listed and one of them has
    to be picked by its index.

    The command stops with an error if nothing is found, if the paper does
    not have exactly one DOI and title, if it has no arXiv identifier, if it
    is already stored, or if the PDF download fails.
    """
    fl = ['id', 'author', 'first_author', 'bibcode', 'year', 'title', 'abstract', 'doi', 'pubdate', "pub",
          "keyword", "doctype", "identifier", "links_data"]
    query = _build_query(search_query, author, title)
    selection = _choose_paper(list(ads.SearchQuery(q=query, fl=fl)))

    # Validate everything that can be missing before any row is written, so a
    # rejected paper leaves no partial data behind.
    doi = _single_value(selection.doi, "DOI")
    paper_title = _single_value(selection.title, "title")
    arxiv_identifier = _arxiv_id(selection.identifier)
    if arxiv_identifier is None:
        raise click.ClickException("the selected paper has no arXiv identifier")

    try:
        Paper.get(Paper.doi == doi)
    except peewee.DoesNotExist:
        pass
    else:
        raise click.ClickException("this paper has already been added")

    print("fetching bibtex")
    bibtex = ads.ExportQuery([selection.bibcode]).execute()

    print("saving in db")

    paper = Paper()
    paper.doi = doi
    paper.title = paper_title
    paper.abstract = selection.abstract
    paper.bibcode = selection.bibcode
    paper.year = selection.year
    paper.pubdate = selection.pubdate
    paper.pdf_downloaded = False
    paper.first_author = Author.get_or_create(name=selection.first_author)[0]
    paper.publication = Publication.get_or_create(name=selection.pub)[0]
    paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]
    paper.arxiv_identifier = arxiv_identifier
    paper.bibtex = bibtex
    # The list-valued fields below are guarded with `or []` in case ADS
    # returns None for a field the paper does not have.
    links = [json.loads(string) for string in selection.links_data or []]
    print(links)
    paper.save()
    authors = [Author.get_or_create(name=name)[0] for name in selection.author or []]
    for paper_author in db.batch_commit(authors, 100):
        PaperAuthors.create(author=paper_author, paper=paper)
    keywords = [Keyword.get_or_create(keyword=keyword)[0] for keyword in selection.keyword or []]
    for paper_keyword in db.batch_commit(keywords, 100):
        PaperKeywords.create(keyword=paper_keyword, paper=paper)

    print("fetching PDF")
    arxiv_url = "https://arxiv.org/pdf/{id}".format(id=paper.arxiv_identifier)
    print(arxiv_url)
    library_dir = pathlib.Path('library')
    library_dir.mkdir(parents=True, exist_ok=True)
    try:
        _download_pdf(arxiv_url, library_dir / '{filename}.pdf'.format(filename=paper.id))
    except requests.RequestException as err:
        # The paper stays in the database with pdf_downloaded == False.
        raise click.ClickException("could not download {}: {}".format(arxiv_url, err)) from err
    paper.pdf_downloaded = True
    paper.save()


@cli.command()
def update():
    # Resolve both working directories to absolute paths and pass them to
    # `create_library`. (Comment instead of docstring so the command's
    # `--help` text is unchanged.)
    library_dir = pathlib.Path('./library').resolve()
    browse_dir = pathlib.Path('./browse').resolve()
    create_library(library_dir, browse_dir)


# Run the click command group only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    cli()
first kind of working library version 2018-12-20 12:55:48 +01:00			`import json`
first working version 2018-12-19 22:05:58 +01:00			`import math`
first kind of working library version 2018-12-20 12:55:48 +01:00			`import pathlib`
first working version 2018-12-19 22:05:58 +01:00
			`import ads`
			`import ads.config`
			`import click`
			`import peewee`
			`import requests`
			`from peewee import Model`

			`import config`
first kind of working library version 2018-12-20 12:55:48 +01:00			`from create_library import create_library`
first working version 2018-12-19 22:05:58 +01:00			`from models import Author, Keyword, Publication, Doctype, Paper, PaperAuthors, PaperKeywords, db`

			`ads.config.token = config.ads_token`


			`@click.group()`
			`@click.version_option('1.0')`
			`@click.pass_context`
			`def cli(ctx):`
			`pass`
			`# print("bla")`


			`cli = cli # type:click.core.Group`


			`@cli.command()`
			`def init():`
			`print("initializing")`
			`db.create_tables([Author, Keyword, Publication, Doctype, Paper, PaperAuthors, PaperKeywords])`


			`# @cli.command()`
			`# @click.argument('file', type=click.Path(exists=True, readable=True))`
			`# @click.option('-p', '--python_file', is_flag=True)`
			`# def add(file, python_file):`
			`# fo = Files(filename=file, pythonfile=python_file)`
			`# fo.save()`
			`# print(file, python_file)`
			`# pass`

			`@cli.command()`
			`@click.argument("search_query")`
			`@click.option("-a", "--author")`
			`@click.option("-t", "--title")`
			`def add(search_query, author, title):`
			`fl = ['id', 'author', 'first_author', 'bibcode', 'id', 'year', 'title', 'abstract', 'doi', 'pubdate', "pub",`
first kind of working library version 2018-12-20 12:55:48 +01:00			`"keyword", "doctype", "identifier", "links_data"]`
first working version 2018-12-19 22:05:58 +01:00			`if author:`
			`search_query += "author:" + author`
			`if title:`
			`search_query += "title:" + title`
			`papers = list(ads.SearchQuery(q=search_query, fl=fl))`
			`if len(papers) == 0:`
			`selection = ads.search.Article`
			`exit()`
			`elif len(papers) == 1:`
			`selection = papers[0] # type:ads.search.Article`
			`else:`
			`# first_ten = itertools.islice(papers, 10)`
			`first_ten = papers[:10]`
			`single_paper: ads.search.Article`
			`for index, single_paper in enumerate(first_ten):`
first kind of working library version 2018-12-20 12:55:48 +01:00			`print(index, single_paper.title[0],single_paper.first_author)`
first working version 2018-12-19 22:05:58 +01:00			`selected_index = click.prompt('select paper', type=int)`
			`selection = papers[selected_index] # type:ads.search.Article`

			`assert len(selection.doi) == 1`
			`doi = selection.doi[0]`

			`try:`

			`paper = Paper.get(Paper.doi == doi)`
			`print("this paper has already been added")`
			`exit(1)`

			`except peewee.DoesNotExist:`
			`pass`

			`print("fetching bibcode")`
			`q = ads.ExportQuery([selection.bibcode])`
			`bibtex = q.execute()`

			`print("saving in db")`

			`paper = Paper()`
			`assert len(selection.title) == 1`
			`paper.doi = doi`
			`paper.title = selection.title[0]`
			`paper.abstract = selection.abstract`
			`paper.bibcode = selection.bibcode`
			`paper.year = selection.year`
			`paper.pubdate = selection.pubdate`
			`paper.pdf_downloaded = False`
			`paper.first_author = Author.get_or_create(name=selection.first_author)[0]`
			`paper.publication = Publication.get_or_create(name=selection.pub)[0]`
			`paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]`
			`paper.arxiv_identifier = [ident for ident in selection.identifier if "arXiv:" in ident][0].split("arXiv:")[-1]`
			`paper.bibtex = bibtex`
first kind of working library version 2018-12-20 12:55:48 +01:00			`links = [json.loads(string) for string in selection.links_data]`
			`print(links)`
first working version 2018-12-19 22:05:58 +01:00			`paper.save()`
first kind of working library version 2018-12-20 12:55:48 +01:00			`authors = [Author.get_or_create(name=name)[0] for name in selection.author]`
first working version 2018-12-19 22:05:58 +01:00			`for author in db.batch_commit(authors, 100):`
			`PaperAuthors.create(author=author, paper=paper)`
first kind of working library version 2018-12-20 12:55:48 +01:00			`keywords = [Keyword.get_or_create(keyword=keyword)[0] for keyword in selection.keyword]`
			`for keyword in db.batch_commit(keywords, 100):`
			`PaperKeywords.create(keyword=keyword, paper=paper)`
first working version 2018-12-19 22:05:58 +01:00			`print("fetching PDF")`
			`arxiv_url = "https://arxiv.org/pdf/{id}".format(id=paper.arxiv_identifier)`
			`r = requests.get(arxiv_url, stream=True)`
			`print(arxiv_url)`
			`with open('library/{filename}.pdf'.format(filename=paper.id), 'wb') as f:`
			`chunk_size = 1024 # bytes`
			`file_size = int(r.headers.get('content-length', 0))`
			`progress_length = math.ceil(file_size // chunk_size)`
			`with click.progressbar(r.iter_content(chunk_size=20), length=progress_length) as progress_chunks:`
			`for chunk in progress_chunks:`
			`f.write(chunk)`
			`paper.pdf_downloaded = True`
			`paper.save()`


first kind of working library version 2018-12-20 12:55:48 +01:00			`@cli.command()`
			`def update():`
			`create_library(pathlib.Path('./library').resolve(), pathlib.Path('./browse').resolve())`


first working version 2018-12-19 22:05:58 +01:00			`if __name__ == '__main__':`
			`cli()`