first kind of working library version
This commit is contained in:
parent
61a247fa56
commit
a588fc9835
3 changed files with 64 additions and 4 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -3,4 +3,5 @@ library/
|
||||||
storage.*
|
storage.*
|
||||||
.idea/
|
.idea/
|
||||||
*.egg-info
|
*.egg-info
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
browse/
|
||||||
|
|
46
create_library.py
Normal file
46
create_library.py
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
import pathlib
|
||||||
|
import shutil
|
||||||
|
import string
|
||||||
|
|
||||||
|
from models import *
|
||||||
|
|
||||||
|
|
||||||
|
def format_filename(s: str) -> str:
    """Reduce *s* to a string that is safe to use as a filename.

    Keeps ASCII letters, digits and the characters ``-_.() `` (spaces are
    intentionally preserved); every other character is dropped.
    """
    allowed = f"-_.() {string.ascii_letters}{string.digits}"
    return "".join(ch for ch in s if ch in allowed)
|
||||||
|
|
||||||
|
|
||||||
|
def create_library(librarydir: pathlib.Path, browsedir: pathlib.Path):
    """Rebuild the browse tree of symlinks into the PDF library.

    Wipes *browsedir* and recreates three views onto the papers stored in
    *librarydir* (as ``<paper.id>.pdf`` files): by title, by author and by
    keyword.  Each view is a directory of symlinks named after the paper's
    title, sanitised via :func:`format_filename`.

    :param librarydir: directory holding the real ``<id>.pdf`` files.
    :param browsedir: directory in which to (re)create the symlink tree.
    """

    def _link_papers(papers, target_dir: pathlib.Path):
        """Symlink each paper in *papers* into *target_dir* by title."""
        for paper in papers:
            sourcefile = librarydir / "{}.pdf".format(paper.id)
            targetfile = target_dir / "{}.pdf".format(format_filename(paper.title))
            # Two titles can sanitise to the same filename; skip duplicates
            # instead of crashing with FileExistsError.
            if not targetfile.is_symlink():
                targetfile.symlink_to(sourcefile)

    # ignore_errors so the very first run (no browse dir yet) does not crash.
    shutil.rmtree(browsedir, ignore_errors=True)
    browsedir.mkdir()

    print("title")
    title_dir = browsedir / "title"
    title_dir.mkdir(exist_ok=True)
    _link_papers(Paper.select(), title_dir)

    print("author")
    author_dir = browsedir / "authors"
    author_dir.mkdir(exist_ok=True)
    for author in Author.select():
        author_subdir = author_dir / format_filename(author.name)
        # exist_ok: distinct author names can sanitise to the same directory.
        author_subdir.mkdir(exist_ok=True)
        _link_papers(
            Paper.select().join(PaperAuthors).where(PaperAuthors.author == author),
            author_subdir,
        )

    print("keywords")
    keywords_dir = browsedir / "keywords"
    keywords_dir.mkdir(exist_ok=True)
    for keyword in Keyword.select():
        keyword_subdir = keywords_dir / format_filename(keyword.keyword)
        keyword_subdir.mkdir(exist_ok=True)
        _link_papers(
            Paper.select().join(PaperKeywords).where(PaperKeywords.keyword == keyword),
            keyword_subdir,
        )
|
19
main.py
19
main.py
|
@ -1,4 +1,6 @@
|
||||||
|
import json
|
||||||
import math
|
import math
|
||||||
|
import pathlib
|
||||||
|
|
||||||
import ads
|
import ads
|
||||||
import ads.config
|
import ads.config
|
||||||
|
@ -8,6 +10,7 @@ import requests
|
||||||
from peewee import Model
|
from peewee import Model
|
||||||
|
|
||||||
import config
|
import config
|
||||||
|
from create_library import create_library
|
||||||
from models import Author, Keyword, Publication, Doctype, Paper, PaperAuthors, PaperKeywords, db
|
from models import Author, Keyword, Publication, Doctype, Paper, PaperAuthors, PaperKeywords, db
|
||||||
|
|
||||||
ads.config.token = config.ads_token
|
ads.config.token = config.ads_token
|
||||||
|
@ -45,7 +48,7 @@ def init():
|
||||||
@click.option("-t", "--title")
|
@click.option("-t", "--title")
|
||||||
def add(search_query, author, title):
|
def add(search_query, author, title):
|
||||||
fl = ['id', 'author', 'first_author', 'bibcode', 'id', 'year', 'title', 'abstract', 'doi', 'pubdate', "pub",
|
fl = ['id', 'author', 'first_author', 'bibcode', 'id', 'year', 'title', 'abstract', 'doi', 'pubdate', "pub",
|
||||||
"doctype", "identifier"]
|
"keyword", "doctype", "identifier", "links_data"]
|
||||||
if author:
|
if author:
|
||||||
search_query += "author:" + author
|
search_query += "author:" + author
|
||||||
if title:
|
if title:
|
||||||
|
@ -61,7 +64,7 @@ def add(search_query, author, title):
|
||||||
first_ten = papers[:10]
|
first_ten = papers[:10]
|
||||||
single_paper: ads.search.Article
|
single_paper: ads.search.Article
|
||||||
for index, single_paper in enumerate(first_ten):
|
for index, single_paper in enumerate(first_ten):
|
||||||
print(index, single_paper.title[0])
|
print(index, single_paper.title[0],single_paper.first_author)
|
||||||
selected_index = click.prompt('select paper', type=int)
|
selected_index = click.prompt('select paper', type=int)
|
||||||
selection = papers[selected_index] # type:ads.search.Article
|
selection = papers[selected_index] # type:ads.search.Article
|
||||||
|
|
||||||
|
@ -92,15 +95,20 @@ def add(search_query, author, title):
|
||||||
paper.year = selection.year
|
paper.year = selection.year
|
||||||
paper.pubdate = selection.pubdate
|
paper.pubdate = selection.pubdate
|
||||||
paper.pdf_downloaded = False
|
paper.pdf_downloaded = False
|
||||||
authors = [Author.get_or_create(name=name)[0] for name in selection.author]
|
|
||||||
paper.first_author = Author.get_or_create(name=selection.first_author)[0]
|
paper.first_author = Author.get_or_create(name=selection.first_author)[0]
|
||||||
paper.publication = Publication.get_or_create(name=selection.pub)[0]
|
paper.publication = Publication.get_or_create(name=selection.pub)[0]
|
||||||
paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]
|
paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]
|
||||||
paper.arxiv_identifier = [ident for ident in selection.identifier if "arXiv:" in ident][0].split("arXiv:")[-1]
|
paper.arxiv_identifier = [ident for ident in selection.identifier if "arXiv:" in ident][0].split("arXiv:")[-1]
|
||||||
paper.bibtex = bibtex
|
paper.bibtex = bibtex
|
||||||
|
links = [json.loads(string) for string in selection.links_data]
|
||||||
|
print(links)
|
||||||
paper.save()
|
paper.save()
|
||||||
|
authors = [Author.get_or_create(name=name)[0] for name in selection.author]
|
||||||
for author in db.batch_commit(authors, 100):
|
for author in db.batch_commit(authors, 100):
|
||||||
PaperAuthors.create(author=author, paper=paper)
|
PaperAuthors.create(author=author, paper=paper)
|
||||||
|
keywords = [Keyword.get_or_create(keyword=keyword)[0] for keyword in selection.keyword]
|
||||||
|
for keyword in db.batch_commit(keywords, 100):
|
||||||
|
PaperKeywords.create(keyword=keyword, paper=paper)
|
||||||
print("fetching PDF")
|
print("fetching PDF")
|
||||||
arxiv_url = "https://arxiv.org/pdf/{id}".format(id=paper.arxiv_identifier)
|
arxiv_url = "https://arxiv.org/pdf/{id}".format(id=paper.arxiv_identifier)
|
||||||
r = requests.get(arxiv_url, stream=True)
|
r = requests.get(arxiv_url, stream=True)
|
||||||
|
@ -116,5 +124,10 @@ def add(search_query, author, title):
|
||||||
paper.save()
|
paper.save()
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command()
|
||||||
|
def update():
|
||||||
|
create_library(pathlib.Path('./library').resolve(), pathlib.Path('./browse').resolve())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
cli()
|
cli()
|
||||||
|
|
Reference in a new issue