first kind of working library version
This commit is contained in:
parent
61a247fa56
commit
a588fc9835
3 changed files with 64 additions and 4 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -3,4 +3,5 @@ library/
|
|||
storage.*
|
||||
.idea/
|
||||
*.egg-info
|
||||
__pycache__/
|
||||
__pycache__/
|
||||
browse/
|
||||
|
|
46
create_library.py
Normal file
46
create_library.py
Normal file
|
@ -0,0 +1,46 @@
|
|||
import pathlib
|
||||
import shutil
|
||||
import string
|
||||
|
||||
from models import *
|
||||
|
||||
|
||||
# Characters that survive sanitization. Built once at import time so the
# per-character membership test in format_filename() is a set lookup.
_VALID_FILENAME_CHARS = frozenset("-_.() " + string.ascii_letters + string.digits)


def format_filename(s: str) -> str:
    """Return *s* reduced to characters that are safe in a file name.

    Only ASCII letters, digits, the space character and ``-_.()`` are kept;
    every other character (path separators, punctuation, non-ASCII letters)
    is dropped, not replaced. Spaces are preserved as-is.

    The result can be an empty string when *s* contains no allowed
    characters; callers that need a non-empty name must handle that.
    """
    return "".join(c for c in s if c in _VALID_FILENAME_CHARS)
|
||||
|
||||
|
||||
def _link_papers(papers, librarydir: pathlib.Path, targetdir: pathlib.Path) -> None:
    """Symlink the PDF of every paper in *papers* into *targetdir*.

    Each link is named after the sanitized paper title and points at
    ``<librarydir>/<paper.id>.pdf``.
    """
    for paper in papers:
        sourcefile = librarydir / "{}.pdf".format(paper.id)
        stem = format_filename(paper.title)
        targetfile = targetdir / "{}.pdf".format(stem)
        # is_symlink() (not exists()) so that links to not-yet-downloaded
        # PDFs are detected as well.
        if targetfile.is_symlink():
            # Two different titles can sanitize to the same file name, which
            # would make symlink_to() raise FileExistsError. Disambiguate
            # with the paper id instead of aborting the whole rebuild.
            targetfile = targetdir / "{} ({}).pdf".format(stem, paper.id)
        targetfile.symlink_to(sourcefile)


def create_library(librarydir: pathlib.Path, browsedir: pathlib.Path) -> None:
    """Rebuild the browsable symlink tree for the paper library.

    *browsedir* is deleted (if present) and recreated with three views of the
    papers stored in the database:

    * ``title/<title>.pdf``
    * ``authors/<author name>/<title>.pdf``
    * ``keywords/<keyword>/<title>.pdf``

    Every entry is a symlink to ``<librarydir>/<paper.id>.pdf``.

    :param librarydir: directory holding the PDFs, named by paper id.
    :param browsedir: directory to (re)create; its previous content is lost.
    """
    # On the very first run the browse directory does not exist yet and
    # rmtree() would raise FileNotFoundError.
    if browsedir.exists():
        shutil.rmtree(browsedir)
    browsedir.mkdir(parents=True)

    print("title")
    title_dir = browsedir / "title"
    title_dir.mkdir(exist_ok=True)
    _link_papers(Paper.select(), librarydir, title_dir)

    print("author")
    author_dir = browsedir / "authors"
    author_dir.mkdir(exist_ok=True)
    for author in Author.select():
        author_subdir = author_dir / format_filename(author.name)
        # exist_ok: distinct names may collapse to the same sanitized name.
        author_subdir.mkdir(exist_ok=True)
        authored = Paper.select().join(PaperAuthors).where(PaperAuthors.author == author)
        _link_papers(authored, librarydir, author_subdir)

    print("keywords")
    keywords_dir = browsedir / "keywords"
    keywords_dir.mkdir(exist_ok=True)
    for keyword in Keyword.select():
        keyword_subdir = keywords_dir / format_filename(keyword.keyword)
        # exist_ok: distinct keywords may collapse to the same sanitized name.
        keyword_subdir.mkdir(exist_ok=True)
        tagged = Paper.select().join(PaperKeywords).where(PaperKeywords.keyword == keyword)
        _link_papers(tagged, librarydir, keyword_subdir)
|
19
main.py
19
main.py
|
@ -1,4 +1,6 @@
|
|||
import json
|
||||
import math
|
||||
import pathlib
|
||||
|
||||
import ads
|
||||
import ads.config
|
||||
|
@ -8,6 +10,7 @@ import requests
|
|||
from peewee import Model
|
||||
|
||||
import config
|
||||
from create_library import create_library
|
||||
from models import Author, Keyword, Publication, Doctype, Paper, PaperAuthors, PaperKeywords, db
|
||||
|
||||
ads.config.token = config.ads_token
|
||||
|
@ -45,7 +48,7 @@ def init():
|
|||
@click.option("-t", "--title")
|
||||
def add(search_query, author, title):
|
||||
fl = ['id', 'author', 'first_author', 'bibcode', 'id', 'year', 'title', 'abstract', 'doi', 'pubdate', "pub",
|
||||
"doctype", "identifier"]
|
||||
"keyword", "doctype", "identifier", "links_data"]
|
||||
if author:
|
||||
search_query += "author:" + author
|
||||
if title:
|
||||
|
@ -61,7 +64,7 @@ def add(search_query, author, title):
|
|||
first_ten = papers[:10]
|
||||
single_paper: ads.search.Article
|
||||
for index, single_paper in enumerate(first_ten):
|
||||
print(index, single_paper.title[0])
|
||||
print(index, single_paper.title[0],single_paper.first_author)
|
||||
selected_index = click.prompt('select paper', type=int)
|
||||
selection = papers[selected_index] # type:ads.search.Article
|
||||
|
||||
|
@ -92,15 +95,20 @@ def add(search_query, author, title):
|
|||
paper.year = selection.year
|
||||
paper.pubdate = selection.pubdate
|
||||
paper.pdf_downloaded = False
|
||||
authors = [Author.get_or_create(name=name)[0] for name in selection.author]
|
||||
paper.first_author = Author.get_or_create(name=selection.first_author)[0]
|
||||
paper.publication = Publication.get_or_create(name=selection.pub)[0]
|
||||
paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]
|
||||
paper.arxiv_identifier = [ident for ident in selection.identifier if "arXiv:" in ident][0].split("arXiv:")[-1]
|
||||
paper.bibtex = bibtex
|
||||
links = [json.loads(string) for string in selection.links_data]
|
||||
print(links)
|
||||
paper.save()
|
||||
authors = [Author.get_or_create(name=name)[0] for name in selection.author]
|
||||
for author in db.batch_commit(authors, 100):
|
||||
PaperAuthors.create(author=author, paper=paper)
|
||||
keywords = [Keyword.get_or_create(keyword=keyword)[0] for keyword in selection.keyword]
|
||||
for keyword in db.batch_commit(keywords, 100):
|
||||
PaperKeywords.create(keyword=keyword, paper=paper)
|
||||
print("fetching PDF")
|
||||
arxiv_url = "https://arxiv.org/pdf/{id}".format(id=paper.arxiv_identifier)
|
||||
r = requests.get(arxiv_url, stream=True)
|
||||
|
@ -116,5 +124,10 @@ def add(search_query, author, title):
|
|||
paper.save()
|
||||
|
||||
|
||||
@cli.command()
def update():
    # Regenerate the browse tree. Both directories are given relative to the
    # current working directory and made absolute before being handed over.
    library_path = pathlib.Path('./library').resolve()
    browse_path = pathlib.Path('./browse').resolve()
    create_library(library_path, browse_path)


# Run the click command group when the module is executed as a script.
if __name__ == '__main__':
    cli()
|
||||
|
|
Reference in a new issue