first kind of working library version
This commit is contained in:
parent
61a247fa56
commit
a588fc9835
3 changed files with 64 additions and 4 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -3,4 +3,5 @@ library/
|
||||||
storage.*
|
storage.*
|
||||||
.idea/
|
.idea/
|
||||||
*.egg-info
|
*.egg-info
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
browse/
|
||||||
|
|
46
create_library.py
Normal file
46
create_library.py
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
import pathlib
|
||||||
|
import shutil
|
||||||
|
import string
|
||||||
|
|
||||||
|
from models import *
|
||||||
|
|
||||||
|
|
||||||
|
def format_filename(s: str) -> str:
    """Reduce *s* to a string that is safe to use as a filename.

    Keeps ASCII letters, digits and the characters ``-_.() `` (spaces are
    intentionally preserved); every other character is dropped.
    """
    allowed = f"-_.() {string.ascii_letters}{string.digits}"
    return "".join(ch for ch in s if ch in allowed)
|
||||||
|
|
||||||
|
|
||||||
|
def create_library(librarydir: pathlib.Path, browsedir: pathlib.Path):
    """Rebuild the browse tree of symlinks into the PDF library.

    Wipes *browsedir* and recreates three views onto the papers stored in
    *librarydir* (as ``<paper.id>.pdf`` files): by title, by author and by
    keyword.  Each view is a directory of symlinks named after the paper's
    title, sanitised via :func:`format_filename`.

    :param librarydir: directory holding the real ``<id>.pdf`` files.
    :param browsedir: directory in which to (re)create the symlink tree.
    """

    def _link_papers(papers, target_dir: pathlib.Path):
        """Symlink each paper in *papers* into *target_dir* by title."""
        for paper in papers:
            sourcefile = librarydir / "{}.pdf".format(paper.id)
            targetfile = target_dir / "{}.pdf".format(format_filename(paper.title))
            # Two titles can sanitise to the same filename; skip duplicates
            # instead of crashing with FileExistsError.
            if not targetfile.is_symlink():
                targetfile.symlink_to(sourcefile)

    # ignore_errors so the very first run (no browse dir yet) does not crash.
    shutil.rmtree(browsedir, ignore_errors=True)
    browsedir.mkdir()

    print("title")
    title_dir = browsedir / "title"
    title_dir.mkdir(exist_ok=True)
    _link_papers(Paper.select(), title_dir)

    print("author")
    author_dir = browsedir / "authors"
    author_dir.mkdir(exist_ok=True)
    for author in Author.select():
        author_subdir = author_dir / format_filename(author.name)
        # exist_ok: distinct author names can sanitise to the same directory.
        author_subdir.mkdir(exist_ok=True)
        _link_papers(
            Paper.select().join(PaperAuthors).where(PaperAuthors.author == author),
            author_subdir,
        )

    print("keywords")
    keywords_dir = browsedir / "keywords"
    keywords_dir.mkdir(exist_ok=True)
    for keyword in Keyword.select():
        keyword_subdir = keywords_dir / format_filename(keyword.keyword)
        keyword_subdir.mkdir(exist_ok=True)
        _link_papers(
            Paper.select().join(PaperKeywords).where(PaperKeywords.keyword == keyword),
            keyword_subdir,
        )
|
19
main.py
19
main.py
|
@ -1,4 +1,6 @@
|
||||||
|
import json
|
||||||
import math
|
import math
|
||||||
|
import pathlib
|
||||||
|
|
||||||
import ads
|
import ads
|
||||||
import ads.config
|
import ads.config
|
||||||
|
@ -8,6 +10,7 @@ import requests
|
||||||
from peewee import Model
|
from peewee import Model
|
||||||
|
|
||||||
import config
|
import config
|
||||||
|
from create_library import create_library
|
||||||
from models import Author, Keyword, Publication, Doctype, Paper, PaperAuthors, PaperKeywords, db
|
from models import Author, Keyword, Publication, Doctype, Paper, PaperAuthors, PaperKeywords, db
|
||||||
|
|
||||||
ads.config.token = config.ads_token
|
ads.config.token = config.ads_token
|
||||||
|
@ -45,7 +48,7 @@ def init():
|
||||||
@click.option("-t", "--title")
|
@click.option("-t", "--title")
|
||||||
def add(search_query, author, title):
|
def add(search_query, author, title):
|
||||||
fl = ['id', 'author', 'first_author', 'bibcode', 'id', 'year', 'title', 'abstract', 'doi', 'pubdate', "pub",
|
fl = ['id', 'author', 'first_author', 'bibcode', 'id', 'year', 'title', 'abstract', 'doi', 'pubdate', "pub",
|
||||||
"doctype", "identifier"]
|
"keyword", "doctype", "identifier", "links_data"]
|
||||||
if author:
|
if author:
|
||||||
search_query += "author:" + author
|
search_query += "author:" + author
|
||||||
if title:
|
if title:
|
||||||
|
@ -61,7 +64,7 @@ def add(search_query, author, title):
|
||||||
first_ten = papers[:10]
|
first_ten = papers[:10]
|
||||||
single_paper: ads.search.Article
|
single_paper: ads.search.Article
|
||||||
for index, single_paper in enumerate(first_ten):
|
for index, single_paper in enumerate(first_ten):
|
||||||
print(index, single_paper.title[0])
|
print(index, single_paper.title[0],single_paper.first_author)
|
||||||
selected_index = click.prompt('select paper', type=int)
|
selected_index = click.prompt('select paper', type=int)
|
||||||
selection = papers[selected_index] # type:ads.search.Article
|
selection = papers[selected_index] # type:ads.search.Article
|
||||||
|
|
||||||
|
@ -92,15 +95,20 @@ def add(search_query, author, title):
|
||||||
paper.year = selection.year
|
paper.year = selection.year
|
||||||
paper.pubdate = selection.pubdate
|
paper.pubdate = selection.pubdate
|
||||||
paper.pdf_downloaded = False
|
paper.pdf_downloaded = False
|
||||||
authors = [Author.get_or_create(name=name)[0] for name in selection.author]
|
|
||||||
paper.first_author = Author.get_or_create(name=selection.first_author)[0]
|
paper.first_author = Author.get_or_create(name=selection.first_author)[0]
|
||||||
paper.publication = Publication.get_or_create(name=selection.pub)[0]
|
paper.publication = Publication.get_or_create(name=selection.pub)[0]
|
||||||
paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]
|
paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]
|
||||||
paper.arxiv_identifier = [ident for ident in selection.identifier if "arXiv:" in ident][0].split("arXiv:")[-1]
|
paper.arxiv_identifier = [ident for ident in selection.identifier if "arXiv:" in ident][0].split("arXiv:")[-1]
|
||||||
paper.bibtex = bibtex
|
paper.bibtex = bibtex
|
||||||
|
links = [json.loads(string) for string in selection.links_data]
|
||||||
|
print(links)
|
||||||
paper.save()
|
paper.save()
|
||||||
|
authors = [Author.get_or_create(name=name)[0] for name in selection.author]
|
||||||
for author in db.batch_commit(authors, 100):
|
for author in db.batch_commit(authors, 100):
|
||||||
PaperAuthors.create(author=author, paper=paper)
|
PaperAuthors.create(author=author, paper=paper)
|
||||||
|
keywords = [Keyword.get_or_create(keyword=keyword)[0] for keyword in selection.keyword]
|
||||||
|
for keyword in db.batch_commit(keywords, 100):
|
||||||
|
PaperKeywords.create(keyword=keyword, paper=paper)
|
||||||
print("fetching PDF")
|
print("fetching PDF")
|
||||||
arxiv_url = "https://arxiv.org/pdf/{id}".format(id=paper.arxiv_identifier)
|
arxiv_url = "https://arxiv.org/pdf/{id}".format(id=paper.arxiv_identifier)
|
||||||
r = requests.get(arxiv_url, stream=True)
|
r = requests.get(arxiv_url, stream=True)
|
||||||
|
@ -116,5 +124,10 @@ def add(search_query, author, title):
|
||||||
paper.save()
|
paper.save()
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command()
|
||||||
|
def update():
|
||||||
|
create_library(pathlib.Path('./library').resolve(), pathlib.Path('./browse').resolve())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
cli()
|
cli()
|
||||||
|
|
Reference in a new issue