1
0
Fork 0
mirror of https://github.com/Findus23/cr-search.git synced 2024-09-19 15:23:44 +02:00

use spacy 3

This commit is contained in:
Lukas Winkler 2021-01-18 18:39:40 +01:00
parent f2d9044433
commit 56a52192cd
Signed by: lukas
GPG key ID: 54DE4D798D244853

View file

@@ -2,10 +2,10 @@ import os
from dataclasses import dataclass from dataclasses import dataclass
from typing import Dict from typing import Dict
import spacy as spacy import en_core_web_md
from alive_progress import alive_bar from alive_progress import alive_bar
from peewee import chunked from peewee import chunked
from spacy.lang.en import English from spacy.lang.en import Language
from spacy.tokens.span import Span from spacy.tokens.span import Span
from spacy.tokens.token import Token from spacy.tokens.token import Token
@@ -23,7 +23,7 @@ class Noun:
lemma_cache: Dict[str, str] = {} lemma_cache: Dict[str, str] = {}
nlp: English = spacy.load("en_core_web_sm", disable=["ner", "textcat"]) nlp: Language = en_core_web_md.load(disable=["ner", "textcat"])
nlp.Defaults.stop_words = STOP_WORDS nlp.Defaults.stop_words = STOP_WORDS
for episode in Episode.select().where((Episode.phrases_imported == False) & (Episode.text_imported == True)).order_by( for episode in Episode.select().where((Episode.phrases_imported == False) & (Episode.text_imported == True)).order_by(
Episode.id): Episode.id):