mirror of
https://github.com/Findus23/cr-search.git
synced 2024-09-19 15:23:44 +02:00
use spacy 3
This commit is contained in:
parent
f2d9044433
commit
56a52192cd
1 changed file with 3 additions and 3 deletions
|
@ -2,10 +2,10 @@ import os
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
|
||||||
import spacy as spacy
|
import en_core_web_md
|
||||||
from alive_progress import alive_bar
|
from alive_progress import alive_bar
|
||||||
from peewee import chunked
|
from peewee import chunked
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import Language
|
||||||
from spacy.tokens.span import Span
|
from spacy.tokens.span import Span
|
||||||
from spacy.tokens.token import Token
|
from spacy.tokens.token import Token
|
||||||
|
|
||||||
|
@ -23,7 +23,7 @@ class Noun:
|
||||||
|
|
||||||
lemma_cache: Dict[str, str] = {}
|
lemma_cache: Dict[str, str] = {}
|
||||||
|
|
||||||
nlp: English = spacy.load("en_core_web_sm", disable=["ner", "textcat"])
|
nlp: Language = en_core_web_md.load(disable=["ner", "textcat"])
|
||||||
nlp.Defaults.stop_words = STOP_WORDS
|
nlp.Defaults.stop_words = STOP_WORDS
|
||||||
for episode in Episode.select().where((Episode.phrases_imported == False) & (Episode.text_imported == True)).order_by(
|
for episode in Episode.select().where((Episode.phrases_imported == False) & (Episode.text_imported == True)).order_by(
|
||||||
Episode.id):
|
Episode.id):
|
||||||
|
|
Loading…
Reference in a new issue