import markovify
from sacremoses import MosesDetokenizer, MosesTokenizer
# Module-level Moses tokenizer/detokenizer singletons; constructed once and
# shared so every model instance reuses the same (stateless) helpers.
detokenizer = MosesDetokenizer()
tokenizer = MosesTokenizer()
class MarkovText(markovify.Text):
    """Markov text model with Moses-aware tokenization.

    Replaces markovify's default whitespace splitting with the shared
    module-level Moses tokenizer/detokenizer, so punctuation is split
    off as separate states and rejoined naturally on output.
    """

    def word_split(self, sentence):
        """Tokenize ``sentence`` into Moses tokens for the chain."""
        tokens = tokenizer.tokenize(sentence)
        return tokens

    def word_join(self, words):
        """Detokenize a list of Moses tokens back into one sentence string."""
        sentence = detokenizer.detokenize(words, return_str=True)
        return sentence
class MarkovUserName(markovify.Text):
    """Character-level Markov model for generating user names.

    Each chain state is a single character rather than a word, so the
    model learns plausible letter sequences instead of word sequences.
    """

    def word_split(self, word):
        """Explode ``word`` into a list of its individual characters."""
        return [*word]

    def word_join(self, characters):
        """Concatenate a sequence of characters back into one string."""
        joined = "".join(characters)
        return joined