diff --git a/consume.py b/consume.py
index 12266ae..4c0d515 100644
--- a/consume.py
+++ b/consume.py
@@ -7,7 +7,7 @@ from parsexml import parse_posts, parse_comments, parse_usernames
 from utils import *
 # os.chdir("/mydir")
 for file in glob.glob("downloads/**/*.7z"):
-    if not "worldbuilding" in file:
+    if not "raspberry" in file:
         continue
     code = os.path.basename(os.path.splitext(file)[0])
     print(code)
diff --git a/markov.py b/markov.py
index 616998b..e65406d 100644
--- a/markov.py
+++ b/markov.py
@@ -5,9 +5,17 @@ tokenizer = MosesTokenizer()
 detokenizer = MosesDetokenizer()
 
 
-class POSifiedText(markovify.Text):
+class MarkovText(markovify.Text):
     def word_split(self, sentence):
         return tokenizer.tokenize(sentence)
 
     def word_join(self, words):
         return detokenizer.detokenize(words, return_str=True)
+
+
+class MarkovUserName(markovify.Text):
+    def word_split(self, word):
+        return list(word)
+
+    def word_join(self, characters):
+        return "".join(characters)
diff --git a/parsexml.py b/parsexml.py
index 03196b4..9b89f01 100644
--- a/parsexml.py
+++ b/parsexml.py
@@ -1,7 +1,6 @@
 from xml.etree import ElementTree
 
 import jsonlines
-from bs4 import BeautifulSoup
 
 from utils import *
 
@@ -22,8 +21,7 @@ def parse_posts(inputdir, outputdir):
                 titles.write(title)
             body = element.get('Body')
             if body:
-                soup = BeautifulSoup(body, "lxml")
-                text = soup.get_text()
+                text = html2text(body)
                 if element.get('PostTypeId') == "1":
                     questions.write(text)
                 else:
diff --git a/text_generator.py b/text_generator.py
index bc7056a..8e99835 100644
--- a/text_generator.py
+++ b/text_generator.py
@@ -1,38 +1,41 @@
 import jsonlines
 import markovify
+import os
 from nltk.tokenize.moses import MosesDetokenizer, MosesTokenizer
 
-from markov import POSifiedText
+from markov import MarkovText, MarkovUserName
 from utils import *
 
-detokenizer = MosesDetokenizer()
 
-BASEDIR, mode = get_settings(2)
-if mode not in ["Questions", "Answers", "Titles"]:
-    print("error")
-    exit()
-chainfile = BASEDIR + '/{type}.chain.json'.format(type=mode)
+def get_markov(mode):
+    if mode == "Usernames":
+        return MarkovUserName
+    else:
+        return MarkovText
 
-try:
+
+def get_state_size(mode):
+    return 1 if mode == "Titles" else 3 if mode == "Usernames" else 2
+
+
+def load_chain(chainfile, mode):
+    markov = get_markov(mode)
     with open(chainfile, 'r') as myfile:
         data = myfile.read()
-        chain = POSifiedText.from_json(data)
-        # raise FileNotFoundError
         print("using existing file\n")
+        return markov.from_json(data)
 
-except FileNotFoundError:
-    tokenizer = MosesTokenizer()
 
+def generate_chain(basedir, mode):
     combined_cains = None
     chainlist = []
+    markov = get_markov(mode)
     i = 0
-    with jsonlines.open(BASEDIR + "/{type}.jsonl".format(type=mode), mode="r") as content:
+    with jsonlines.open(basedir + "/{type}.jsonl".format(type=mode), mode="r") as content:
         for text in content:
             text = text.strip()
-            # tokens = tokenizer.tokenize(text=text.replace("\n", " THISISANEWLINE "))
             try:
-                chain = POSifiedText(text, (1 if mode == "Titles" else 2), retain_original=False)
-                # chain = markovify.Chain([tokens], (1 if mode == "Titles" else 2))
+                chain = markov(text, get_state_size(mode), retain_original=False)
             except KeyError:
                 continue
             chainlist.append(chain)
@@ -50,16 +53,30 @@ except FileNotFoundError:
     chain = markovify.combine([combined_cains, subtotal_chain])
     with open(chainfile, 'w') as outfile:
         outfile.write(chain.to_json())
+    print_ram()
+    return chain
 
-for _ in range(10):
-    # walk = []
-    # for text in chain.gen():
-    #     if len(walk) > 100:
-    #         break
-    #     walk.append(text)
-    # result = detokenizer.detokenize(walk, return_str=True)
-    # print(result.replace("THISISANEWLINE ", "\n"))
-    print(chain.make_sentence())
-    print("-----------------------------------")
 
-print_ram()
+if __name__ == "__main__":
+    basedir, mode = get_settings(2)
+    if mode not in ["Questions", "Answers", "Titles", "Usernames"]:
+        print("error")
+        exit()
+    chainfile = basedir + '/{type}.chain.json'.format(type=mode)
+    if os.path.exists(chainfile):
+        chain = load_chain(chainfile, mode)
+    else:
+        chain = generate_chain(basedir, mode)
+
+    for _ in range(10):
+        # walk = []
+        # for text in chain.gen():
+        #     if len(walk) > 100:
+        #         break
+        #     walk.append(text)
+        # result = detokenizer.detokenize(walk, return_str=True)
+        # print(result.replace("THISISANEWLINE ", "\n"))
+        print(chain.make_sentence())
+        print("-----------------------------------")
+
+    print_ram()
diff --git a/utils.py b/utils.py
index e8dc501..898d4d0 100644
--- a/utils.py
+++ b/utils.py
@@ -2,6 +2,8 @@ import sys
 
 import resource
 
+from bs4 import BeautifulSoup
+
 
 def print_stats(i, skipped=None):
     print("{number} total entries".format(number=i))
@@ -14,6 +16,13 @@ def print_ram():
     print("used {mb}MB".format(mb=resource.getrusage(resource.RUSAGE_SELF).ru_maxrss // 1024))
 
 
+def html2text(body):
+    soup = BeautifulSoup(body, "lxml")
+    for code in soup.find_all("code"):
+        code.decompose()
+    return soup.get_text()
+
+
 def get_settings(count):
     if len(sys.argv) != count + 1:
         if count == 1: