From 72573c92b5535f1ca833fb5767486ca6ac83cf4b Mon Sep 17 00:00:00 2001 From: ChUrl Date: Tue, 8 Nov 2022 20:46:00 +0100 Subject: [PATCH] remove old textgen --- textgen.py_deprecated | 68 ------------------------------------------- 1 file changed, 68 deletions(-) delete mode 100644 textgen.py_deprecated diff --git a/textgen.py_deprecated b/textgen.py_deprecated deleted file mode 100644 index 01955fb..0000000 --- a/textgen.py_deprecated +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python3 - -from rich.traceback import install - -install() - -import re -import random - - -class TextGen: - def __init__(self, filename, n): - with open(filename) as file: - self.wordbase = re.sub(r"[^a-zäöüß'.,]+", " ", file.read().lower()).split() - - self.word_table = dict() - self.order = n - - self.train_words(self.order) - - def train_words(self, n): - """ - Erzeugt die Markov-Chain mit Prefix-Länge n - """ - print(f"Training with {len(self.wordbase)} words.") - - # init the frequencies - for i in range(len(self.wordbase) - n - 1): - prefix = tuple(self.wordbase[i : i + n]) - suffix = self.wordbase[i + n] - - if prefix not in self.word_table: - self.word_table[prefix] = [] - - # if suffix not in self.table[prefix]: # disable for probabilities - self.word_table[prefix].append(suffix) - - print(f"Generated suffixes for {len(self.word_table)} prefixes.") - - def generate_random(self, n): - fword = random.choice(list(self.word_table.keys())) - output = [*fword] - - for _ in range(self.order, n): - output.append(self.generate_word_by_word(tuple(output[-self.order :]))) - - return output - - def generate_word_by_word(self, prefix: tuple): - if prefix not in self.word_table: - print(f"Prefix {prefix} not in table") - for key in self.word_table.keys(): - if key[-1] == prefix[-1]: - return random.choice(self.word_table[key]) - - return random.choice(self.word_table[prefix]) - - def generate_sentences(self, n): - return [self.generate_sentence for _ in range(n)] - - def generate_sentence(self): - fword = random.choice(list(self.word_table.keys())) - output = [*fword] - - while "." not in output[-1]: - output.append(self.generate_word_by_word(tuple(output[-self.order :]))) - - return output