remove old textgen
This commit is contained in:
@ -1,68 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from rich.traceback import install
|
||||
|
||||
# Call the `install` function imported from rich.traceback at import time.
# NOTE(review): presumably this enables rich's formatted tracebacks for
# uncaught exceptions — confirm against the rich documentation.
install()
|
||||
|
||||
import re
|
||||
import random
|
||||
|
||||
|
||||
class TextGen:
    """Word-level Markov-chain text generator.

    The training text is lower-cased, every run of characters other than
    ``a-z``, ``äöüß``, ``'``, ``.`` and ``,`` is replaced by a space, and
    the result is split into words.  ``word_table`` maps each tuple of
    ``order`` consecutive words to the list of words that followed it.
    """

    def __init__(self, filename, n):
        """Read the corpus from *filename* and train a chain of order *n*.

        Args:
            filename: Path of the text file used as training corpus.
            n: Prefix length (number of words per prefix); must be >= 1.

        Raises:
            ValueError: If *n* is smaller than 1.
        """
        if n < 1:
            # Generation slices ``output[-order:]``; an order of 0 would
            # select the whole output instead of an empty prefix.
            raise ValueError(f"prefix length must be at least 1, got {n}")

        # Decode explicitly: the character class below relies on the
        # umlauts being decoded correctly, independent of the platform
        # default encoding.
        with open(filename, encoding="utf-8") as file:
            text = file.read().lower()

        self.wordbase = re.sub(r"[^a-zäöüß'.,]+", " ", text).split()
        self.word_table = {}
        self.order = n

        self.train_words(self.order)

    def train_words(self, n):
        """Build the Markov chain with prefix length *n*.

        Every window of *n* consecutive words in ``wordbase`` becomes a
        key of ``word_table``; the word right after the window is
        appended to that key's suffix list.  Existing entries are kept,
        so calling this again adds to the table.
        """
        print(f"Training with {len(self.wordbase)} words.")

        words = self.wordbase
        # The last usable window starts at len(words) - n - 1, so the
        # range end is len(words) - n (this includes the final suffix).
        for start in range(len(words) - n):
            prefix = tuple(words[start : start + n])
            # Duplicates are kept on purpose: random.choice() then picks
            # a suffix proportionally to how often it followed the prefix.
            self.word_table.setdefault(prefix, []).append(words[start + n])

        print(f"Generated suffixes for {len(self.word_table)} prefixes.")

    def generate_random(self, n):
        """Return a list of words started from a random prefix.

        The list begins with the ``order`` words of a randomly chosen
        prefix and is extended one word at a time until it holds *n*
        words.  If *n* is not larger than ``order`` the prefix words are
        returned unchanged.
        """
        output = list(random.choice(list(self.word_table)))

        for _ in range(self.order, n):
            output.append(self.generate_word_by_word(tuple(output[-self.order :])))

        return output

    def generate_word_by_word(self, prefix: tuple):
        """Return a randomly chosen follower word for *prefix*.

        If *prefix* is not a key of the table, a message is printed and
        the suffixes of the first key whose last word equals the last
        word of *prefix* are used instead.  If there is no such key
        either, the word is drawn from the suffixes of a random key so
        that generation can continue past a dead end.

        Raises:
            KeyError: If the table is empty.
        """
        if prefix in self.word_table:
            return random.choice(self.word_table[prefix])

        print(f"Prefix {prefix} not in table")

        if not self.word_table:
            raise KeyError(prefix)

        if prefix:
            last_word = prefix[-1]
            for key, suffixes in self.word_table.items():
                if key and key[-1] == last_word:
                    return random.choice(suffixes)

        # Dead end: no key ends with the same word, restart from a
        # random prefix instead of failing.
        return random.choice(random.choice(list(self.word_table.values())))

    def generate_sentences(self, n, max_words=None):
        """Return a list of *n* generated sentences.

        Each sentence is the word list produced by ``generate_sentence``;
        *max_words* is passed through to it.
        """
        return [self.generate_sentence(max_words) for _ in range(n)]

    def generate_sentence(self, max_words=None):
        """Return a list of words ending with a word that contains ".".

        Generation starts from a randomly chosen prefix and appends
        words until the last word contains a period.

        Args:
            max_words: Optional upper bound for the sentence length.
                With the default ``None`` the loop only stops at a word
                containing ".", which never happens if no such word is
                reachable.
        """
        output = list(random.choice(list(self.word_table)))

        while "." not in output[-1]:
            if max_words is not None and len(output) >= max_words:
                break
            output.append(self.generate_word_by_word(tuple(output[-self.order :])))

        return output
|
Reference in New Issue
Block a user