add textgen

This commit is contained in:
ChUrl
2021-04-29 12:05:10 +02:00
parent b327a4743c
commit d29ad2ad08
6 changed files with 47298 additions and 11 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
/.env
/__pycache__/
/geckodriver.log
/test.txt

30383
bible.txt Normal file

File diff suppressed because it is too large Load Diff

36
bot.py
View File

@ -9,9 +9,11 @@ import asyncio
from dotenv import load_dotenv
import discord
# from discord import Intents
from scraper import Girls
from textgen import TextGen
load_dotenv()
TOKEN = os.getenv("DISCORD_TOKEN")
@ -29,6 +31,7 @@ class HeidiClient(discord.Client):
self.prefix_regex = "^" + self.prefix
self.girls = Girls() # scraped model list
self.bible = TextGen("bible.txt", 3)
self.triggers = {} # automatic actions
self.triggers[
@ -46,6 +49,7 @@ class HeidiClient(discord.Client):
self.matchers["Countdown$"] = self.countdown
self.matchers["gib Link"] = self.show_link
self.matchers["welche Farbe .+\\?$"] = self.random_color
self.matchers["zitiere die Bibel"] = self.generate_bible_quote
### Voicelines
@ -167,7 +171,7 @@ class HeidiClient(discord.Client):
Countdown (Zeit bis zur nächsten Folge)
"""
date = datetime.date.today()
while date.weekday() != 3: # 3 for thursday
while date.weekday() != 3: # 3 for thursday
date += datetime.timedelta(1)
next_gntm = datetime.datetime(date.year, date.month, date.day, 20, 15)
@ -175,7 +179,9 @@ class HeidiClient(discord.Client):
hours, rem = divmod(delta.seconds, 3600)
minutes, seconds = divmod(rem, 60)
await message.channel.send(f"Noch {delta.days} Tage, {hours} Stunden und {minutes} Minuten bis zur nächsten Folge GNTM!")
await message.channel.send(
f"Noch {delta.days} Tage, {hours} Stunden und {minutes} Minuten bis zur nächsten Folge GNTM!"
)
async def show_link(self, message):
"""
@ -190,9 +196,33 @@ class HeidiClient(discord.Client):
"""
welche Farbe ... <Ding>? (Zufällige Farbe)
"""
choices = ["Rot", "Grün", "Gelb", "Blau", "Lila", "Pink", "Türkis", "Schwarz", "Weiß", "Grau", "Gelb", "Orange", "Olivegrün", "Mitternachtsblau", "Braun", "Tobe"]
choices = [
"Rot",
"Grün",
"Gelb",
"Blau",
"Lila",
"Pink",
"Türkis",
"Schwarz",
"Weiß",
"Grau",
"Gelb",
"Orange",
"Olivegrün",
"Mitternachtsblau",
"Braun",
"Tobe",
]
await message.channel.send(random.choice(choices))
async def generate_bible_quote(self, message):
    """
    zitiere die Bibel! (Unsinn generieren)
    """
    # NOTE(review): the docstring is kept verbatim. Its "<command> (<description>)"
    # shape matches the other handlers and is presumably read at runtime as
    # help text -- confirm before translating it.
    # English meaning: "quote the Bible! (generate nonsense)".

    # The generator returns the sentence as a list of words.
    words = self.bible.generate_sentence()
    reply = " ".join(words)

    # Answer in the channel the triggering message came from.
    await message.channel.send(reply)
### Voiceboard ---------------------------------------------------------------------------------
async def say_kein_foto(self, message):

16786
faust.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@ -2,13 +2,28 @@
with pkgs;
let myPython = python38.buildEnv.override {
extraLibs = with python38Packages; [
python-dotenv
let myPython = python39.buildEnv.override {
extraLibs = with python39Packages; [
# Common Libs
rich
discordpy
beautifulsoup4
selenium
# numpy
# matplotlib
# scipy
# torch
# Doom Emacs Libs
black
pyflakes
isort
nose
pytest
# For Discord-Bot
python-dotenv # Env
discordpy # Discord
beautifulsoup4 # Scraping
selenium # Scraping
pynacl # Voice
];
};
in
@ -16,7 +31,11 @@ in
mkShell {
buildInputs = [
myPython
nodePackages.pyright
geckodriver
nodePackages.pyright # LSP
pipenv # Doom
firefox # Selenium
geckodriver # Selenium
ffmpeg # Voice
];
}

68
textgen.py Normal file
View File

@ -0,0 +1,68 @@
#!/usr/bin/env python3
from rich.traceback import install
install()
import re
import random
class TextGen:
    """Word-level Markov-chain text generator trained on a plain-text file.

    Attributes:
        wordbase: The training text as a flat list of lower-cased tokens.
        word_table: Maps each prefix (tuple of ``order`` words) to the list of
            words that followed it in the training text.
        order: Prefix length of the chain.
    """

    def __init__(self, filename, n):
        """Load *filename*, tokenise it and train a chain of order *n*.

        Args:
            filename: Path of the training text.
            n: Number of words that make up a prefix.
        """
        # The pattern below keeps German umlauts, so decode explicitly instead
        # of relying on the platform default encoding (assumes the corpus is
        # stored as UTF-8).
        with open(filename, encoding="utf-8") as file:
            # Everything that is not a letter, apostrophe, period or comma
            # collapses into a single space before splitting into words.
            self.wordbase = re.sub(r"[^a-zäöüß'.,]+", " ", file.read().lower()).split()

        self.word_table = {}
        self.order = n

        self.train_words(self.order)

    def train_words(self, n):
        """Build the Markov chain with prefix length *n* from ``wordbase``.

        Args:
            n: Number of words that make up a prefix.
        """
        print(f"Training with {len(self.wordbase)} words.")

        # The last valid start index is len - n - 1 (its suffix is the final
        # word), so the range must stop at len - n to include it.
        for i in range(len(self.wordbase) - n):
            prefix = tuple(self.wordbase[i : i + n])
            suffix = self.wordbase[i + n]

            # Duplicates are kept on purpose: random.choice then samples each
            # suffix proportionally to how often it followed the prefix.
            self.word_table.setdefault(prefix, []).append(suffix)

        print(f"Generated suffixes for {len(self.word_table)} prefixes.")

    def generate_random(self, n):
        """Generate a word sequence starting from a random prefix.

        Args:
            n: Target number of words. The result always contains at least
                the ``order`` words of the starting prefix.

        Returns:
            The generated words as a list of strings.
        """
        output = list(random.choice(list(self.word_table)))

        for _ in range(self.order, n):
            output.append(self.generate_word_by_word(tuple(output[-self.order :])))

        return output

    def generate_word_by_word(self, prefix: tuple):
        """Pick a random follow-up word for *prefix*.

        If the prefix was never seen, the first known prefix ending in the
        same word is used instead. If there is none either, the suffixes of a
        random known prefix are used so that generation can continue.

        Args:
            prefix: Tuple of the most recent words.

        Returns:
            One word drawn from the matching suffix list.
        """
        suffixes = self.word_table.get(prefix)

        if suffixes is None:
            print(f"Prefix {prefix} not in table")

            # Back off to any prefix that at least ends in the same word.
            for key, candidates in self.word_table.items():
                if key[-1] == prefix[-1]:
                    return random.choice(candidates)

            # Nothing shares the last word: continue from a random prefix
            # instead of failing with a KeyError.
            suffixes = random.choice(list(self.word_table.values()))

        return random.choice(suffixes)

    def generate_sentences(self, n):
        """Generate *n* independent sentences.

        Args:
            n: Number of sentences to produce.

        Returns:
            A list of sentences, each a list of words.
        """
        return [self.generate_sentence() for _ in range(n)]

    def generate_sentence(self):
        """Generate words from a random prefix until one contains a period.

        Returns:
            The sentence as a list of words; the last word contains ".".
        """
        output = list(random.choice(list(self.word_table)))

        while "." not in output[-1]:
            output.append(self.generate_word_by_word(tuple(output[-self.order :])))

        return output