add textgen

2021-04-29 12:05:10 +02:00
parent b327a4743c
commit d29ad2ad08
6 changed files with 47298 additions and 11 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,4 @@
 /.env
 /__pycache__/
 /geckodriver.log
+/test.txt
--- a/bible.txt
+++ b/bible.txt
--- a/bot.py
+++ b/bot.py
@ -9,9 +9,11 @@ import asyncio
 from dotenv import load_dotenv

 import discord
+
 # from discord import Intents

 from scraper import Girls
+from textgen import TextGen

 load_dotenv()
 TOKEN = os.getenv("DISCORD_TOKEN")
@ -29,6 +31,7 @@ class HeidiClient(discord.Client):
        self.prefix_regex = "^" + self.prefix

        self.girls = Girls()  # scraped model list
+        self.bible = TextGen("bible.txt", 3)

        self.triggers = {}  # automatic actions
        self.triggers[
@ -46,6 +49,7 @@ class HeidiClient(discord.Client):
        self.matchers["Countdown$"] = self.countdown
        self.matchers["gib Link"] = self.show_link
        self.matchers["welche Farbe .+\\?$"] = self.random_color
+        self.matchers["zitiere die Bibel"] = self.generate_bible_quote

        ### Voicelines

@ -167,7 +171,7 @@ class HeidiClient(discord.Client):
        Countdown (Zeit bis zur nächsten Folge)
        """
        date = datetime.date.today()
-        while date.weekday() != 3: # 3 for thursday
+        while date.weekday() != 3:  # 3 for thursday
            date += datetime.timedelta(1)
        next_gntm = datetime.datetime(date.year, date.month, date.day, 20, 15)

@ -175,7 +179,9 @@ class HeidiClient(discord.Client):
        hours, rem = divmod(delta.seconds, 3600)
        minutes, seconds = divmod(rem, 60)

-        await message.channel.send(f"Noch {delta.days} Tage, {hours} Stunden und {minutes} Minuten bis zur nächsten Folge GNTM!")
+        await message.channel.send(
+            f"Noch {delta.days} Tage, {hours} Stunden und {minutes} Minuten bis zur nächsten Folge GNTM!"
+        )

    async def show_link(self, message):
        """
@ -190,9 +196,33 @@ class HeidiClient(discord.Client):
        """
        welche Farbe ... <Ding>? (Zufällige Farbe)
        """
-        choices = ["Rot", "Grün", "Gelb", "Blau", "Lila", "Pink", "Türkis", "Schwarz", "Weiß", "Grau", "Gelb", "Orange", "Olivegrün", "Mitternachtsblau", "Braun", "Tobe"]
+        choices = [
+            "Rot",
+            "Grün",
+            "Gelb",
+            "Blau",
+            "Lila",
+            "Pink",
+            "Türkis",
+            "Schwarz",
+            "Weiß",
+            "Grau",
+            "Gelb",
+            "Orange",
+            "Olivegrün",
+            "Mitternachtsblau",
+            "Braun",
+            "Tobe",
+        ]
        await message.channel.send(random.choice(choices))

+    async def generate_bible_quote(self, message):
+        """
+        zitiere die Bibel! (Unsinn generieren)
+        """
+        # NOTE(review): the docstring above is kept verbatim; it mirrors the
+        # trigger phrase and may be surfaced to users -- confirm before editing.
+        #
+        # Ask the text generator held in self.bible for one sentence (an
+        # iterable of word strings), glue the words together with single
+        # spaces and post the result to the channel of the incoming message.
+        sentence = " ".join(self.bible.generate_sentence())
+        await message.channel.send(sentence)
+
    ### Voiceboard ---------------------------------------------------------------------------------

    async def say_kein_foto(self, message):
--- a/faust.txt
+++ b/faust.txt
--- a/shell.nix
+++ b/shell.nix
@ -2,13 +2,28 @@

 with pkgs;

-let myPython = python38.buildEnv.override {
-      extraLibs = with python38Packages; [
-        python-dotenv
+let myPython = python39.buildEnv.override {
+      extraLibs = with python39Packages; [
+        # Common Libs
        rich
-        discordpy
-        beautifulsoup4
-        selenium
+        # numpy
+        # matplotlib
+        # scipy
+        # torch
+        
+        # Doom Emacs Libs
+        black
+        pyflakes
+        isort
+        nose
+        pytest
+
+        # For Discord-Bot
+        python-dotenv # Env
+        discordpy # Discord
+        beautifulsoup4 # Scraping
+        selenium # Scraping
+        pynacl # Voice
      ];
    };
 in
@ -16,7 +31,11 @@ in
 mkShell {
  buildInputs = [
    myPython
-    nodePackages.pyright
-    geckodriver
+    nodePackages.pyright # LSP
+    pipenv # Doom
+
+    firefox # Selenium
+    geckodriver # Selenium
+    ffmpeg # Voice
  ];
 }
--- a/textgen.py
+++ b/textgen.py
@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+
+from rich.traceback import install
+
+install()
+
+import re
+import random
+
+
+class TextGen:
+    """Word-level Markov-chain text generator trained on one text file.
+
+    Instance fields:
+        wordbase: list of lower-cased corpus tokens in reading order.
+        word_table: dict mapping a tuple of ``order`` consecutive tokens to the
+            list of tokens that followed that tuple in the corpus.
+        order: number of tokens that make up one prefix.
+    """
+
+    def __init__(self, filename, n):
+        """Load the corpus from *filename* and train a chain of prefix length *n*.
+
+        Args:
+            filename: Path of the text file used as training corpus.
+            n: Number of consecutive words that form one prefix.
+        """
+        # The token pattern below keeps ä/ö/ü/ß, so decode explicitly as UTF-8
+        # instead of depending on the platform's default encoding.
+        with open(filename, encoding="utf-8") as file:
+            text = file.read().lower()
+
+        # Every run of characters other than a-z, ä/ö/ü/ß, apostrophe, period
+        # and comma collapses into one separator; periods and commas are kept
+        # so that sentence ends remain detectable.
+        self.wordbase = re.sub(r"[^a-zäöüß'.,]+", " ", text).split()
+
+        self.word_table = dict()
+        self.order = n
+
+        self.train_words(self.order)
+
+    def train_words(self, n):
+        """Build the Markov chain with prefix length *n* from ``wordbase``.
+
+        For every window of *n* consecutive words the following word is
+        appended to that window's entry in ``word_table``.
+        """
+        print(f"Training with {len(self.wordbase)} words.")
+
+        # A window starting at i needs a follower at i + n, so the last valid
+        # start is len - n - 1, i.e. range(len - n).  A corpus shorter than
+        # n + 1 words yields an empty range and therefore an empty table.
+        for i in range(len(self.wordbase) - n):
+            prefix = tuple(self.wordbase[i : i + n])
+            # Duplicates are kept on purpose: random.choice over the list then
+            # picks each follower proportionally to how often it occurred.
+            self.word_table.setdefault(prefix, []).append(self.wordbase[i + n])
+
+        print(f"Generated suffixes for {len(self.word_table)} prefixes.")
+
+    def _random_prefix(self):
+        """Return a randomly chosen trained prefix as a fresh list of words."""
+        return list(random.choice(list(self.word_table)))
+
+    def generate_random(self, n):
+        """Return a list of generated words starting from a random prefix.
+
+        The result holds *n* words, or ``order`` words when *n* is smaller than
+        the prefix length (the start prefix is never truncated).
+        """
+        output = self._random_prefix()
+
+        for _ in range(self.order, n):
+            output.append(self.generate_word_by_word(tuple(output[-self.order :])))
+
+        return output
+
+    def generate_word_by_word(self, prefix: tuple):
+        """Return one random follower for *prefix*.
+
+        Unknown prefixes fall back to the first trained prefix ending in the
+        same word; if there is none, a follower of an arbitrary trained prefix
+        is returned instead of raising ``KeyError``.
+        """
+        if prefix in self.word_table:
+            return random.choice(self.word_table[prefix])
+
+        print(f"Prefix {prefix} not in table")
+
+        # Slices instead of [-1] so that an empty tuple cannot raise IndexError.
+        for key, suffixes in self.word_table.items():
+            if key[-1:] == prefix[-1:]:
+                return random.choice(suffixes)
+
+        # Dead end (e.g. the final words of the corpus): continue from any
+        # trained prefix so that generation can go on.
+        return random.choice(random.choice(list(self.word_table.values())))
+
+    def generate_sentences(self, n):
+        """Return a list of *n* independently generated sentences."""
+        return [self.generate_sentence() for _ in range(n)]
+
+    def generate_sentence(self):
+        """Return a list of words that ends with the first word containing ".".
+
+        Generation starts from a random prefix and keeps appending words until
+        the most recent word contains a period.
+        """
+        output = self._random_prefix()
+
+        while "." not in output[-1]:
+            output.append(self.generate_word_by_word(tuple(output[-self.order :])))
+
+        return output