add textgen

2021-04-29 12:05:10 +02:00
parent b327a4743c
commit d29ad2ad08
6 changed files with 47298 additions and 11 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,4 @@
 /.env
 /__pycache__/
 /geckodriver.log
 /test.txt
--- a/bible.txt
+++ b/bible.txt
--- a/bot.py
+++ b/bot.py
@ -9,9 +9,11 @@ import asyncio
 from dotenv import load_dotenv
 import discord
 # from discord import Intents
 from scraper import Girls
 from textgen import TextGen
 load_dotenv()
 TOKEN = os.getenv("DISCORD_TOKEN")
@ -29,6 +31,7 @@ class HeidiClient(discord.Client):
        self.prefix_regex = "^" + self.prefix
        self.girls = Girls()  # scraped model list
        self.bible = TextGen("bible.txt", 3)
        self.triggers = {}  # automatic actions
        self.triggers[
@ -46,6 +49,7 @@ class HeidiClient(discord.Client):
        self.matchers["Countdown$"] = self.countdown
        self.matchers["gib Link"] = self.show_link
        self.matchers["welche Farbe .+\\?$"] = self.random_color
        self.matchers["zitiere die Bibel"] = self.generate_bible_quote
        ### Voicelines
@ -167,7 +171,7 @@ class HeidiClient(discord.Client):
        Countdown (Zeit bis zur nächsten Folge)
        """
        date = datetime.date.today()
-        while date.weekday() != 3: # 3 for thursday
+        while date.weekday() != 3:  # 3 for thursday
            date += datetime.timedelta(1)
        next_gntm = datetime.datetime(date.year, date.month, date.day, 20, 15)
@ -175,7 +179,9 @@ class HeidiClient(discord.Client):
        hours, rem = divmod(delta.seconds, 3600)
        minutes, seconds = divmod(rem, 60)
-        await message.channel.send(f"Noch {delta.days} Tage, {hours} Stunden und {minutes} Minuten bis zur nächsten Folge GNTM!")
+        await message.channel.send(
            f"Noch {delta.days} Tage, {hours} Stunden und {minutes} Minuten bis zur nächsten Folge GNTM!"
        )
    async def show_link(self, message):
        """
@ -190,9 +196,33 @@ class HeidiClient(discord.Client):
        """
        welche Farbe ... <Ding>? (Zufällige Farbe)
        """
-        choices = ["Rot", "Grün", "Gelb", "Blau", "Lila", "Pink", "Türkis", "Schwarz", "Weiß", "Grau", "Gelb", "Orange", "Olivegrün", "Mitternachtsblau", "Braun", "Tobe"]
+        choices = [
            "Rot",
            "Grün",
            "Gelb",
            "Blau",
            "Lila",
            "Pink",
            "Türkis",
            "Schwarz",
            "Weiß",
            "Grau",
            "Gelb",
            "Orange",
            "Olivegrün",
            "Mitternachtsblau",
            "Braun",
            "Tobe",
        ]
        await message.channel.send(random.choice(choices))
    async def generate_bible_quote(self, message):
        """
        zitiere die Bibel! (Unsinn generieren)
        """
        # NOTE(review): the docstring above is left untouched on purpose; the
        # handler docstrings look like user-facing command descriptions, so
        # confirm they are not read at runtime before translating them.
        # generate_sentence() returns a list of words, sent here as a single
        # space-separated message to the channel the trigger came from.
        await message.channel.send(" ".join(self.bible.generate_sentence()))
    ### Voiceboard ---------------------------------------------------------------------------------
    async def say_kein_foto(self, message):
--- a/faust.txt
+++ b/faust.txt
--- a/shell.nix
+++ b/shell.nix
@ -2,13 +2,28 @@
 with pkgs;
-let myPython = python38.buildEnv.override {
+let myPython = python39.buildEnv.override {
-      extraLibs = with python38Packages; [
+      extraLibs = with python39Packages; [
-        python-dotenv
+        # Common Libs
        rich
-        discordpy
+        # numpy
-        beautifulsoup4
+        # matplotlib
-        selenium
+        # scipy
        # torch
        # Doom Emacs Libs
        black
        pyflakes
        isort
        nose
        pytest
        # For Discord-Bot
        python-dotenv # Env
        discordpy # Discord
        beautifulsoup4 # Scraping
        selenium # Scraping
        pynacl # Voice
      ];
    };
 in
@ -16,7 +31,11 @@ in
 mkShell {
  buildInputs = [
    myPython
-    nodePackages.pyright
+    nodePackages.pyright # LSP
-    geckodriver
+    pipenv # Doom
    firefox # Selenium
    geckodriver # Selenium
    ffmpeg # Voice
  ];
 }
--- a/textgen.py
+++ b/textgen.py
@ -0,0 +1,68 @@
 #!/usr/bin/env python3
 from rich.traceback import install
 install()
 import re
 import random
 class TextGen:
     """Word-level Markov-chain text generator trained on a plain-text file.

     The text is lower-cased and every run of characters other than ``a-z``,
     ``ä``, ``ö``, ``ü``, ``ß``, ``'``, ``.`` and ``,`` is collapsed into one
     space before it is split into words, so punctuation stays attached to
     the word it follows (e.g. ``"ende."``).

     Attributes are documented where they are assigned in ``__init__``.
     """

     def __init__(self, filename, n):
         """Read the corpus from ``filename`` and train a chain of order ``n``.

         Args:
             filename: Path of the text file to learn from.
             n: Number of consecutive words that form one prefix.
         """
         # The pattern below keeps umlauts, so decoding must not depend on the
         # platform's default locale. Assumes the corpus files are UTF-8 -
         # TODO confirm.
         with open(filename, encoding="utf-8") as file:
             text = file.read().lower()
         # Flat list of all normalised words in corpus order.
         self.wordbase = re.sub(r"[^a-zäöüß'.,]+", " ", text).split()
         # Maps prefix tuple -> list of words seen directly after it.
         self.word_table = {}
         # Prefix length used for training and generation.
         self.order = n
         self.train_words(self.order)

     def train_words(self, n):
         """Build the Markov chain with prefix length ``n``.

         Every window of ``n`` consecutive words becomes a key of
         ``self.word_table``; the word following the window is appended to
         that key's list. Progress is reported on stdout.
         """
         print(f"Training with {len(self.wordbase)} words.")
         words = self.wordbase
         # The last usable window starts at len(words) - n - 1 (its successor
         # is the final word), so the exclusive range bound is len(words) - n.
         for start in range(len(words) - n):
             prefix = tuple(words[start : start + n])
             # Duplicates are kept on purpose: random.choice() then samples a
             # successor proportionally to how often it occurred.
             self.word_table.setdefault(prefix, []).append(words[start + n])
         print(f"Generated suffixes for {len(self.word_table)} prefixes.")

     def generate_random(self, n):
         """Return a list of words grown from a random prefix up to ``n`` words.

         The result starts with a randomly chosen prefix (``self.order`` words)
         and is extended one word at a time until it holds ``n`` words.
         """
         output = [*random.choice(list(self.word_table))]
         for _ in range(self.order, n):
             output.append(self.generate_word_by_word(tuple(output[-self.order :])))
         return output

     def generate_word_by_word(self, prefix: tuple):
         """Return a random successor word for ``prefix``.

         Lookup order:
           1. the exact prefix,
           2. the first known prefix that ends with the same word,
           3. any known prefix, chosen at random (previously this case raised
              ``KeyError``).
         """
         if prefix in self.word_table:
             return random.choice(self.word_table[prefix])

         print(f"Prefix {prefix} not in table")
         for key, suffixes in self.word_table.items():
             if key[-1] == prefix[-1]:
                 return random.choice(suffixes)

         # Nothing shares even the last word: restart from an arbitrary point
         # of the chain instead of failing.
         return random.choice(random.choice(list(self.word_table.values())))

     def generate_sentences(self, n, max_words=None):
         """Return ``n`` generated sentences, each as a list of words.

         ``max_words`` is forwarded to :meth:`generate_sentence`.
         """
         # The call parentheses matter: without them the list would contain
         # the bound method object n times instead of n sentences.
         return [self.generate_sentence(max_words) for _ in range(n)]

     def generate_sentence(self, max_words=None):
         """Return one sentence as a list of words.

         Starts from a random prefix and appends words until the last word
         contains a ``"."``.

         Args:
             max_words: Optional upper bound on the sentence length. With the
                 default ``None`` generation is unbounded, which never
                 terminates if no word containing a period is reachable.
         """
         output = [*random.choice(list(self.word_table))]
         while "." not in output[-1]:
             if max_words is not None and len(output) >= max_words:
                 break
             output.append(self.generate_word_by_word(tuple(output[-self.order :])))
         return output