From 0f6cc121824fe0a02a39f928641183517d8f145e Mon Sep 17 00:00:00 2001
From: Christoph Urlacher
Date: Sat, 9 Dec 2023 17:36:53 +0100
Subject: [PATCH] Delete orphaned code

---
 .gitlab-ci.yml    |  38 ------
 launch.sh         |   9 --
 models.py         |  33 -----
 requirements.txt  |   9 --
 textgen.py        |  44 -------
 textgen_lstm.py   | 303 ----------------------------------------------
 textgen_markov.py |  82 -------------
 7 files changed, 518 deletions(-)
 delete mode 100644 .gitlab-ci.yml
 delete mode 100755 launch.sh
 delete mode 100644 models.py
 delete mode 100644 textgen.py
 delete mode 100644 textgen_lstm.py
 delete mode 100644 textgen_markov.py

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
deleted file mode 100644
index 4b86591..0000000
--- a/.gitlab-ci.yml
+++ /dev/null
@@ -1,38 +0,0 @@
-workflow: # for entire pipeline
-  rules:
-    - if: '$CI_COMMIT_REF_NAME == "master"' # only run on master...
-      changes: # ...and when these files have changed
-        - "*.py"
-        - "Dockerfile"
-
-docker-build:
-  stage: build
-  image: docker:20 # provides the docker toolset (but without an active daemon)
-  services: # configure images that run during jobs linked to the image (above)
-    - docker:dind # dind build on docker and starts up the dockerdaemon (docker itself doesn't do that), which is needed to call docker build etc.
-  before_script:
-    - docker login -u $CI_REGISTRY_USER -p "$CI_REGISTRY_PASSWORD" $CI_REGISTRY
-  script:
-    - docker pull $CI_REGISTRY_IMAGE:latest || true # latest image for cache (not failing if image is not found)
-    - >
-      docker build
-      --pull
-      --cache-from $CI_REGISTRY_IMAGE:latest
-      --label "org.opencontainers.image.title=$CI_PROJECT_TITLE"
-      --label "org.opencontainers.image.url=$CI_PROJECT_URL"
-      --label "org.opencontainers.image.created=$CI_JOB_STARTED_AT"
-      --label "org.opencontainers.image.revision=$CI_COMMIT_SHA"
-      --label "org.opencontainers.image.version=$CI_COMMIT_REF_NAME"
-      --tag $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA
-      .
-    - docker tag $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA $CI_REGISTRY_IMAGE:latest
-    - docker push $CI_REGISTRY_IMAGE:latest
-
-docker-deploy:
-  stage: deploy
-  image: alpine:3.15
-  needs: ["docker-build"]
-  script:
-    - chmod og= $ID_RSA
-    - apk update && apk add openssh-client
-    - ssh -i $ID_RSA -o StrictHostKeyChecking=no $SERVER_USER@$SERVER_IP "/home/christoph/$CI_PROJECT_TITLE/launch.sh"
diff --git a/launch.sh b/launch.sh
deleted file mode 100755
index 652bf3a..0000000
--- a/launch.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/sh
-
-cd /home/christoph/HeidiBot
-git pull
-
-docker pull registry.gitlab.com/churl/heidibot
-docker container rm -f heidibot
-docker run -d --env-file /home/christoph/HeidiBot/.env --mount src=/home/christoph/HeidiBot/voicelines,target=/sounds,type=bind --name heidibot registry.gitlab.com/churl/heidibot
-docker image prune -f
diff --git a/models.py b/models.py
deleted file mode 100644
index b9c75f3..0000000
--- a/models.py
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/env python3
-
-import requests
-import re
-from bs4 import BeautifulSoup
-
-
-class Models:
-    def __init__(self):
-        url_girls = "https://www.prosieben.de/tv/germanys-next-topmodel/models"
-
-        html_girls = requests.get(url_girls)
-        soup_girls = BeautifulSoup(html_girls.text, "html.parser")
-
-        girls_in = soup_girls.findAll("a", class_="candidate-in")
-        girls_out = soup_girls.findAll("a", class_="candidate-out")
-
-        self.girls_in = {girl.get("title").lower(): girl for girl in girls_in}
-        self.girls_out = {girl.get("title").lower(): girl for girl in girls_out}
-
-        self.girls = {**self.girls_in, **self.girls_out}
-
-    def get_in_names(self):
-        return self.girls_in.keys()
-
-    def get_out_names(self):
-        return self.girls_out.keys()
-
-    def get_image(self, name):
-        style = self.girls[name.lower()].find("figure", class_="teaser-img").get("style")
-        url = re.search(r"url\(.*\);", style).group()
-
-        return url[4:-9] + "562x996" # increase resolution
diff --git a/requirements.txt b/requirements.txt
index c4cca16..abdbf5f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,12 +3,3 @@ rich
 discord.py # maintained again
 pynacl # voice support
 python-dotenv # discord guild secrets
-
-# Webscraping
-# requests
-# beautifulsoup4
-
-# Textgeneration
-# torch
-# numpy
-# nltk
diff --git a/textgen.py b/textgen.py
deleted file mode 100644
index d6b4e68..0000000
--- a/textgen.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/env python3
-
-from rich.traceback import install
-install()
-
-from abc import ABC, abstractmethod
-
-# In Python it is generally not needed to use abstract classes, but I wanted to do it safely
-
-class textgen(ABC):
-    @abstractmethod
-    def init(self, filename):
-        """
-        filename - The file (same directory as textgen.py) that contains the training text
-        """
-        raise NotImplementedError("Can't use abstract class")
-
-    @abstractmethod
-    def load(self):
-        """
-        Load the trained markov chain from a precomputed file
-        """
-        raise NotImplementedError("Can't use abstract class")
-
-    @abstractmethod
-    def train(self):
-        """
-        Generate the markov chain, uses prefix length defined in init()
-        """
-        raise NotImplementedError("Can't use abstract class")
-
-    @abstractmethod
-    def generate_sentence(self):
-        """
-        Generate a series of words/characters until a . is generated
-        """
-        raise NotImplementedError("Can't use abstract class")
-
-    @abstractmethod
-    def complete_sentence(self, prefix):
-        """
-        Generate the rest of a sentence for a given beginning
-        """
-        raise NotImplementedError("Can't use abstract class")
diff --git a/textgen_lstm.py b/textgen_lstm.py
deleted file mode 100644
index a752066..0000000
--- a/textgen_lstm.py
+++ /dev/null
@@ -1,303 +0,0 @@
-#!/usr/bin/env python3
-
-import re, random
-import numpy as np
-import matplotlib.pyplot as plt
-import torch
-import torch.nn.functional as F
-from textgen import textgen
-from torch import nn, optim
-
-from rich.traceback import install
-install()
-
-# Model =======================================================================================
-# https://towardsdatascience.com/text-generation-with-bi-lstm-in-pytorch-5fda6e7cc22c
-# Embedding -> Bi-LSTM -> LSTM -> Linear
-
-class Model(nn.ModuleList):
-
-    def __init__(self, args, device):
-        super(Model, self).__init__()
-
-        self.device = device
-
-        self.batch_size = args["batch_size"]
-        self.hidden_dim = args["hidden_dim"]
-        self.input_size = args["vocab_size"]
-        self.num_classes = args["vocab_size"]
-        self.sequence_len = args["window"]
-
-        # Dropout
-        self.dropout = nn.Dropout(0.25) # Don't need to set device for the layers as we transfer the whole model later
-
-        # Embedding layer
-        self.embedding = nn.Embedding(self.input_size, self.hidden_dim, padding_idx=0)
-
-        # Bi-LSTM
-        # Forward and backward
-        self.lstm_cell_forward = nn.LSTMCell(self.hidden_dim, self.hidden_dim)
-        self.lstm_cell_backward = nn.LSTMCell(self.hidden_dim, self.hidden_dim)
-
-        # LSTM layer
-        self.lstm_cell = nn.LSTMCell(self.hidden_dim * 2, self.hidden_dim * 2)
-
-        # Linear layer
-        self.linear = nn.Linear(self.hidden_dim * 2, self.num_classes)
-
-
-    def forward(self, x):
-        # Bi-LSTM
-        # hs = [batch_size x hidden_size]
-        # cs = [batch_size x hidden_size]
-        hs_forward = torch.zeros(x.size(0), self.hidden_dim).to(self.device) # Need to specify device here as this is not part of the model directly
-        cs_forward = torch.zeros(x.size(0), self.hidden_dim).to(self.device)
-        hs_backward = torch.zeros(x.size(0), self.hidden_dim).to(self.device)
-        cs_backward = torch.zeros(x.size(0), self.hidden_dim).to(self.device)
-
-        # LSTM
-        # hs = [batch_size x (hidden_size * 2)]
-        # cs = [batch_size x (hidden_size * 2)]
-        hs_lstm = torch.zeros(x.size(0), self.hidden_dim * 2).to(self.device)
-        cs_lstm = torch.zeros(x.size(0), self.hidden_dim * 2).to(self.device)
-
-        # Weights initialization
-        torch.nn.init.kaiming_normal_(hs_forward)
-        torch.nn.init.kaiming_normal_(cs_forward)
-        torch.nn.init.kaiming_normal_(hs_backward)
-        torch.nn.init.kaiming_normal_(cs_backward)
-        torch.nn.init.kaiming_normal_(hs_lstm)
-        torch.nn.init.kaiming_normal_(cs_lstm)
-
-        # From idx to embedding
-        out = self.embedding(x)
-
-        # Prepare the shape for LSTM Cells
-        out = out.view(self.sequence_len, x.size(0), -1)
-
-        forward = []
-        backward = []
-
-        # Unfolding Bi-LSTM
-        # Forward
-        for i in range(self.sequence_len):
-            hs_forward, cs_forward = self.lstm_cell_forward(out[i], (hs_forward, cs_forward))
-            forward.append(hs_forward)
-
-        # Backward
-        for i in reversed(range(self.sequence_len)):
-            hs_backward, cs_backward = self.lstm_cell_backward(out[i], (hs_backward, cs_backward))
-            backward.append(hs_backward)
-
-        # LSTM
-        for fwd, bwd in zip(forward, backward):
-            input_tensor = torch.cat((fwd, bwd), 1)
-            hs_lstm, cs_lstm = self.lstm_cell(input_tensor, (hs_lstm, cs_lstm))
-
-        # Last hidden state is passed through a linear layer
-        out = self.linear(hs_lstm)
-        return out
-
-
-# =============================================================================================
-
-class LSTMTextGenerator(textgen):
-
-    def __init__(self, windowsize):
-        self.windowsize = windowsize # We slide a window over the character sequence and look at the next letter,
-                                     # similar to the Markov chain order
-
-
-    def init(self, filename):
-        self.filename = filename
-
-        # Use this to generate one hot vector and filter characters
-        self.letters = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
-                        "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "ä", "ö", "ü", ".", " "]
-
-        with open(f"./textfiles/{filename}.txt", "r") as file:
-            lines = [line.lower() for line in file.readlines()] # lowercase list
-            text = " ".join(lines) # single string
-            self.charbase = [char for char in text if char in self.letters] # list of characters
-
-        # Select device
-        if torch.cuda.is_available():
-            dev = "cuda:0"
-            print("Selected GPU for LSTM")
-        else:
-            dev = "cpu"
-            print("Selected CPU for LSTM")
-        self.device = torch.device(dev)
-
-        # Init model
-        self.args = {
-            "window": self.windowsize,
-            "hidden_dim": 128,
-            "vocab_size": len(self.letters),
-            "batch_size": 128,
-            "learning_rate": 0.0005,
-            "num_epochs": 100
-        }
-        self.model = Model(self.args, self.device)
-        self.model.to(self.device) # All model layers need to use the correct tensors (cpu/gpu)
-
-        # Needed for both training and generation
-        self.__generate_char_sequences()
-
-    # Helper shit
-
-    def __char_to_idx(self, char):
-        return self.letters.index(char)
-
-    def __idx_to_char(self, idx):
-        return self.letters[idx]
-
-    def __generate_char_sequences(self):
-        # Example
-        # [[21, 20, 15],
-        #  [12, 12, 14]]
-        prefixes = []
-
-        # Example
-        # [[1],
-        #  [4]]
-        suffixes = []
-
-        print("Generating LSTM char sequences...")
-        for i in range(len(self.charbase) - self.windowsize - 1):
-            prefixes.append([self.__char_to_idx(char) for char in self.charbase[i:i+self.windowsize]])
-            suffixes += [self.__char_to_idx(char) for char in self.charbase[i+self.windowsize+1]] # Bit stupid wrapping this in a list but removes possible type error
-
-        # Enter numpy terretory NOW
-        self.prefixes = np.array(prefixes)
-        self.suffixes = np.array(suffixes)
-
-        print(f"Prefixes shape: {self.prefixes.shape}")
-        print(f"Suffixes shape: {self.suffixes.shape}")
-        print("Completed.")
-
-    # Interface shit
-
-    # @todo Also save/load generated prefixes
-    def load(self):
-        print(f"Loading \"{self.filename}\" LSTM model with {len(self.charbase)} characters from file.")
-
-        self.model.load_state_dict(torch.load(f"weights/{self.filename}_lstm_model.pt"))
-
-    def train(self):
-        print(f"Training \"{self.filename}\" LSTM model with {len(self.charbase)} characters.")
-
-        # Optimizer initialization, RMSprop for RNN
-        optimizer = optim.RMSprop(self.model.parameters(), lr=self.args["learning_rate"])
-
-        # Defining number of batches
-        num_batches = int(len(self.prefixes) / self.args["batch_size"])
-
-        # Set model in training mode
-        self.model.train()
-
-        losses = []
-
-        # Training pahse
-        for epoch in range(self.args["num_epochs"]):
-
-            # Mini batches
-            for i in range(num_batches):
-
-                # Batch definition
-                try:
-                    x_batch = self.prefixes[i * self.args["batch_size"]:(i + 1) * self.args["batch_size"]]
-                    y_batch = self.suffixes[i * self.args["batch_size"]:(i + 1) * self.args["batch_size"]]
-                except:
-                    x_batch = self.prefixes[i * self.args["batch_size"]:]
-                    y_batch = self.suffixes[i * self.args["batch_size"]:]
-
-                # Convert numpy array into torch tensors
-                x = torch.from_numpy(x_batch).type(torch.long).to(self.device)
-                y = torch.from_numpy(y_batch).type(torch.long).to(self.device)
-
-                # Feed the model
-                y_pred = self.model(x)
-
-                # Loss calculation
-                loss = F.cross_entropy(y_pred, y.squeeze()).to(self.device)
-                losses += [loss.item()]
-
-                # Clean gradients
-                optimizer.zero_grad()
-
-                # Calculate gradientes
-                loss.backward()
-
-                # Updated parameters
-                optimizer.step()
-
-            print("Epoch: %d , loss: %.5f " % (epoch, loss.item()))
-
-        torch.save(self.model.state_dict(), f"weights/{self.filename}_lstm_model.pt")
-        print(f"Saved \"{self.filename}\" LSTM model to file")
-
-        plt.plot(np.arange(0, len(losses)), losses)
-        plt.title(self.filename)
-        plt.show()
-
-
-    def generate_sentence(self):
-        # Randomly is selected the index from the set of sequences
-        start = np.random.randint(0, len(self.prefixes)-1)
-
-        # Convert back to string to match complete_sentence
-        pattern = "".join([self.__idx_to_char(char) for char in self.prefixes[start]]) # random sequence from the training text
-
-        return self.complete_sentence(pattern)
-
-    def complete_sentence(self, prefix):
-        print("Prefix:", prefix)
-
-        # Convert to indexes np.array
-        pattern = np.array([self.__char_to_idx(char) for char in prefix])
-
-        # Set the model in evalulation mode
-        self.model.eval()
-
-        # Define the softmax function
-        softmax = nn.Softmax(dim=1).to(self.device)
-
-        # In full_prediction we will save the complete prediction
-        full_prediction = pattern.copy()
-
-        print("Generating sentence...")
-
-        # Predic the next characters one by one, append chars to the starting pattern until . is reached, max 500 iterations
-        for _ in range(500):
-            # the numpy patterns is transformed into a tesor-type and reshaped
-            pattern = torch.from_numpy(pattern).type(torch.long).to(self.device)
-            pattern = pattern.view(1,-1)
-
-            # make a prediction given the pattern
-            prediction = self.model(pattern)
-            # it is applied the softmax function to the predicted tensor
-            prediction = softmax(prediction)
-
-            # the prediction tensor is transformed into a numpy array
-            prediction = prediction.squeeze().detach().cpu().numpy()
-            # it is taken the idx with the highest probability
-            arg_max = np.argmax(prediction)
-
-            # the current pattern tensor is transformed into numpy array
-            pattern = pattern.squeeze().detach().cpu().numpy()
-            # the window is sliced 1 character to the right
-            pattern = pattern[1:]
-            # the new pattern is composed by the "old" pattern + the predicted character
-            pattern = np.append(pattern, arg_max)
-
-            # the full prediction is saved
-            full_prediction = np.append(full_prediction, arg_max)
-
-            # Stop on . character
-            if self.__idx_to_char(arg_max) == ".":
-                break
-
-        full_prediction = "".join([self.__idx_to_char(value) for value in full_prediction])
-        print("Generated:", full_prediction)
-        return full_prediction
diff --git a/textgen_markov.py b/textgen_markov.py
deleted file mode 100644
index ccfb604..0000000
--- a/textgen_markov.py
+++ /dev/null
@@ -1,82 +0,0 @@
-#!/usr/bin/env python3
-
-import re
-import random
-from textgen import textgen
-
-from rich.traceback import install
-install()
-
-# NOTE: This is word based, not character based
-# @todo Serialize and save/load model (don't train on the server)
-# @todo Maybe extract sentence beginnings and use them as starters?
-
-class MarkovTextGenerator(textgen):
-    # The greater the order (prefix length), the lesser the variation in generation, but the better the sentences (generally).
-    # If the prefix length is high there are less options to choose from, so the sentences are very close to the training text.
-    def __init__(self, order): # Set order here for better interface (only needed for markov model)
-        self.order = order
-
-    def init(self, filename): # Filename is needed for every type of model so it's part of the interface
-        with open(f"./textfiles/{filename}.txt", "r") as file:
-            # Remove all characters except a-zäöüß'.,
-            self.wordbase = re.sub(r"[^a-zäöüß'.,]+", " ", file.read().lower()).split()
-
-        self.word_table = dict()
-
-    def load(self):
-        print(f"Loaded Markov chain of order {self.order} with {len(self.wordbase)} words from file.")
-
-    def train(self):
-        print(f"Training Markov chain of order {self.order} with {len(self.wordbase)} words.")
-
-        # init the frequencies
-        for i in range(len(self.wordbase) - self.order - 1): # Look at every word in range
-            prefix = tuple(self.wordbase[i:i+self.order]) # Look at the next self.order words from current position
-            suffix = self.wordbase[i+self.order] # The next word is the suffix
-
-            if prefix not in self.word_table: # New option wooo
-                self.word_table[prefix] = []
-
-            # if suffix not in self.table[prefix]: # disable for probabilities: if the suffixes are in the list multiple times they are more common
-            self.word_table[prefix].append(suffix)
-
-        print(f"Generated suffixes for {len(self.word_table)} prefixes.")
-
-    # def generate_random(self, n):
-    #     fword = random.choice(list(self.word_table.keys())) # Random first word
-    #     output = [*fword]
-
-    #     for _ in range(self.order, n):
-    #         output.append(self.generate_word_by_word(tuple(output[-self.order :])))
-
-    #     return output
-
-    def generate_suffix_for_prefix(self, prefix: tuple):
-        if len(prefix) > self.order: # In this case we look at the last self.order elements of prefix
-            prefix = prefix[len(prefix)-self.order-1:-1]
-
-        if prefix not in self.word_table: # In this case we need to choose a possible suffix from the last word in the prefix (if prefix too short for example)
-            print(f"Prefix {prefix} not in table")
-            for key in self.word_table.keys():
-                if key[-1] == prefix[-1]:
-                    return random.choice(self.word_table[key])
-
-        return random.choice(self.word_table[prefix])
-
-    def generate_sentence(self):
-        fword = random.choice(list(self.word_table.keys()))
-        output = [*fword]
-
-        while "." not in output[-1]:
-            output.append(self.generate_suffix_for_prefix(tuple(output[-self.order:])))
-
-        return output
-
-    def complete_sentence(self, prefix):
-        output = [*prefix]
-
-        while "." not in output[-1]:
-            output.append(self.generate_suffix_for_prefix(tuple(output[-self.order:])))
-
-        return output