Delete orphaned code
All checks were successful
Build Heidi Docker image / build-docker (push) Successful in 27s

This commit is contained in:
2023-12-09 17:36:53 +01:00
parent 9b66061ee7
commit 0f6cc12182
7 changed files with 0 additions and 518 deletions

View File

@ -1,38 +0,0 @@
workflow: # for entire pipeline
rules:
- if: '$CI_COMMIT_REF_NAME == "master"' # only run on master...
changes: # ...and when these files have changed
- "*.py"
- "Dockerfile"
docker-build:
stage: build
image: docker:20 # provides the docker toolset (but without an active daemon)
services: # configure images that run during jobs linked to the image (above)
- docker:dind # dind build on docker and starts up the dockerdaemon (docker itself doesn't do that), which is needed to call docker build etc.
before_script:
- docker login -u $CI_REGISTRY_USER -p "$CI_REGISTRY_PASSWORD" $CI_REGISTRY
script:
- docker pull $CI_REGISTRY_IMAGE:latest || true # latest image for cache (not failing if image is not found)
- >
docker build
--pull
--cache-from $CI_REGISTRY_IMAGE:latest
--label "org.opencontainers.image.title=$CI_PROJECT_TITLE"
--label "org.opencontainers.image.url=$CI_PROJECT_URL"
--label "org.opencontainers.image.created=$CI_JOB_STARTED_AT"
--label "org.opencontainers.image.revision=$CI_COMMIT_SHA"
--label "org.opencontainers.image.version=$CI_COMMIT_REF_NAME"
--tag $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA
.
- docker tag $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA $CI_REGISTRY_IMAGE:latest
- docker push $CI_REGISTRY_IMAGE:latest
docker-deploy:
stage: deploy
image: alpine:3.15
needs: ["docker-build"]
script:
- chmod og= $ID_RSA
- apk update && apk add openssh-client
- ssh -i $ID_RSA -o StrictHostKeyChecking=no $SERVER_USER@$SERVER_IP "/home/christoph/$CI_PROJECT_TITLE/launch.sh"

View File

@ -1,9 +0,0 @@
#!/bin/sh
cd /home/christoph/HeidiBot
git pull
docker pull registry.gitlab.com/churl/heidibot
docker container rm -f heidibot
docker run -d --env-file /home/christoph/HeidiBot/.env --mount src=/home/christoph/HeidiBot/voicelines,target=/sounds,type=bind --name heidibot registry.gitlab.com/churl/heidibot
docker image prune -f

View File

@ -1,33 +0,0 @@
#!/usr/bin/env python3
import requests
import re
from bs4 import BeautifulSoup
class Models:
def __init__(self):
url_girls = "https://www.prosieben.de/tv/germanys-next-topmodel/models"
html_girls = requests.get(url_girls)
soup_girls = BeautifulSoup(html_girls.text, "html.parser")
girls_in = soup_girls.findAll("a", class_="candidate-in")
girls_out = soup_girls.findAll("a", class_="candidate-out")
self.girls_in = {girl.get("title").lower(): girl for girl in girls_in}
self.girls_out = {girl.get("title").lower(): girl for girl in girls_out}
self.girls = {**self.girls_in, **self.girls_out}
def get_in_names(self):
return self.girls_in.keys()
def get_out_names(self):
return self.girls_out.keys()
def get_image(self, name):
style = self.girls[name.lower()].find("figure", class_="teaser-img").get("style")
url = re.search(r"url\(.*\);", style).group()
return url[4:-9] + "562x996" # increase resolution

View File

@ -3,12 +3,3 @@ rich
discord.py # maintained again
pynacl # voice support
python-dotenv # discord guild secrets
# Webscraping
# requests
# beautifulsoup4
# Textgeneration
# torch
# numpy
# nltk

View File

@ -1,44 +0,0 @@
#!/usr/bin/env python3
from rich.traceback import install
install()
from abc import ABC, abstractmethod
# In Python it is generally not needed to use abstract classes, but I wanted to do it safely
class textgen(ABC):
@abstractmethod
def init(self, filename):
"""
filename - The file (same directory as textgen.py) that contains the training text
"""
raise NotImplementedError("Can't use abstract class")
@abstractmethod
def load(self):
"""
Load the trained markov chain from a precomputed file
"""
raise NotImplementedError("Can't use abstract class")
@abstractmethod
def train(self):
"""
Generate the markov chain, uses prefix length defined in init()
"""
raise NotImplementedError("Can't use abstract class")
@abstractmethod
def generate_sentence(self):
"""
Generate a series of words/characters until a . is generated
"""
raise NotImplementedError("Can't use abstract class")
@abstractmethod
def complete_sentence(self, prefix):
"""
Generate the rest of a sentence for a given beginning
"""
raise NotImplementedError("Can't use abstract class")

View File

@ -1,303 +0,0 @@
#!/usr/bin/env python3
import re, random
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
from textgen import textgen
from torch import nn, optim
from rich.traceback import install
install()
# Model =======================================================================================
# https://towardsdatascience.com/text-generation-with-bi-lstm-in-pytorch-5fda6e7cc22c
# Embedding -> Bi-LSTM -> LSTM -> Linear
class Model(nn.ModuleList):
def __init__(self, args, device):
super(Model, self).__init__()
self.device = device
self.batch_size = args["batch_size"]
self.hidden_dim = args["hidden_dim"]
self.input_size = args["vocab_size"]
self.num_classes = args["vocab_size"]
self.sequence_len = args["window"]
# Dropout
self.dropout = nn.Dropout(0.25) # Don't need to set device for the layers as we transfer the whole model later
# Embedding layer
self.embedding = nn.Embedding(self.input_size, self.hidden_dim, padding_idx=0)
# Bi-LSTM
# Forward and backward
self.lstm_cell_forward = nn.LSTMCell(self.hidden_dim, self.hidden_dim)
self.lstm_cell_backward = nn.LSTMCell(self.hidden_dim, self.hidden_dim)
# LSTM layer
self.lstm_cell = nn.LSTMCell(self.hidden_dim * 2, self.hidden_dim * 2)
# Linear layer
self.linear = nn.Linear(self.hidden_dim * 2, self.num_classes)
def forward(self, x):
# Bi-LSTM
# hs = [batch_size x hidden_size]
# cs = [batch_size x hidden_size]
hs_forward = torch.zeros(x.size(0), self.hidden_dim).to(self.device) # Need to specify device here as this is not part of the model directly
cs_forward = torch.zeros(x.size(0), self.hidden_dim).to(self.device)
hs_backward = torch.zeros(x.size(0), self.hidden_dim).to(self.device)
cs_backward = torch.zeros(x.size(0), self.hidden_dim).to(self.device)
# LSTM
# hs = [batch_size x (hidden_size * 2)]
# cs = [batch_size x (hidden_size * 2)]
hs_lstm = torch.zeros(x.size(0), self.hidden_dim * 2).to(self.device)
cs_lstm = torch.zeros(x.size(0), self.hidden_dim * 2).to(self.device)
# Weights initialization
torch.nn.init.kaiming_normal_(hs_forward)
torch.nn.init.kaiming_normal_(cs_forward)
torch.nn.init.kaiming_normal_(hs_backward)
torch.nn.init.kaiming_normal_(cs_backward)
torch.nn.init.kaiming_normal_(hs_lstm)
torch.nn.init.kaiming_normal_(cs_lstm)
# From idx to embedding
out = self.embedding(x)
# Prepare the shape for LSTM Cells
out = out.view(self.sequence_len, x.size(0), -1)
forward = []
backward = []
# Unfolding Bi-LSTM
# Forward
for i in range(self.sequence_len):
hs_forward, cs_forward = self.lstm_cell_forward(out[i], (hs_forward, cs_forward))
forward.append(hs_forward)
# Backward
for i in reversed(range(self.sequence_len)):
hs_backward, cs_backward = self.lstm_cell_backward(out[i], (hs_backward, cs_backward))
backward.append(hs_backward)
# LSTM
for fwd, bwd in zip(forward, backward):
input_tensor = torch.cat((fwd, bwd), 1)
hs_lstm, cs_lstm = self.lstm_cell(input_tensor, (hs_lstm, cs_lstm))
# Last hidden state is passed through a linear layer
out = self.linear(hs_lstm)
return out
# =============================================================================================
class LSTMTextGenerator(textgen):
def __init__(self, windowsize):
self.windowsize = windowsize # We slide a window over the character sequence and look at the next letter,
# similar to the Markov chain order
def init(self, filename):
self.filename = filename
# Use this to generate one hot vector and filter characters
self.letters = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "ä", "ö", "ü", ".", " "]
with open(f"./textfiles/{filename}.txt", "r") as file:
lines = [line.lower() for line in file.readlines()] # lowercase list
text = " ".join(lines) # single string
self.charbase = [char for char in text if char in self.letters] # list of characters
# Select device
if torch.cuda.is_available():
dev = "cuda:0"
print("Selected GPU for LSTM")
else:
dev = "cpu"
print("Selected CPU for LSTM")
self.device = torch.device(dev)
# Init model
self.args = {
"window": self.windowsize,
"hidden_dim": 128,
"vocab_size": len(self.letters),
"batch_size": 128,
"learning_rate": 0.0005,
"num_epochs": 100
}
self.model = Model(self.args, self.device)
self.model.to(self.device) # All model layers need to use the correct tensors (cpu/gpu)
# Needed for both training and generation
self.__generate_char_sequences()
# Helper shit
def __char_to_idx(self, char):
return self.letters.index(char)
def __idx_to_char(self, idx):
return self.letters[idx]
def __generate_char_sequences(self):
# Example
# [[21, 20, 15],
# [12, 12, 14]]
prefixes = []
# Example
# [[1],
# [4]]
suffixes = []
print("Generating LSTM char sequences...")
for i in range(len(self.charbase) - self.windowsize - 1):
prefixes.append([self.__char_to_idx(char) for char in self.charbase[i:i+self.windowsize]])
suffixes += [self.__char_to_idx(char) for char in self.charbase[i+self.windowsize+1]] # Bit stupid wrapping this in a list but removes possible type error
# Enter numpy terretory NOW
self.prefixes = np.array(prefixes)
self.suffixes = np.array(suffixes)
print(f"Prefixes shape: {self.prefixes.shape}")
print(f"Suffixes shape: {self.suffixes.shape}")
print("Completed.")
# Interface shit
# @todo Also save/load generated prefixes
def load(self):
print(f"Loading \"{self.filename}\" LSTM model with {len(self.charbase)} characters from file.")
self.model.load_state_dict(torch.load(f"weights/{self.filename}_lstm_model.pt"))
def train(self):
print(f"Training \"{self.filename}\" LSTM model with {len(self.charbase)} characters.")
# Optimizer initialization, RMSprop for RNN
optimizer = optim.RMSprop(self.model.parameters(), lr=self.args["learning_rate"])
# Defining number of batches
num_batches = int(len(self.prefixes) / self.args["batch_size"])
# Set model in training mode
self.model.train()
losses = []
# Training pahse
for epoch in range(self.args["num_epochs"]):
# Mini batches
for i in range(num_batches):
# Batch definition
try:
x_batch = self.prefixes[i * self.args["batch_size"]:(i + 1) * self.args["batch_size"]]
y_batch = self.suffixes[i * self.args["batch_size"]:(i + 1) * self.args["batch_size"]]
except:
x_batch = self.prefixes[i * self.args["batch_size"]:]
y_batch = self.suffixes[i * self.args["batch_size"]:]
# Convert numpy array into torch tensors
x = torch.from_numpy(x_batch).type(torch.long).to(self.device)
y = torch.from_numpy(y_batch).type(torch.long).to(self.device)
# Feed the model
y_pred = self.model(x)
# Loss calculation
loss = F.cross_entropy(y_pred, y.squeeze()).to(self.device)
losses += [loss.item()]
# Clean gradients
optimizer.zero_grad()
# Calculate gradientes
loss.backward()
# Updated parameters
optimizer.step()
print("Epoch: %d , loss: %.5f " % (epoch, loss.item()))
torch.save(self.model.state_dict(), f"weights/{self.filename}_lstm_model.pt")
print(f"Saved \"{self.filename}\" LSTM model to file")
plt.plot(np.arange(0, len(losses)), losses)
plt.title(self.filename)
plt.show()
def generate_sentence(self):
# Randomly is selected the index from the set of sequences
start = np.random.randint(0, len(self.prefixes)-1)
# Convert back to string to match complete_sentence
pattern = "".join([self.__idx_to_char(char) for char in self.prefixes[start]]) # random sequence from the training text
return self.complete_sentence(pattern)
def complete_sentence(self, prefix):
print("Prefix:", prefix)
# Convert to indexes np.array
pattern = np.array([self.__char_to_idx(char) for char in prefix])
# Set the model in evalulation mode
self.model.eval()
# Define the softmax function
softmax = nn.Softmax(dim=1).to(self.device)
# In full_prediction we will save the complete prediction
full_prediction = pattern.copy()
print("Generating sentence...")
# Predic the next characters one by one, append chars to the starting pattern until . is reached, max 500 iterations
for _ in range(500):
# the numpy patterns is transformed into a tesor-type and reshaped
pattern = torch.from_numpy(pattern).type(torch.long).to(self.device)
pattern = pattern.view(1,-1)
# make a prediction given the pattern
prediction = self.model(pattern)
# it is applied the softmax function to the predicted tensor
prediction = softmax(prediction)
# the prediction tensor is transformed into a numpy array
prediction = prediction.squeeze().detach().cpu().numpy()
# it is taken the idx with the highest probability
arg_max = np.argmax(prediction)
# the current pattern tensor is transformed into numpy array
pattern = pattern.squeeze().detach().cpu().numpy()
# the window is sliced 1 character to the right
pattern = pattern[1:]
# the new pattern is composed by the "old" pattern + the predicted character
pattern = np.append(pattern, arg_max)
# the full prediction is saved
full_prediction = np.append(full_prediction, arg_max)
# Stop on . character
if self.__idx_to_char(arg_max) == ".":
break
full_prediction = "".join([self.__idx_to_char(value) for value in full_prediction])
print("Generated:", full_prediction)
return full_prediction

View File

@ -1,82 +0,0 @@
#!/usr/bin/env python3
import re
import random
from textgen import textgen
from rich.traceback import install
install()
# NOTE: This is word based, not character based
# @todo Serialize and save/load model (don't train on the server)
# @todo Maybe extract sentence beginnings and use them as starters?
class MarkovTextGenerator(textgen):
# The greater the order (prefix length), the lesser the variation in generation, but the better the sentences (generally).
# If the prefix length is high there are less options to choose from, so the sentences are very close to the training text.
def __init__(self, order): # Set order here for better interface (only needed for markov model)
self.order = order
def init(self, filename): # Filename is needed for every type of model so it's part of the interface
with open(f"./textfiles/{filename}.txt", "r") as file:
# Remove all characters except a-zäöüß'.,
self.wordbase = re.sub(r"[^a-zäöüß'.,]+", " ", file.read().lower()).split()
self.word_table = dict()
def load(self):
print(f"Loaded Markov chain of order {self.order} with {len(self.wordbase)} words from file.")
def train(self):
print(f"Training Markov chain of order {self.order} with {len(self.wordbase)} words.")
# init the frequencies
for i in range(len(self.wordbase) - self.order - 1): # Look at every word in range
prefix = tuple(self.wordbase[i:i+self.order]) # Look at the next self.order words from current position
suffix = self.wordbase[i+self.order] # The next word is the suffix
if prefix not in self.word_table: # New option wooo
self.word_table[prefix] = []
# if suffix not in self.table[prefix]: # disable for probabilities: if the suffixes are in the list multiple times they are more common
self.word_table[prefix].append(suffix)
print(f"Generated suffixes for {len(self.word_table)} prefixes.")
# def generate_random(self, n):
# fword = random.choice(list(self.word_table.keys())) # Random first word
# output = [*fword]
# for _ in range(self.order, n):
# output.append(self.generate_word_by_word(tuple(output[-self.order :])))
# return output
def generate_suffix_for_prefix(self, prefix: tuple):
if len(prefix) > self.order: # In this case we look at the last self.order elements of prefix
prefix = prefix[len(prefix)-self.order-1:-1]
if prefix not in self.word_table: # In this case we need to choose a possible suffix from the last word in the prefix (if prefix too short for example)
print(f"Prefix {prefix} not in table")
for key in self.word_table.keys():
if key[-1] == prefix[-1]:
return random.choice(self.word_table[key])
return random.choice(self.word_table[prefix])
def generate_sentence(self):
fword = random.choice(list(self.word_table.keys()))
output = [*fword]
while "." not in output[-1]:
output.append(self.generate_suffix_for_prefix(tuple(output[-self.order:])))
return output
def complete_sentence(self, prefix):
output = [*prefix]
while "." not in output[-1]:
output.append(self.generate_suffix_for_prefix(tuple(output[-self.order:])))
return output