From d5f1fbfbc478321228f8e9e0c2fd8a8dc5cc4581 Mon Sep 17 00:00:00 2001
From: ChUrl
Date: Sun, 13 Nov 2022 14:26:21 +0100
Subject: [PATCH] update textgen lstm

---
 textgen_lstm.py | 161 +++++++++++++++++++++++++++---------------------
 1 file changed, 90 insertions(+), 71 deletions(-)

diff --git a/textgen_lstm.py b/textgen_lstm.py
index b6fc233..019b343 100644
--- a/textgen_lstm.py
+++ b/textgen_lstm.py
@@ -2,6 +2,7 @@
 import re, random

 import numpy as np
+import matplotlib.pyplot as plt
 import torch
 import torch.nn.functional as F
 from textgen import textgen
@@ -16,9 +17,11 @@ install()

 class Model(nn.ModuleList):

-    def __init__(self, args):
+    def __init__(self, args, device):
         super(Model, self).__init__()

+        self.device = device
+
         self.batch_size = args["batch_size"]
         self.hidden_dim = args["hidden_dim"]
         self.input_size = args["vocab_size"]
@@ -26,7 +29,7 @@ class Model(nn.ModuleList):
         self.sequence_len = args["window"]

         # Dropout
-        self.dropout = nn.Dropout(0.25)
+        self.dropout = nn.Dropout(0.25)  # Don't need to set the device for the layers, as we transfer the whole model later

         # Embedding layer
         self.embedding = nn.Embedding(self.input_size, self.hidden_dim, padding_idx=0)
@@ -47,16 +50,16 @@ class Model(nn.ModuleList):
         # Bi-LSTM
         # hs = [batch_size x hidden_size]
         # cs = [batch_size x hidden_size]
-        hs_forward = torch.zeros(x.size(0), self.hidden_dim)
-        cs_forward = torch.zeros(x.size(0), self.hidden_dim)
-        hs_backward = torch.zeros(x.size(0), self.hidden_dim)
-        cs_backward = torch.zeros(x.size(0), self.hidden_dim)
+        hs_forward = torch.zeros(x.size(0), self.hidden_dim).to(self.device)  # Need to specify the device here, as these tensors are not part of the model directly
+        cs_forward = torch.zeros(x.size(0), self.hidden_dim).to(self.device)
+        hs_backward = torch.zeros(x.size(0), self.hidden_dim).to(self.device)
+        cs_backward = torch.zeros(x.size(0), self.hidden_dim).to(self.device)

         # LSTM
         # hs = [batch_size x (hidden_size * 2)]
         # cs = [batch_size x (hidden_size * 2)]
-        hs_lstm = torch.zeros(x.size(0), self.hidden_dim * 2)
-        cs_lstm = torch.zeros(x.size(0), self.hidden_dim * 2)
+        hs_lstm = torch.zeros(x.size(0), self.hidden_dim * 2).to(self.device)
+        cs_lstm = torch.zeros(x.size(0), self.hidden_dim * 2).to(self.device)

         # Weights initialization
         torch.nn.init.kaiming_normal_(hs_forward)
@@ -104,16 +107,43 @@ class LSTMTextGenerator(textgen):
         self.windowsize = windowsize  # We slide a window over the character sequence and look at the next letter,
                                       # similar to the Markov chain order

+    def init(self, filename):
+        self.filename = filename
+
         # Use this to generate one hot vector and filter characters
         self.letters = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
-                        "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", " "]
+                        "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "ä", "ö", "ü", ".", " "]

         with open(f"./textfiles/{filename}.txt", "r") as file:
             lines = [line.lower() for line in file.readlines()]  # lowercase list
             text = " ".join(lines)  # single string
             self.charbase = [char for char in text if char in self.letters]  # list of characters

+        # Select device
+        if torch.cuda.is_available():
+            dev = "cuda:0"
+            print("Selected GPU for LSTM")
+        else:
+            dev = "cpu"
+            print("Selected CPU for LSTM")
+        self.device = torch.device(dev)
+
+        # Init model
+        self.args = {
+            "window": self.windowsize,
+            "hidden_dim": 128,
+            "vocab_size": len(self.letters),
+            "batch_size": 128,
+            "learning_rate": 0.0005,
+            "num_epochs": 100
+        }
+        self.model = Model(self.args, self.device)
+        self.model.to(self.device)  # All model layers need to live on the correct device (cpu/gpu)
+
+        # Needed for both training and generation
+        self.__generate_char_sequences()
+
     # Helper shit

     def __char_to_idx(self, char):
@@ -148,72 +178,50 @@ class LSTMTextGenerator(textgen):

     # Interface shit

+    # TODO: Also save/load generated prefixes
     def load(self):
-        print(f"Loaded LSTM model with {len(self.charbase)} characters from file.")
+        print(f"Loading \"{self.filename}\" LSTM model with {len(self.charbase)} characters from file.")

-        # TODO: Deduplicate args
-        args = {
-            "window": self.windowsize,
-            "hidden_dim": 128,
-            "vocab_size": len(self.letters),
-            "batch_size": 128,
-            "learning_rate": 0.001,
-            "num_epochs": 50
-        }
-
-        self.model = Model(args)
-
-        # model.load_state_dict(torch.load('weights/kommunistisches_manifest_lstm_model.pt'))
+        self.model.load_state_dict(torch.load(f"weights/{self.filename}_lstm_model.pt"))

     def train(self):
-        print(f"Training LSTM model with {len(self.charbase)} characters.")
+        print(f"Training \"{self.filename}\" LSTM model with {len(self.charbase)} characters.")

-        args = {
-            "window": self.windowsize,
-            "hidden_dim": 128,
-            "vocab_size": len(self.letters),
-            "batch_size": 128,
-            "learning_rate": 0.001,
-            "num_epochs": 50
-        }
-
-        self.__generate_char_sequences()
-
-        # Model initialization
-        self.model = Model(args)
-
-        # Optimizer initialization
-        optimizer = optim.RMSprop(self.model.parameters(), lr=args["learning_rate"])
+        # Optimizer initialization, RMSprop for RNN
+        optimizer = optim.RMSprop(self.model.parameters(), lr=self.args["learning_rate"])

         # Defining number of batches
-        num_batches = int(len(self.prefixes) / args["batch_size"])
+        num_batches = int(len(self.prefixes) / self.args["batch_size"])

         # Set model in training mode
         self.model.train()

+        losses = []
+
         # Training phase
-        for epoch in range(args["num_epochs"]):
+        for epoch in range(self.args["num_epochs"]):

             # Mini batches
             for i in range(num_batches):

                 # Batch definition
                 try:
-                    x_batch = self.prefixes[i * args["batch_size"] : (i + 1) * args["batch_size"]]
-                    y_batch = self.suffixes[i * args["batch_size"] : (i + 1) * args["batch_size"]]
+                    x_batch = self.prefixes[i * self.args["batch_size"]:(i + 1) * self.args["batch_size"]]
+                    y_batch = self.suffixes[i * self.args["batch_size"]:(i + 1) * self.args["batch_size"]]
                 except:
-                    x_batch = self.prefixes[i * args["batch_size"] :]
-                    y_batch = self.suffixes[i * args["batch_size"] :]
+                    x_batch = self.prefixes[i * self.args["batch_size"]:]
+                    y_batch = self.suffixes[i * self.args["batch_size"]:]

                 # Convert numpy array into torch tensors
-                x = torch.from_numpy(x_batch).type(torch.long)
-                y = torch.from_numpy(y_batch).type(torch.long)
+                x = torch.from_numpy(x_batch).type(torch.long).to(self.device)
+                y = torch.from_numpy(y_batch).type(torch.long).to(self.device)

                 # Feed the model
                 y_pred = self.model(x)

                 # Loss calculation
-                loss = F.cross_entropy(y_pred, y.squeeze())
+                loss = F.cross_entropy(y_pred, y.squeeze()).to(self.device)
+                losses += [loss.item()]

                 # Clean gradients
                 optimizer.zero_grad()
@@ -226,35 +234,44 @@

             print("Epoch: %d , loss: %.5f " % (epoch, loss.item()))

-        torch.save(self.model.state_dict(), 'weights/kommunistisches_manifest_lstm_model.pt')
+        torch.save(self.model.state_dict(), f"weights/{self.filename}_lstm_model.pt")
+        print(f"Saved \"{self.filename}\" LSTM model to file")
+
+        plt.plot(np.arange(0, len(losses)), losses)
+        plt.title(self.filename)
+        plt.show()

     def generate_sentence(self):
+        # Randomly select a starting index from the set of sequences
+        start = np.random.randint(0, len(self.prefixes)-1)
+
+        # Convert back to string to match complete_sentence
+        pattern = "".join([self.__idx_to_char(char) for char in self.prefixes[start]])  # random sequence from the training text
+
+        return self.complete_sentence(pattern)
+
+    def complete_sentence(self, prefix):
+        print("Prefix:", prefix)
+
+        # Convert to indexes np.array
+        pattern = np.array([self.__char_to_idx(char) for char in prefix])
+
         # Set the model in evaluation mode
         self.model.eval()

         # Define the softmax function
-        softmax = nn.Softmax(dim=1)
-
-        # Randomly select the index from the set of sequences
-        start = np.random.randint(0, len(self.prefixes)-1)
-
-        # The pattern is defined given the random idx
-        pattern = self.prefixes[start]
-
-        # By making use of the dictionaries, it is printed the pattern
-        print("\nPattern: \n")
-        print(''.join([self.__idx_to_char(value) for value in pattern]), "\"")
+        softmax = nn.Softmax(dim=1).to(self.device)

         # In full_prediction we will save the complete prediction
         full_prediction = pattern.copy()

-        # the prediction starts, it is going to be predicted a given
-        # number of characters
-        for _ in range(250):
+        print("Generating sentence...")
+        # Predict the next characters one by one, appending each to the starting pattern until "." is reached (max 500 iterations)
+        for _ in range(500):

             # the numpy pattern is transformed into a tensor and reshaped
-            pattern = torch.from_numpy(pattern).type(torch.long)
+            pattern = torch.from_numpy(pattern).type(torch.long).to(self.device)
             pattern = pattern.view(1,-1)

             # make a prediction given the pattern
             prediction = self.model(pattern)
             # apply the softmax function to the predicted tensor
             prediction = softmax(prediction)

@@ -263,12 +280,12 @@
             # the prediction tensor is transformed into a numpy array
-            prediction = prediction.squeeze().detach().numpy()
+            prediction = prediction.squeeze().detach().cpu().numpy()
             # take the idx with the highest probability
             arg_max = np.argmax(prediction)

             # the current pattern tensor is transformed into a numpy array
-            pattern = pattern.squeeze().detach().numpy()
+            pattern = pattern.squeeze().detach().cpu().numpy()
             # the window is slid 1 character to the right
             pattern = pattern[1:]
             # the new pattern is composed of the "old" pattern + the predicted character
             pattern = np.append(pattern, arg_max)

@@ -277,8 +294,10 @@
             # the full prediction is saved
             full_prediction = np.append(full_prediction, arg_max)

-        print("prediction: \n")
-        print(''.join([self.__idx_to_char(value) for value in full_prediction]), "\"")
+            # Stop on the . character
+            if self.__idx_to_char(arg_max) == ".":
+                break

-    def complete_sentence(self, prefix):
-        pass
+        full_prediction = "".join([self.__idx_to_char(value) for value in full_prediction])
+        print("Generated:", full_prediction)
+        return full_prediction
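
Reviewer note, outside the patch proper: the change that recurs through this diff is PyTorch's standard device-transfer pattern, so a minimal self-contained sketch of it follows. The sketch is illustrative only; TinyRNN and every name in it are hypothetical and do not appear in textgen_lstm.py.

import torch
import torch.nn as nn

# Pick the device the same way init() does above
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class TinyRNN(nn.Module):
    def __init__(self, hidden_dim):
        super().__init__()
        # Registered layers are transferred later by a single model.to(device)
        self.cell = nn.LSTMCell(hidden_dim, hidden_dim)

    def forward(self, x):
        # Tensors created ad hoc inside forward() default to CPU, so they need
        # an explicit move, exactly like hs_forward/cs_forward in the patch
        hs = torch.zeros(x.size(0), x.size(1)).to(x.device)
        cs = torch.zeros(x.size(0), x.size(1)).to(x.device)
        return self.cell(x, (hs, cs))

model = TinyRNN(8).to(device)                   # one call moves every registered layer
h, c = model(torch.randn(4, 8, device=device))  # inputs are moved by the caller, as in train()

The same rule explains the .detach().cpu().numpy() edits in complete_sentence(): NumPy can only view host memory, so a CUDA tensor must be copied back with .cpu() before conversion, while model.to(self.device) in init() covers all registered layers in one call.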