RNN and LSTM

NLP tokenization and embedding layer


class BasicRNN(nn.Module):
    """Embedding -> single-layer ``nn.RNN`` -> linear head.

    The model consumes batches of token-index sequences and produces one
    output vector per sequence, computed from the RNN output at the final
    time step.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        """Build the three layers.

        Args:
            vocab_size: Number of entries in the embedding table.
            embedding_dim: Width of each embedding vector (RNN input size).
            hidden_dim: Width of the RNN hidden state.
            output_dim: Width of the vector returned for each sequence.
        """
        super().__init__()
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_dim,
        )
        # batch_first=True keeps the batch dimension in front for both the
        # RNN input and its per-step output.
        self.rnn = nn.RNN(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return predictions of shape [batch_size, output_dim].

        Args:
            x: Token indices of shape [batch_size, seq_length].
        """
        token_vectors = self.embedding(x)  # [batch_size, seq_length, embedding_dim]
        # The second return value (final hidden state) is not used here.
        step_outputs, _ = self.rnn(token_vectors)  # [batch_size, seq_length, hidden_dim]
        final_step = step_outputs[:, -1, :]  # [batch_size, hidden_dim]
        return self.fc(final_step)
class LSTMModel(nn.Module):
    """Embedding -> single-layer ``nn.LSTM`` -> linear head.

    Same layout as a plain RNN model, with the recurrent layer swapped for
    an LSTM. One output vector is produced per input sequence, computed from
    the LSTM output at the final time step.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        """Build the three layers.

        Args:
            vocab_size: Number of entries in the embedding table.
            embedding_dim: Width of each embedding vector (LSTM input size).
            hidden_dim: Width of the LSTM hidden and cell states.
            output_dim: Width of the vector returned for each sequence.
        """
        super().__init__()
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_dim,
        )
        # batch_first=True keeps the batch dimension in front for both the
        # LSTM input and its per-step output.
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return predictions of shape [batch_size, output_dim].

        Args:
            x: Token indices of shape [batch_size, seq_length].
        """
        token_vectors = self.embedding(x)  # [batch_size, seq_length, embedding_dim]
        # The LSTM also returns a (hidden, cell) state tuple; neither is used.
        step_outputs, _ = self.lstm(token_vectors)  # [batch_size, seq_length, hidden_dim]
        final_step = step_outputs[:, -1, :]  # [batch_size, hidden_dim]
        return self.fc(final_step)
# --- Hyperparameters ---------------------------------------------------------
# The output layer is as wide as the vocabulary: one score per known word.
vocab_size = len(word_to_index)
output_size = vocab_size
embedding_dim = 128
hidden_size = 128
batch_size = 64
num_epochs = 10
learning_rate = 0.001

# --- Data --------------------------------------------------------------------
# Rebind `data` / `targets` as int64 tensors, then wrap them in a shuffled,
# batched loader.
data = torch.tensor(data, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)
dataset = TensorDataset(data, targets)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# --- Model, loss and optimizer -----------------------------------------------
model = BasicRNN(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_size,
    output_dim=output_size,
)
model = model.to(device)  # nn.Module.to returns the module itself
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Function to train the model
def train_model(model, dataloader, optimizer, loss_function, num_epochs, device):
    """Train ``model`` for ``num_epochs`` passes over ``dataloader``.

    Every batch is moved to ``device``, run through the model, scored with
    ``loss_function`` and used for one optimizer step. After each epoch the
    mean of the per-batch losses is printed.

    Args:
        model: Module to optimise; switched to training mode on entry.
        dataloader: Re-iterable yielding ``(inputs, targets)`` tensor pairs.
            It does not have to support ``len()``: batches are counted while
            they are consumed.
        optimizer: Optimizer that updates ``model``'s parameters.
        loss_function: Callable ``(predictions, targets)`` returning a scalar
            tensor.
        num_epochs: Number of passes over ``dataloader``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        A list with the mean batch loss of each epoch, in epoch order.

    Raises:
        ValueError: If ``dataloader`` yields no batches during an epoch, in
            which case no mean loss can be computed.
    """
    model.train()  # Set the model to training mode
    epoch_losses = []
    for epoch in range(num_epochs):
        total_loss = 0.0
        num_batches = 0
        for num_batches, (inputs, targets) in enumerate(dataloader, start=1):
            # Move the batch to the device the model lives on (CPU or GPU).
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()           # Clear gradients from the previous step
            predictions = model(inputs)     # Forward pass
            loss = loss_function(predictions, targets)
            loss.backward()                 # Gradients of the loss w.r.t. the parameters
            optimizer.step()                # Single parameter update
            total_loss += loss.item()
        if num_batches == 0:
            # Avoid an opaque ZeroDivisionError below.
            raise ValueError(
                "dataloader yielded no batches; cannot compute an average loss"
            )
        mean_loss = total_loss / num_batches
        epoch_losses.append(mean_loss)
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {mean_loss}')
    return epoch_losses
        ```