📚 The doc issue
Hi guys,
I want to run an LSTM model, non-delegated, on my Cortex-M4 target. This is the model:
import torch
from torch import nn
from torch.export import export, export_for_training, ExportedProgram
from executorch.exir import ExecutorchBackendConfig, ExecutorchProgramManager
import executorch.exir as exir
from torch.utils.data import DataLoader, TensorDataset
import pytorch_lightning as pl


# Define the LSTM model
class LSTMModel(pl.LightningModule):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        return lstm_out

    def training_step(self, batch, batch_idx):
        x, y = batch
        lstm_out = self(x)  # Forward pass
        # Align the loss computation with the target shape
        lstm_last_step = lstm_out[:, -1, :]  # Take the output of the last time step
        loss = nn.functional.mse_loss(lstm_last_step, y)  # Compute MSE loss
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.01)


# Dummy data for training
x_data = torch.randn(10, 5, 1)  # Input: [batch_size, seq_length, input_size]
y_data = torch.randn(10, 3)     # Target: [batch_size, hidden_size] for the last time step
dataset = TensorDataset(x_data, y_data)
train_loader = DataLoader(dataset, batch_size=3)

# Model training
model = LSTMModel(input_size=1, hidden_size=3)
trainer = pl.Trainer(max_epochs=5, logger=False)
trainer.fit(model, train_loader)
# Forward-only export via torch.export / ExecuTorch
model.eval()
example_input = torch.randn(1, 5, 1)  # Example input
pre_autograd_aten_dialect = export_for_training(
    model,  # Export the trained model
    (example_input,),
).module()
aten_dialect: ExportedProgram = export(pre_autograd_aten_dialect, (example_input,))
print(aten_dialect)

edge_program: exir.EdgeProgramManager = exir.to_edge(aten_dialect)
executorch_program: exir.ExecutorchProgramManager = edge_program.to_executorch(
    ExecutorchBackendConfig(
        passes=[],  # User-defined passes
    )
)

with open("model.pte", "wb") as file:
    file.write(executorch_program.buffer)
print("LSTM-only model saved as model.pte")
The problem is that I cannot find an LSTM ATen operator in https://github.com/pytorch/executorch/blob/main/kernels/aten/functions.yaml.
What am I doing wrong here?
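For reference, here is one way to list the operators that the export actually produced, so their names can be checked against functions.yaml. This is only a minimal sketch that reuses the aten_dialect and edge_program variables from the script above, not an official ExecuTorch API:

# Print every operator called in the ATen dialect graph.
for node in aten_dialect.graph_module.graph.nodes:
    if node.op == "call_function":
        print(node.target)

# The same inspection works on the edge dialect after to_edge().
for node in edge_program.exported_program().graph_module.graph.nodes:
    if node.op == "call_function":
        print(node.target)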
I appreciate your help in advance :)
Cheers,
Chris
Suggest a potential alternative/fix
It would be great to have a documentation section that shows how to list the operators used in the ATen (and edge) dialect representation of an exported program.
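As an illustration of what such a section could cover, collecting the unique operator names from an ExportedProgram only takes a few lines. The helper below is hypothetical (written for this issue, not an existing ExecuTorch function); the resulting names are what one would then look for in functions.yaml:

from torch.export import ExportedProgram

# Hypothetical helper (not part of ExecuTorch): return the unique operator
# names appearing in an ExportedProgram's graph.
def list_operators(ep: ExportedProgram) -> list[str]:
    ops = set()
    for node in ep.graph_module.graph.nodes:
        if node.op == "call_function":
            ops.add(str(node.target))
    return sorted(ops)

# Example usage with the programs from the script above:
# print("\n".join(list_operators(aten_dialect)))
# print("\n".join(list_operators(edge_program.exported_program())))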