@@ -1,42 +1,67 @@
+import argparse
 import torch
 from torch.utils.data import DataLoader
+from typing import NamedTuple
 from xor_dataset import XORDataset
+from utils import register_parser_types

-BATCH_SIZE = 32
-HIDDEN_SIZE = 1
-NUM_LAYERS = 1

-model = torch.nn.LSTM(
-    batch_first=True, input_size=1, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS)
+class ModelParams(NamedTuple):
+  # train loop
+  batch_size: int = 32
+  epochs: int = 10

-optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
-loss_fn = torch.nn.BCEWithLogitsLoss()
-train_loader = DataLoader(XORDataset(), batch_size=BATCH_SIZE, shuffle=True)
+  # lstm
+  hidden_size: int = 1
+  learning_rate: float = 1e-1
+  num_layers: int = 1

-step = 0

-for inputs, targets in train_loader:
-  # [batch, bits] -> [batch, bits, 1]
-  inputs = torch.unsqueeze(inputs, -1)
+def train(params: ModelParams):
+  model = torch.nn.LSTM(
+      batch_first=True, input_size=1, hidden_size=params.hidden_size, num_layers=params.num_layers)

-  # [1] -> [1, 1]
-  targets = torch.unsqueeze(targets, -1)
+  optimizer = torch.optim.SGD(model.parameters(), lr=params.learning_rate)
+  loss_fn = torch.nn.BCEWithLogitsLoss()
+  train_loader = DataLoader(XORDataset(), batch_size=params.batch_size, shuffle=True)

-  optimizer.zero_grad()
+  step = 0

-  # reset hidden state per sequence
-  h0 = c0 = inputs.new_zeros((NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE))
+  for epoch in range(1, params.epochs + 1):
+    for inputs, targets in train_loader:
+      # [batch, bits] -> [batch, bits, 1]
+      inputs = torch.unsqueeze(inputs, -1)

-  final_outputs, _ = model(inputs, (h0, c0))
+      # [batch, parity] -> [batch, parity, 1]
+      targets = torch.unsqueeze(targets, -1)

-  # select the last prediction
-  # XXX we should calculate parity per bit in the lstm
-  loss = loss_fn(final_outputs[:, -1], targets)
+      optimizer.zero_grad()

-  loss.backward()
-  optimizer.step()
-  step += 1
+      # reset hidden state per sequence
+      # (size from inputs: the final batch may be smaller than batch_size)
+      h0 = c0 = inputs.new_zeros((params.num_layers, inputs.shape[0], params.hidden_size))

-  loss_val = loss.item()
-  if step % 100 == 0:
-    print(f'LOSS step {step}: {loss_val}')
+      final_outputs, _ = model(inputs, (h0, c0))
+
+      # calculate the loss on every timestep's parity prediction
+      loss = loss_fn(final_outputs, targets)
+
+      loss.backward()
+      optimizer.step()
+      step += 1
+
+      loss_val = loss.item()
+      if step % 500 == 0:
+        print(f'epoch {epoch}, step {step}, loss {loss_val}')
+
+
+def get_arguments():
+  parser = argparse.ArgumentParser()
+  register_parser_types(parser, ModelParams)
+  arguments = parser.parse_args()
+  return arguments
+
+
+if __name__ == '__main__':
+  params = get_arguments()
+  train(params)
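
The register_parser_types helper imported from utils is not part of this diff. Here is a minimal sketch of what it plausibly does, assuming it turns each ModelParams field into a typed command-line flag using the NamedTuple's annotations and defaults:

import argparse


def register_parser_types(parser: argparse.ArgumentParser, params_cls) -> None:
  # one --flag per NamedTuple field, typed from its annotation and
  # defaulting to the value declared on the class (e.g. ModelParams)
  for name, field_type in params_cls.__annotations__.items():
    parser.add_argument(
        f'--{name}', type=field_type, default=params_cls._field_defaults.get(name))

With a helper along these lines, every default above becomes overridable from the shell, e.g. --batch_size 64 --learning_rate 0.05 --epochs 20.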
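
XORDataset (from xor_dataset) is likewise defined outside this diff. A hedged sketch consistent with the shape comments in the code, where inputs are [batch, bits] and each target bit is the running parity of the input prefix (which is what the per-timestep loss after this commit trains against); the dataset size and sequence length here are assumptions:

import torch
from torch.utils.data import Dataset


class XORDataset(Dataset):
  def __init__(self, num_sequences=100000, num_bits=50):
    # random bit strings, as floats for the LSTM input
    self.inputs = torch.randint(0, 2, (num_sequences, num_bits)).float()
    # target bit t is the XOR (parity) of input bits 0..t;
    # a cumulative sum mod 2 computes exactly that prefix parity
    self.targets = (self.inputs.cumsum(dim=-1) % 2).float()

  def __len__(self):
    return len(self.inputs)

  def __getitem__(self, idx):
    return self.inputs[idx], self.targets[idx]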