@@ -22,53 +22,182 @@ ______________________________________________________________________

</div>

- ## Maximum flexibility, minimum code changes
+ # Lightning Fabric: Expert control.

- With just a few code changes, run any PyTorch model on any distributed hardware, no boilerplate!
+ Run on any device at any scale with expert-level control over the PyTorch training loop and scaling strategy. You can even write your own Trainer.

- - Easily switch from running on CPU to GPU (Apple Silicon, CUDA, …), TPU, multi-GPU or even multi-node training
- - Use state-of-the-art distributed training strategies (DDP, FSDP, DeepSpeed) and mixed precision out of the box
- - All the device logic boilerplate is handled for you
- - Designed with multi-billion parameter models in mind
- - Build your own custom Trainer using Fabric primitives for training checkpointing, logging, and more
+ Fabric is designed for the most complex models and training workloads: foundation model scaling, LLMs, diffusion models, transformers, reinforcement learning, and active learning, at any scale.
+
+ <table>
+ <tr>
+ <th>What to change</th>
+ <th>Resulting Fabric Code (copy me!)</th>
+ </tr>
+ <tr>
+ <td>
+ <sub>

```diff
+ import lightning as L
+ import torch; import torchvision as tv

- import torch
- import torch.nn as nn
- from torch.utils.data import DataLoader, Dataset
-
- class PyTorchModel(nn.Module):
-     ...
-
- class PyTorchDataset(Dataset):
-     ...
-
- + fabric = L.Fabric(accelerator="cuda", devices=8, strategy="ddp")
+ + fabric = L.Fabric()
+ fabric.launch()

- - device = "cuda" if torch.cuda.is_available() else "cpu
- model = PyTorchModel(...)
- optimizer = torch.optim.SGD(model.parameters())
+ model = tv.models.resnet18()
+ optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
+ - device = "cuda" if torch.cuda.is_available() else "cpu"
+ - model.to(device)
+ model, optimizer = fabric.setup(model, optimizer)
- dataloader = DataLoader(PyTorchDataset(...), ...)
+
+ dataset = tv.datasets.CIFAR10("data", download=True,
+                               train=True,
+                               transform=tv.transforms.ToTensor())
+ dataloader = torch.utils.data.DataLoader(dataset, batch_size=8)
+ dataloader = fabric.setup_dataloaders(dataloader)
- model.train()

+ model.train()
+ num_epochs = 10
for epoch in range(num_epochs):
    for batch in dataloader:
-         input, target = batch
- -         input, target = input.to(device), target.to(device)
+         inputs, labels = batch
+ -         inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
-         output = model(input)
-         loss = loss_fn(output, target)
+         outputs = model(inputs)
+         loss = torch.nn.functional.cross_entropy(outputs, labels)
-         loss.backward()
+         fabric.backward(loss)
        optimizer.step()
-         lr_scheduler.step()
```

+ </sub>
+ </td>
+ <td>
+ <sub>
+
+ ```python
+ import lightning as L
+ import torch; import torchvision as tv
+
+ fabric = L.Fabric()
+ fabric.launch()
+
+ model = tv.models.resnet18()
+ optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
+ model, optimizer = fabric.setup(model, optimizer)
+
+ dataset = tv.datasets.CIFAR10("data", download=True,
+                               train=True,
+                               transform=tv.transforms.ToTensor())
+ dataloader = torch.utils.data.DataLoader(dataset, batch_size=8)
+ dataloader = fabric.setup_dataloaders(dataloader)
+
+ model.train()
+ num_epochs = 10
+ for epoch in range(num_epochs):
+     for batch in dataloader:
+         inputs, labels = batch
+         optimizer.zero_grad()
+         outputs = model(inputs)
+         loss = torch.nn.functional.cross_entropy(outputs, labels)
+         fabric.backward(loss)
+         optimizer.step()
+ ```
+
+ </sub>
+ </td>
+ </tr>
+ </table>
+
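+ Note how `fabric.backward(loss)` replaces the usual `loss.backward()`: Fabric routes the backward pass through the active strategy and precision plugin (for example, gradient scaling under `precision="16-mixed"`), so it must be called in place of backward on the loss directly.
+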
+ ## Key features
+
+ <details>
+ <summary>Easily switch from running on CPU to GPU (Apple Silicon, CUDA, …), TPU, multi-GPU or even multi-node training</summary>
+
+ ```python
+ # Use your available hardware
+ # no code changes needed
+ fabric = Fabric()
+
+ # Run on GPUs (CUDA or MPS)
+ fabric = Fabric(accelerator="gpu")
+
+ # 8 GPUs
+ fabric = Fabric(accelerator="gpu", devices=8)
+
+ # 256 GPUs, multi-node
+ fabric = Fabric(accelerator="gpu", devices=8, num_nodes=32)
+
+ # Run on TPUs
+ fabric = Fabric(accelerator="tpu")
+ ```
+
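+ `fabric.launch()` can also take a function to run in each spawned process, which is handy when the training code is wrapped in a function. A minimal sketch (the `train` function name and the print statement are illustrative):
+
+ ```python
+ import lightning as L
+
+ def train(fabric):
+     # runs once per process; Fabric passes itself as the first argument
+     print(f"Hello from process {fabric.global_rank} on device {fabric.device}")
+
+ fabric = L.Fabric(accelerator="cpu", devices=2)
+ fabric.launch(train)
+ ```
+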
+ </details>
+
+ <details>
+ <summary>Use state-of-the-art distributed training strategies (DDP, FSDP, DeepSpeed) and mixed precision out of the box</summary>
+
+ ```python
+ # Use state-of-the-art distributed training techniques
+ fabric = Fabric(strategy="ddp")
+ fabric = Fabric(strategy="deepspeed")
+ fabric = Fabric(strategy="fsdp")
+
+ # Switch the precision
+ fabric = Fabric(precision="16-mixed")
+ fabric = Fabric(precision="64")
+ ```
+
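+ Beyond the string shorthands, a strategy can be passed as a configured object for finer control. A sketch, assuming the `DDPStrategy` and `FSDPStrategy` constructor arguments shown here match your installed version (check the API docs if not):
+
+ ```python
+ from lightning.fabric.strategies import DDPStrategy, FSDPStrategy
+
+ # forward additional arguments to torch.nn.parallel.DistributedDataParallel
+ fabric = Fabric(strategy=DDPStrategy(find_unused_parameters=True))
+
+ # offload sharded parameters and gradients to CPU with FSDP
+ fabric = Fabric(strategy=FSDPStrategy(cpu_offload=True))
+ ```
+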
+ </details>
+
+ <details>
+ <summary>All the device logic boilerplate is handled for you</summary>
+
+ ```diff
+ # no more of this!
+ - model.to(device)
+ - batch.to(device)
+ ```
+
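+ `fabric.setup(...)` places the model on the right device and `fabric.setup_dataloaders(...)` does the same for every batch, so the manual calls above disappear. For any remaining tensor, a one-line sketch of the escape hatch:
+
+ ```python
+ # move a tensor (or a collection of tensors) to the device Fabric selected
+ batch = fabric.to_device(batch)
+ ```
+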
+ </details>
+
+ <details>
+ <summary>Build your own custom Trainer using Fabric primitives for training, checkpointing, logging, and more</summary>
+
+ ```python
+ import lightning as L
+ import torch
+
+
+ class MyCustomTrainer:
+     def __init__(self, accelerator="auto", strategy="auto", devices="auto", precision="32-true"):
+         self.fabric = L.Fabric(accelerator=accelerator, strategy=strategy, devices=devices, precision=precision)
+
+     def fit(self, model, optimizer, dataloader, max_epochs):
+         self.fabric.launch()
+
+         model, optimizer = self.fabric.setup(model, optimizer)
+         dataloader = self.fabric.setup_dataloaders(dataloader)
+         model.train()
+
+         for epoch in range(max_epochs):
+             for batch in dataloader:
+                 input, target = batch
+                 optimizer.zero_grad()
+                 output = model(input)
+                 loss = torch.nn.functional.cross_entropy(output, target)
+                 self.fabric.backward(loss)
+                 optimizer.step()
+ ```
+
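+ The summary also promises checkpointing and logging; a minimal sketch of how these primitives could slot into `fit` (the `"checkpoint.ckpt"` path and the logged keys are illustrative):
+
+ ```python
+ # gather everything that should be saved; Fabric handles strategy-specific saving
+ state = {"model": model, "optimizer": optimizer, "epoch": epoch}
+ self.fabric.save("checkpoint.ckpt", state)
+
+ # later: restore the objects in `state` in place
+ self.fabric.load("checkpoint.ckpt", state)
+
+ # send metrics to any logger passed via L.Fabric(loggers=...)
+ self.fabric.log_dict({"loss": loss.item(), "epoch": epoch})
+ ```
+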
+ You can find a more extensive example in our [examples](../../examples/fabric/build_your_own_trainer).
+
+ </details>
+
+ ______________________________________________________________________
+
+ <div align="center">
+ <a href="https://lightning.ai/docs/fabric/stable/">Read the Lightning Fabric docs</a>
+ </div>
+
______________________________________________________________________

# Getting started