diff --git a/tests/test_models.py b/tests/test_models.py
index 526c52d9..05776e36 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -171,7 +171,7 @@ def test_pvt():
         depths=[2, 2, 2, 2],
         sr_ratios=[8, 4, 2, 1],
         decoder_config=[512, 10],
-        num_classes=10,
+        n_classes=10,
     )
     out = model(img_3channels_224)
     assert out.shape == (4, 10)
@@ -187,7 +187,7 @@ def test_pvt():
         depths=[2, 2, 2, 2],
         sr_ratios=[8, 4, 2, 1],
         decoder_config=512,
-        num_classes=10,
+        n_classes=10,
     )
     out = model(img_3channels_224)
     assert out.shape == (4, 10)
@@ -198,17 +198,17 @@ def test_pvt():
     assert out.shape == (4, 1000)
     del model
 
-    model = MODEL_REGISTRY.get("PVTClassificationV2")(num_classes=10)
+    model = MODEL_REGISTRY.get("PVTClassificationV2")(n_classes=10)
     out = model(img_3channels_224)
     assert out.shape == (4, 10)
     del model
 
-    model = MODEL_REGISTRY.get("PVTClassificationV2")(num_classes=10)
+    model = MODEL_REGISTRY.get("PVTClassificationV2")(n_classes=10)
     out = model(img_3channels_224)
     assert out.shape == (4, 10)
     del model
 
-    model = MODEL_REGISTRY.get("PVTClassification")(num_classes=12)
+    model = MODEL_REGISTRY.get("PVTClassification")(n_classes=12)
     out = model(img_3channels_224)
     assert out.shape == (4, 12)
     del model
@@ -305,7 +305,7 @@ def test_cvt():
         embedding_dim=768,
         num_heads=1,
         mlp_ratio=4.0,
-        num_classes=10,
+        n_classes=10,
         p_dropout=0.5,
         attn_dropout=0.3,
         drop_path=0.2,
@@ -356,7 +356,7 @@ def test_cct():
         embedding_dim=768,
         num_heads=1,
         mlp_ratio=4.0,
-        num_classes=10,
+        n_classes=10,
         p_dropout=0.5,
         attn_dropout=0.3,
         drop_path=0.2,
diff --git a/vformer/models/classification/cct.py b/vformer/models/classification/cct.py
index 6bbc36d3..e4d28ab8 100644
--- a/vformer/models/classification/cct.py
+++ b/vformer/models/classification/cct.py
@@ -32,7 +32,7 @@ class CCT(BaseClassificationModel):
         Number of heads in each transformer layer
     mlp_ratio:float
         Ratio of mlp heads to embedding dimension
-    num_classes: int
+    n_classes: int
         Number of classes for classification
     p_dropout: float
         Dropout probability
@@ -63,7 +63,7 @@ def __init__(
         head_dim=96,
         num_heads=1,
         mlp_ratio=4.0,
-        num_classes=1000,
+        n_classes=1000,
         p_dropout=0.1,
         attn_dropout=0.1,
         drop_path=0.1,
@@ -163,10 +163,10 @@ def __init__(
             assert (
                 decoder_config[0] == embedding_dim
             ), f"Configurations do not match for MLPDecoder, First element of `decoder_config` expected to be {embedding_dim}, got {decoder_config[0]} "
-            self.decoder = MLPDecoder(config=decoder_config, n_classes=num_classes)
+            self.decoder = MLPDecoder(config=decoder_config, n_classes=n_classes)
 
         else:
-            self.decoder = MLPDecoder(config=embedding_dim, n_classes=num_classes)
+            self.decoder = MLPDecoder(config=embedding_dim, n_classes=n_classes)
 
     def forward(self, x):
         """
@@ -178,7 +178,7 @@ def forward(self, x):
         Returns
         ----------
         torch.Tensor
-            Returns tensor of size `num_classes`
+            Returns tensor of size `n_classes`
 
         """
         x = self.embedding(x)
diff --git a/vformer/models/classification/convit.py b/vformer/models/classification/convit.py
index 4a2824a7..4a1b78ab 100644
--- a/vformer/models/classification/convit.py
+++ b/vformer/models/classification/convit.py
@@ -100,7 +100,7 @@ def forward(self, x):
         Returns
         ----------
         torch.Tensor
-            Returns tensor of size `num_classes`
+            Returns tensor of size `n_classes`
 
         """
         x = self.patch_embedding(x)
diff --git a/vformer/models/classification/convvt.py b/vformer/models/classification/convvt.py
index d6474b0d..b33be8f8 100644
--- a/vformer/models/classification/convvt.py
+++ b/vformer/models/classification/convvt.py
@@ -20,7 +20,7 @@ class ConvVT(nn.Module):
         Number of input channels in image, default is 3
     num_stages: int
         Number of stages in encoder block, default is 3
-    num_classes: int
+    n_classes: int
         Number of classes for classification, default is 1000
     * The following are all in list of int/float with length num_stages
     patch_size: list[int]
@@ -75,11 +75,11 @@ def __init__(
         stride_q=[1, 1, 1],
         in_channels=3,
         num_stages=3,
-        num_classes=1000,
+        n_classes=1000,
     ):
         super().__init__()
 
-        self.num_classes = num_classes
+        self.n_classes = n_classes
         self.num_stages = num_stages
         self.stages = []
 
@@ -111,9 +111,7 @@ def __init__(
 
         # Classifier head
         self.head = (
-            nn.Linear(embedding_dim[-1], num_classes)
-            if num_classes > 0
-            else nn.Identity()
+            nn.Linear(embedding_dim[-1], n_classes) if n_classes > 0 else nn.Identity()
         )
 
         trunc_normal_(self.head.weight, std=0.02)
diff --git a/vformer/models/classification/cross.py b/vformer/models/classification/cross.py
index 0e6c4558..429e8268 100644
--- a/vformer/models/classification/cross.py
+++ b/vformer/models/classification/cross.py
@@ -204,7 +204,7 @@ def forward(self, img):
         Returns
         ----------
         torch.Tensor
-            Returns tensor of size `num_classes`
+            Returns tensor of size `n_classes`
 
         """
         emb_s = self.s(img)
diff --git a/vformer/models/classification/cvt.py b/vformer/models/classification/cvt.py
index da2662e0..6d2ee4ae 100644
--- a/vformer/models/classification/cvt.py
+++ b/vformer/models/classification/cvt.py
@@ -32,7 +32,7 @@ class CVT(BaseClassificationModel):
         Number of heads in each transformer layer, default is 1
     mlp_ratio:float
         Ratio of mlp heads to embedding dimension, default is 4.0
-    num_classes: int
+    n_classes: int
         Number of classes for classification, default is 1000
     p_dropout: float
         Dropout probability, default is 0.0
@@ -57,7 +57,7 @@ def __init__(
         num_layers=1,
         num_heads=1,
         mlp_ratio=4.0,
-        num_classes=1000,
+        n_classes=1000,
         p_dropout=0.1,
         attn_dropout=0.1,
         drop_path=0.1,
@@ -149,9 +149,9 @@ def __init__(
             assert (
                 decoder_config[0] == embedding_dim
             ), f"Configurations do not match for MLPDecoder, First element of `decoder_config` expected to be {embedding_dim}, got {decoder_config[0]} "
-            self.decoder = MLPDecoder(config=decoder_config, n_classes=num_classes)
+            self.decoder = MLPDecoder(config=decoder_config, n_classes=n_classes)
         else:
-            self.decoder = MLPDecoder(config=embedding_dim, n_classes=num_classes)
+            self.decoder = MLPDecoder(config=embedding_dim, n_classes=n_classes)
 
     def forward(self, x):
         """
@@ -163,7 +163,7 @@ def forward(self, x):
         Returns
         ----------
         torch.Tensor
-            Returns tensor of size `num_classes`
+            Returns tensor of size `n_classes`
 
         """
 
diff --git a/vformer/models/classification/pyramid.py b/vformer/models/classification/pyramid.py
index a0b74c3e..611b4b09 100644
--- a/vformer/models/classification/pyramid.py
+++ b/vformer/models/classification/pyramid.py
@@ -22,7 +22,7 @@ class PVTClassification(nn.Module):
         List of patch size
     in_channels: int
         Input channels in image, default=3
-    num_classes: int
+    n_classes: int
         Number of classes for classification
     embed_dims: int
         Patch Embedding dimension
@@ -61,7 +61,7 @@ def __init__(
         img_size=224,
         patch_size=[7, 3, 3, 3],
         in_channels=3,
-        num_classes=1000,
+        n_classes=1000,
         embed_dims=[64, 128, 256, 512],
         num_heads=[1, 2, 4, 8],
         mlp_ratio=[4, 4, 4, 4],
@@ -159,9 +159,9 @@ def __init__(
             assert (
                 decoder_config[0] == embed_dims[-1]
             ), f"Configurations do not match for MLPDecoder, First element of `decoder_config` expected to be {embed_dims[-1]}, got {decoder_config[0]} "
-            self.decoder = MLPDecoder(config=decoder_config, n_classes=num_classes)
+            self.decoder = MLPDecoder(config=decoder_config, n_classes=n_classes)
         else:
-            self.decoder = MLPDecoder(config=embed_dims[-1], n_classes=num_classes)
+            self.decoder = MLPDecoder(config=embed_dims[-1], n_classes=n_classes)
 
     def forward(self, x):
         """
@@ -173,7 +173,7 @@ def forward(self, x):
         Returns
         ----------
         torch.Tensor
-            Returns tensor of size `num_classes`
+            Returns tensor of size `n_classes`
 
         """
         B = x.shape[0]
@@ -216,7 +216,7 @@ class PVTClassificationV2(PVTClassification):
         List of patch size
     in_channels: int
         Input channels in image, default is 3
-    num_classes: int
+    n_classes: int
         Number of classes for classification
     embedding_dims: int
         Patch Embedding dimension
@@ -255,7 +255,7 @@ def __init__(
         img_size=224,
         patch_size=[7, 3, 3, 3],
         in_channels=3,
-        num_classes=1000,
+        n_classes=1000,
         embedding_dims=[64, 128, 256, 512],
         num_heads=[1, 2, 4, 8],
         mlp_ratio=[4, 4, 4, 4],
@@ -276,7 +276,7 @@ def __init__(
             img_size=img_size,
             patch_size=patch_size,
             in_channels=in_channels,
-            num_classes=num_classes,
+            n_classes=n_classes,
             embed_dims=embedding_dims,
             num_heads=num_heads,
             mlp_ratio=mlp_ratio,
diff --git a/vformer/models/classification/swin.py b/vformer/models/classification/swin.py
index 41cbe9b6..4df00c1a 100644
--- a/vformer/models/classification/swin.py
+++ b/vformer/models/classification/swin.py
@@ -149,7 +149,7 @@ def forward(self, x):
         Returns
         ----------
         torch.Tensor
-            Returns tensor of size `num_classes`
+            Returns tensor of size `n_classes`
 
         """
         x = self.patch_embed(x)
diff --git a/vformer/models/classification/vanilla.py b/vformer/models/classification/vanilla.py
index 4c987b86..ea839a5c 100644
--- a/vformer/models/classification/vanilla.py
+++ b/vformer/models/classification/vanilla.py
@@ -108,7 +108,7 @@ def forward(self, x):
         Returns
         ----------
         torch.Tensor
-            Returns tensor of size `num_classes`
+            Returns tensor of size `n_classes`
 
         """
         x = self.patch_embedding(x)
diff --git a/vformer/models/dense/PVT/detection.py b/vformer/models/dense/PVT/detection.py
index 7b6a2c47..216922fb 100644
--- a/vformer/models/dense/PVT/detection.py
+++ b/vformer/models/dense/PVT/detection.py
@@ -21,7 +21,7 @@ class PVTDetection(nn.Module):
         List of patch size
     in_channels: int
         Input channels in image, default=3
-    num_classes: int
+    n_classes: int
         Number of classes for classification
     embedding_dims: int
         Patch Embedding dimension
@@ -197,7 +197,7 @@ class PVTDetectionV2(PVTDetection):
         List of patch size
     in_channels: int
         Input channels in image, default=3
-    num_classes: int
+    n_classes: int
         Number of classes for classification
     embedding_dims: int
         Patch Embedding dimension
diff --git a/vformer/models/dense/dpt.py b/vformer/models/dense/dpt.py
index 13f84021..32365c84 100644
--- a/vformer/models/dense/dpt.py
+++ b/vformer/models/dense/dpt.py
@@ -526,7 +526,7 @@ def __init__(self, scale_factor, mode, align_corners=False):
         self.align_corners = align_corners
 
     def forward(self, x):
-        """ Forward pass """
+        """Forward pass"""
 
         x = self.interp(
             x,
@@ -596,7 +596,7 @@ def __init__(self, features, activation=nn.GELU, bn=True):
         self.skip_add = nn.quantized.FloatFunctional()
 
     def forward(self, x):
-        """ forward pass"""
+        """forward pass"""
         out = self.activation(x)
         out = self.conv1(out)
         if self.bn == True:
@@ -651,7 +651,7 @@ def __init__(
         self.skip_add = nn.quantized.FloatFunctional()
 
     def forward(self, *xs):
-        """Forward pass """
+        """Forward pass"""
         output = xs[0]
 
         if len(xs) == 2:
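
Note (outside the patch): after this rename, callers pass n_classes instead of num_classes. A minimal usage sketch, assuming PVTClassificationV2 is importable from vformer.models.classification.pyramid as the file path above suggests; it mirrors the updated calls in tests/test_models.py:

    import torch
    from vformer.models.classification.pyramid import PVTClassificationV2

    # was num_classes=10 before this patch
    model = PVTClassificationV2(n_classes=10)
    out = model(torch.randn(4, 3, 224, 224))  # batch of four 3x224x224 images
    assert out.shape == (4, 10)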