diff --git a/proteinnetpy/mutation.py b/proteinnetpy/mutation.py
index fbc56b3..2a3ccf0 100644
--- a/proteinnetpy/mutation.py
+++ b/proteinnetpy/mutation.py
@@ -1,6 +1,6 @@
 """
-Module containing functions for mutating ProteinNetRecords and feeding that
-data into further computations (e.g. Tensorflow)
+Module containing functions for mutating ProteinNetRecords and feeding that data into further computations (e.g. Tensorflow).
+These functions are fairly specific so may often be better used as inspiration to build users own solutions.
 """
 import random
 import logging
@@ -15,27 +15,57 @@
 
 class ProteinNetMutator(LabeledFunction):
     """
-    Map function mutating records and outputting the mutant sequence
-    in additions to other features. Produces the
-    appropriate data for NN training with tensorflow.
+    Map function generating mutated records.
+
+
+    Apply a mutator function to a ProteinNet record and return the mutated sequence. This is a LabeledFunction that can be used to generate a TensorFlow Dataset. This setup is fairly specific to your downstream model design, so it will often be more useful to use it as a base to create an alternate implementation.
 
     Returns are in the form:
         ([wt_seq], mut_seq, [phi, psi, chi1]), label, [weights]
 
-    mutator: mutator function
-    per_position: whether mutants deleteriousness is tracked per sequence or per
-                 mutant. Requires a compatible mutator returning a tuple of
-                 mutant_seq, deleterious_inds, neutral_inds
-    include: variables to return for computation, alongside mutant sequence.
-             Options: (wt, phi, psi, chi1)
-    weights: weightings for [wt, deleterious, neutral] variants when processing
-             per residue variants
-    encoding: Optional dictionary mapping alphabetically encoded AA indeces to a new
-              scheme (e.g. that used in UniRep)
-    **kwargs: arguments passed on to mutator
+    Attributes
+    ----------
+    wildtype      : bool
+        Outputs wildtype as well as mutant sequence.
+    phi           : bool
+        Outputs Phi backbone angles.
+    psi           : bool
+        Outputs Psi backbone angles.
+    chi           : bool
+        Outputs rotamer angles.
+    mutator       : function
+        Mutator function taking a ProteinNetRecord and returning the sampled variants and their deleteriousness. The return format depends on `per_position`. If per_position=False must return a tuple with the mutated sequence index array and whether it is deleterious (1/0). If per_position=True must return a tuple with mutant_seq, deleterious_inds, neutral_inds arrays.
+    kwargs        : dict
+        Keyword arguments passed to the mutator function.
+    encoding      : dict
+        Encoding mapping alphabetically encoded integer indeces to a new scheme.
+    weights       : list
+        List of float weights for WT, Deleterious and Neutral variants when mutating per position.
+    func          : function
+        Function applied when the class is called. This is a mutator applied to the whole sequence or per position derived from the initialisation parameters.
+    output_shapes, output_types : tuple
+        Tuple of output shapes and types (see data.LabeledFunction for details)
     """
     def __init__(self, mutator, per_position=False, include=('wt',),
                  weights=(0, 1, 1), encoding=None, **kwargs):
+        """
+        Initialise the mutator.
+
+        Parameters
+        ----------
+        mutator       : function
+            Mutator function taking a ProteinNetRecord and returning the sampled variants and their deleteriousness. The return format depends on `per_position`. If per_position=False must return a tuple with the mutated sequence index array and whether it is deleterious (1/0). If per_position=True must return a tuple with mutant_seq, deleterious_inds, neutral_inds arrays.
+        per_position  : bool
+            Return deleteriousness for each position rather than the entire sequence.
+        include:      : sequence_like including some of {"wt", "phi", "psi", "chi1"}
+            Variables to return for computation, alongside mutant sequence.
+        weights       : list
+            weightings for [wt, deleterious, neutral] variants when processing per residue variants
+        encoding      : dict
+            Optional dictionary mapping alphabetically encoded AA integer indeces to a new scheme (e.g. that used in UniRep).
+        **kwargs      : dict
+            arguments passed on to mutator
+        """
         self.wildtype = 'wt' in include
         self.phi = 'phi' in include
         self.psi = 'psi' in include
@@ -81,7 +111,12 @@ def __init__(self, mutator, per_position=False, include=('wt',),
 
     def _per_position_func(self, record):
         """
-        Function producing per position labels
+        Function producing per position mutants and labels from a ProteinNetRecord
+
+        Parameters
+        ----------
+        record : ProteinNetRecord
+            Record to mutate.
         """
         mut_seq, deleterious, neutral = self.mutator(record, **self.kwargs)
 
@@ -115,7 +150,12 @@ def _per_position_func(self, record):
 
     def _whole_seq_func(self, record):
         """
-        Function producing a single label for the sequence
+        Function applying mutator to a record and producing mutant data and a single label for the sequence from a ProteinNetRecord.
+
+        Parameters
+        ----------
+        record : ProteinNetRecord
+            Record to mutate.
         """
         mut_seq, label = self.mutator(record, **self.kwargs)
 
@@ -146,8 +186,27 @@ def _whole_seq_func(self, record):
 def sequence_mutator(record, p_deleterious=0.5, max_mutations=3,
                      max_deleterious=0.01, min_neutral=0.1):
     """
-    Generate mutated sequences from ProteinNetRecords with a few deleterious or
-    neutral variants, along with a label identifying them as deleterious (1) or neutral (0)
+    Generate mutated sequences from a ProteinNetRecord with a few deleterious or neutral variants.
+
+    Generate mutated sequences from a ProteinNetRecord with a few deleterious and/or neutral variants. First randomly choose to generate a deleterious or neutral sequence then sample some of the corresponding variant types based on the records MSA frequencies.
+
+    Parameters
+    ----------
+    record          : ProteinNetRecord
+        Record to mutate.
+    p_deleterious   : float
+        Probability of returning a deleterious set of variants.
+    max_mutations   : int
+        Maximum number of mutations to make.
+    max_deleterious : float
+        Maximum MSA frequency for a variant to be considered deleterious.
+    min_neutral     : float
+        Minimum MSA frequency for a variant to be considered neutral.
+
+    Returns
+    -------
+    tuple
+        Tuple of the format (seq, deleterious). The first entry is the mutated amino acid sequence, encoded with integer indeces and the second is 1 if the sequence is deleterious and 0 if neutral.
     """
     deleterious = int(random.random() < p_deleterious)
     seq = record.primary_ind.copy()
@@ -170,9 +229,28 @@ def sequence_mutator(record, p_deleterious=0.5, max_mutations=3,
 def per_position_mutator(record, max_deleterious=2, max_neutral=4,
                          max_deleterious_freq=0.01, min_neutral_freq=0.1):
     """
-    Geneate mutated sequences from ProteinNetRecords and return the variant sequence along
-    with labels identifying deleterious and neutral positions. Will always generate at least
-    one variant.
+    Generate mutated sequences from ProteinNetRecords with labels identifying deleterious and neutral mutations.
+
+    Generate mutated sequences from ProteinNetRecords with labels identifying where deleterious and neutral mutations have been made.
+    Will always generate at least one variant.
+
+    Parameters
+    ----------
+    record               : ProteinNetRecord
+        Record to mutate.
+    max_deleterious      : int
+        Maximum number of deleterious variants to make.
+    max_neutral          : int
+        Maximum number of neutral variants to make.
+    max_deleterious_freq : float
+        Maximum MSA frequency for a variant to be considered deleterious.
+    min_neutral_freq     : float
+        Minimum MSA frequency for a variant to be considered neutral.
+
+    Returns
+    -------
+    tuple
+        Tuple of the format seq, deleterious, neutral. The first entry is the mutated sequence, the second a list of positions with deleterious variants and the third a list of positions with neutral variants.
     """
     seq = record.primary_ind.copy()
 
@@ -223,15 +301,27 @@ def per_position_mutator(record, max_deleterious=2, max_neutral=4,
 
 def sample_deleterious(num, pssm, wt_seq, max_freq=0.025, mask=None):
     """
-    Sample deleterious mutations froma pssm
-
-    num: number of mutations to make
-    pssm: PSSM in a np.array
-    wt_seq: WT sequence of the protein
-    max_freq: maximum frequency considered deleterious
-    mask: positions to mask
-
-    returns: np.array(positions), np.array(substitutions)
+    Sample deleterious mutations from a MSA frequency matrix.
+
+    Randomly choose a selection of deleterious variants from a MSA frequency matrix.
+
+    Parameters
+    ----------
+    num      : int
+        Number of mutations to make.
+    pssm     : float ndarray (20, N)
+        MSA frequency matrix to determine neutral and deleterious variants.
+    wt_seq   : int ndarray (N,)
+        WT sequence of the protein (as int indeces corresponding to the MSA matrix rows).
+    max_freq : float
+        Maximum frequency considered deleterious.
+    mask     : int array_like
+        Array of positions not to mutate.
+
+    Returns
+    -------
+    tuple
+        Numpy array of position indeces chosen and an array of the alternate amino acid in each position (as MSA row indeces).
     """
     if num == 0:
         raise ValueError('num must be > 0')
@@ -261,13 +351,23 @@ def sample_neutral(num, pssm, wt_seq, min_freq=0.025, mask=None):
     """
     Sample deleterious mutations froma pssm
 
-    num: number of mutations to make
-    pssm: PSSM in a np.array
-    wt_seq: WT sequence of the protein
-    min_freq: minimum frequency considered neutral
-    mask: positions to mask
-
-    returns: np.array(positions), np.array(substitutions, as PSSM row indeces)
+    Parameters
+    ----------
+    num      : int
+        Number of mutations to make.
+    pssm     : float ndarray (20, N)
+        MSA frequency matrix to determine neutral and deleterious variants.
+    wt_seq   : int ndarray (N,)
+        WT sequence of the protein (as int indeces corresponding to the MSA matrix rows).
+    min_freq : float
+        Minimum frequency considered neutral.
+    mask     : int array_like
+        Array of positions not to mutate.
+
+    Returns
+    -------
+    tuple
+        Numpy array of position indeces chosen and an array of the alternate amino acid in each position (as MSA row indeces).
     """
     if num == 0:
         return None, None
diff --git a/proteinnetpy/record.py b/proteinnetpy/record.py
index 4b36090..2fccf30 100644
--- a/proteinnetpy/record.py
+++ b/proteinnetpy/record.py
@@ -23,12 +23,94 @@ class ProteinNetRecord:
     # pylint: disable=invalid-name
     # pylint: disable=redefined-builtin
     """
+    Record from a ProteinNet dataset.
+
     A record from a ProteinNet database, with support for torsion angles and additional
-    per-position profiles (e.g. precalculated from something like UniRep)
+    per-position profiles (e.g. precalculated from a language model).
+    The only required attributes are id and primary, allow various others are derived from these on initialisation.
+    Many attributes are identified from the ID, based on the format of the main ProteinNet files IDs.
+    This will likely fail if other data is put into the same format, but should not reduce functionality much as only a few specific functions utilise this data, which is mainly for information purposes.
+
+    Attributes
+    ----------
+    id                 : str
+        Record ID
+    split              : {'training', 'testing', 'validation'}
+        The data split the record comes from. Identified from the ID.
+    record_class       : str
+        Split the record comes from, for validation records. Identified from the ID.
+    source             : str
+        Source of the record. Identified from the ID.
+    casp_id            : str
+        CASP ID of the record, for records in the test set sourced from CASP entries. Identified from the ID.
+    astral_id          : str
+        Astral ID of the record, for those sourced from Astral. Identified from the ID.
+    pdb_id             : str
+        PDB ID of the record, for those deriving from a PDB entry. Identified from the ID.
+    pdb_chain_number   : str
+        Numeric PDB Chain of the record, for those deriving from a PDB entry. Identified from the ID.
+    pdb_chain          : str
+        Alphabetical PDB Chain of the record, for those deriving from a PDB entry. Identified from the ID.
+    evolutionary       : float ndarray (20, N)
+        Variant frequencies accross a multiple sequence alignment for this protein.
+    info_content       : float ndarray (N,)
+        Information content of the MSA at each position.
+    primary            : U1 ndarray (N,)
+        Protein sequence in single letter code form.
+    primary_ind        : int ndarray (N,)
+        Protein sequence in integer form, based on the index of amino acids in single letter alphabetical order (see record.AMINO_ACIDS and record.AA_HASH).
+    secondary          : ndarray
+        Protein secondary structure (currently not included in the dataset)
+    tertiary           : float ndarray (3, 3N)
+        Residue atom coordinates. Rows are x,y,z cartesian coordinates. Each residue includes 3 columns for the N, CA and C' backbone atoms. This means the atom at position i starts in column 3i and a matrix of N/CA/C' atom coordinates can be extracted with indeces in the arithmetic series 3x + c with c = 0 for N, c = 1 for CA or c = 2 for C'.
+    mask               : int ndarray (N,)
+        Mask indicating which residues have structural information. Residues marked with 1 have information present. The mask needs to be tripled to apply to the tertiary structure array. None if tertiary is None.
+    rama               : float ndarray (3, N)
+        Backbone angles for each residue, calculated from tertiary. The rows (first index) represent omega, phi and psi angles. Either in radians or normalised between -1 and 1.
+    rama_mask          : int ndarray (N,)
+        Mask indicating which residues have backbone angles, with 1 indicating information is present. None if rama is None.
+    chi                : float ndarray (N,)
+        Chi1 side chain rotamer conformation. Either in radians or normalised between -1 and 1. This requires more information than `tertiary` provides so can only be calculated from the full structural model (for example with the add_angles_to_proteinnet script).
+    chi_mask           : int ndarray (N,)
+        Mask indicating which residues have rotamer angles. None if chi is None.
+    profiles           : ndarray (X, N)
+        Additional profiles for each amino acid position. Can contain any additional information the user requires.
+    _normalised_angles : bool
+        Torsion angles have been normalised to vary between -1 and 1, rather than -pi to pi.
     """
     def __init__(self, id, primary, evolutionary=None, info_content=None, secondary=None,
                  tertiary=None, mask=None, rama=None, rama_mask=None, chi=None,
                  chi_mask=None, profiles=None):
+        """
+        Initialise the record.
+
+        Parameters
+        ----------
+        id                 : str
+            Record ID
+        primary            : U1 ndarray (N,)
+            Protein sequence in single letter code form.
+        evolutionary       : float ndarray (20, N)
+            Variant frequencies accross a multiple sequence alignment for this protein.
+        info_content       : float ndarray (N,)
+            Information content of the MSA at each position.
+        secondary          : ndarray
+            Protein secondary structure (currently not included in the dataset)
+        tertiary           : float ndarray (3, 3N)
+            Residue atom coordinates. Rows are x,y,z cartesian coordinates. Each residue includes 3 columns for the N, CA and C' backbone atoms. This means the atom at position i starts in column 3i and a matrix of N/CA/C' atom coordinates can be extracted with indeces in the arithmetic series 3x + c with c = 0 for N, c = 1 for CA or c = 2 for C'.
+        mask               : int ndarray (N,)
+            Mask indicating which residues have structural information. Residues marked with 1 have information present. The mask needs to be tripled to apply to the tertiary structure array. None if tertiary is None.
+        rama               : float ndarray (3, N)
+            Backbone angles for each residue, calculated from tertiary. The rows (first index) represent omega, phi and psi angles. Either in radians or normalised between -1 and 1.
+        rama_mask          : int ndarray (N,)
+            Mask indicating which residues have backbone angles, with 1 indicating information is present. None if rama is None.
+        chi                : float ndarray (N,)
+            Chi1 side chain rotamer conformation. Either in radians or normalised between -1 and 1. This requires more information than `tertiary` provides so can only be calculated from the full structural model (for example with the add_angles_to_proteinnet script).
+        chi_mask           : int ndarray (N,)
+            Mask indicating which residues have rotamer angles. None if chi is None.
+        profiles           : ndarray (X, N)
+            Additional profiles for each amino acid position. Can contain any additional information the user requires.
+        """
         self.id = id
 
         # Manage the different properties for Testing/Validation/Training sets
@@ -76,6 +158,11 @@ def __init__(self, id, primary, evolutionary=None, info_content=None, secondary=
     def _set_id_properties(self, id):
         """
         Extract details from the identifier
+
+        Paramters
+        ---------
+        id : str
+            Record ID.
         """
         if CASP_REGEX.match(id):
             self.split = 'testing'
@@ -165,15 +252,32 @@ def __str__(self):
 
     def enumerate_sequence(self, aa_hash=AA_HASH):
         """
-        Generate a numeric representation of the sequence with each
-        amino acid represented by an interger index, as mapped in aa_hash.
+        Generate a numeric representation of the sequence with each amino acid represented by an interger index.
+
+        Generate a numeric representation of the sequence with each amino acid represented by an interger index. The default indeces are in single letter code alphabetical order. This function is used in __init__ to generate primary_ind.
+
+        Parameters
+        ----------
+        aa_hash : dict, optional
+            Dictionary mapping single letter codes to ints. Can be replaced to interface with a model that enumerates amino acids differently. For example Unirep orders amino acids alphabetically by full name.
         """
         return np.array([aa_hash[aa] for aa in self.primary], dtype=np.int)
 
     def get_one_hot_sequence(self, aa_hash=AA_HASH):
         """
         Generate a 1-hot encoded matrix of the proteins sequence.
-        aa_hash maps amino acids to their row number.
+
+        Generate a 1-hot encoded matrix of the proteins sequence. The default indeces are in single letter code alphabetical order, which can be altered to feed into models expecting different orders.
+
+        Parameters
+        ----------
+        aa_hash : dict, optional
+            Dictionary mapping single letter codes to ints. Can be replaced to interface with a model that enumerates amino acids differently. For example Unirep orders amino acids alphabetically by full name.
+
+        Returns
+        -------
+        int ndarray (20, N)
+            One-hot encoded primary sequence for the record. Matrix with 20 rows representing each amino acid. Each column has a single 1 in the row of its amino acid.
         """
         num_aas = max(v for k, v in aa_hash.items()) + 1
         indeces = np.array([aa_hash[aa] for aa in self.primary])
@@ -184,8 +288,9 @@ def get_one_hot_sequence(self, aa_hash=AA_HASH):
 
     def calculate_backbone_angles(self):
         """
-        Calculate Omega, Phi, and Psi from the ProteinNet 3D coordinates and set them
-        as instance variables
+        Calculate Omega, Phi, and Psi backbone angles and set the rama attribute.
+
+        Calculate Omega, Phi, and Psi backbone angles from the tertiary structure included in ProteinNet, and set the results as the rama attribute. The rama_mask attribute is also caculated and set.
         """
         coords = self.tertiary.T
         psi = np.array([calc_dihedral(coords[i:(i+4)]) for i in range(0, 3*len(self)-3, 3)] + [0])
@@ -206,8 +311,14 @@ def calculate_backbone_angles(self):
 
     def normalise_angles(self, factor=np.pi):
         """
-        Normalise backbone and chi angles to betweeen -1 and 1. Will only
-        apply normalisation once per record.
+        Normalise backbone and chi angles to betweeen -1 and 1.
+
+        Normalise backbone and chi angles to betweeen -1 and 1. Also sets a flag indicating angles have been normalised, and does nothing if this is set to prevent normalising twice.
+
+        Parameters
+        ----------
+        factor : numeric
+            Factor to normalise angles by. It will not generally be useful to change this since the package naturally works in radians between -pi and pi, but may be useful if you need to work with angles in other formats.
         """
         if not self._normalised_angles:
             self._normalised_angles = True
@@ -216,7 +327,14 @@ def normalise_angles(self, factor=np.pi):
 
     def distance_matrix(self):
         """
-        Calculate the distance matrix between residues C-alpha atoms
+        Calculate the distance matrix between residues C-alpha atoms.
+
+        Calculate the distance matrix between residues C-alpha atoms. ProteinNet coordinates and therefore the distance matrix are in nanometers.
+
+        Returns
+        -------
+        float ndarray (N, N)
+            Distance matrix where X[i,j] gives the distance in nanometers between residue i and j.
         """
         calpha = np.copy(self.tertiary[:, 1::3])