diff --git a/scripts/apply_normative_models_yeo17.ipynb b/scripts/apply_normative_models_yeo17.ipynb new file mode 100644 index 0000000..f31e52d --- /dev/null +++ b/scripts/apply_normative_models_yeo17.ipynb @@ -0,0 +1,825 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2d8fb4c8-4360-4fdc-b0a2-e1c2e22bd8f9", + "metadata": { + "id": "2d8fb4c8-4360-4fdc-b0a2-e1c2e22bd8f9" + }, + "source": [ + "## Using lifespan models to make predictions on new data\n", + "\n", + "This notebook shows how to apply the coefficients from pre-estimated normative models to new data. This can be done in two different ways: (i) using a new set of data derived from the same sites used to estimate the model and (ii) on a completely different set of sites. In the latter case, we also need to estimate the site effect, which requires some calibration/adaptation data. As an illustrative example, we use a dataset derived from several [OpenNeuro datasets](https://openneuro.org/) and adapt the learned model to make predictions on these data. \n", + "\n", + "First, if necessary, we install PCNtoolkit (note: this tutorial requires at least version 0.20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d05182a-5346-49d2-bfbf-fd3769ecc061", + "metadata": { + "id": "8d05182a-5346-49d2-bfbf-fd3769ecc061" + }, + "outputs": [], + "source": [ + "! pip install pcntoolkit==0.26" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ddd7b3cb-b018-4ed4-8b55-15728d8c5411", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ddd7b3cb-b018-4ed4-8b55-15728d8c5411", + "outputId": "5760e660-4773-4173-f1d4-ecd107c19040" + }, + "outputs": [], + "source": [ + "! git clone https://github.com/predictive-clinical-neuroscience/braincharts.git" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1849f76-b17d-4286-bf57-50ff56e81bf8", + "metadata": { + "id": "b1849f76-b17d-4286-bf57-50ff56e81bf8" + }, + "outputs": [], + "source": [ + "# we need to be in the scripts folder when we import the libraries in the code block below,\n", + "# because there is a function called nm_utils that is in the scripts folder that we need to import\n", + "import os\n", + "os.chdir('/content/braincharts/scripts/') #this path is setup for running on Google Colab. Change it to match your local path if running locally" + ] + }, + { + "cell_type": "markdown", + "id": "b2227bc7-e798-470a-99bc-33561ce4511b", + "metadata": { + "id": "b2227bc7-e798-470a-99bc-33561ce4511b" + }, + "source": [ + "Now we import the required libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff661cf2-7d80-46bb-bcfb-1650a93eed3d", + "metadata": { + "id": "ff661cf2-7d80-46bb-bcfb-1650a93eed3d" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import pickle\n", + "from matplotlib import pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "from pcntoolkit.normative import estimate, predict, evaluate\n", + "from pcntoolkit.util.utils import compute_MSLL, create_design_matrix\n", + "from nm_utils import calibration_descriptives, remove_bad_subjects, load_2d" + ] + }, + { + "cell_type": "markdown", + "id": "78719463-28b2-4849-b970-cfbe2f07d214", + "metadata": { + "id": "78719463-28b2-4849-b970-cfbe2f07d214" + }, + "source": [ + "We need to decompress the model directory. 
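The cells below do this with `tar` from the shell; if you are running in an environment without shell access, the same archive can be unpacked directly from Python using the standard-library `tarfile` module. This is only a minimal sketch, assuming the Colab paths used throughout this notebook:\n",
    "\n",
    "```python\n",
    "import os\n",
    "import tarfile\n",
    "\n",
    "# location of the pre-trained model archive (paths as configured in this notebook)\n",
    "models_dir = '/content/braincharts/models/'\n",
    "archive = os.path.join(models_dir, 'lifespan_yeo17_22K_40sites.tar.gz')\n",
    "\n",
    "# unpack the archive into the models directory\n",
    "with tarfile.open(archive) as tf:\n",
    "    tf.extractall(models_dir)\n",
    "```\n",
    "\n",
    "Either way, the extracted model folder should end up under `/content/braincharts/models/`, which is where the rest of the notebook expects to find it.\n",
    "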
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b1d4d4b-68ab-4bba-87f5-6062995805d0", + "metadata": { + "id": "3b1d4d4b-68ab-4bba-87f5-6062995805d0" + }, + "outputs": [], + "source": [ + "os.chdir('/content/braincharts/models/')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4b7b2f4-c514-4d4f-a6b0-9461e1b20831", + "metadata": { + "id": "d4b7b2f4-c514-4d4f-a6b0-9461e1b20831", + "tags": [] + }, + "outputs": [], + "source": [ + "# we will use the biggest sample as our training set (approx. N=22000 subjects from 43 sites)\n", + "! tar -xvf lifespan_yeo17_22K_40sites.tar.gz" + ] + }, + { + "cell_type": "markdown", + "id": "802b1da6-04cc-4310-af81-f50d38c3e653", + "metadata": { + "id": "802b1da6-04cc-4310-af81-f50d38c3e653" + }, + "source": [ + "Next, we configure some basic variables, like where we want the analysis to be done and which model we want to use.\n", + "\n", + "**Note:** We maintain a list of site ids for each dataset, which describe the site names in the training and test data (`site_ids_tr` and `site_ids_te`), plus also the adaptation data . The training site ids are provided as a text file in the distribution and the test ids are extracted automatically from the pandas dataframe (see below). If you use additional data from the sites (e.g. later waves from ABCD), it may be necessary to adjust the site names to match the names in the training set. See the accompanying paper for more details" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f52e2a19-9b63-4f0f-97c1-387f1a1872a2", + "metadata": { + "id": "f52e2a19-9b63-4f0f-97c1-387f1a1872a2" + }, + "outputs": [], + "source": [ + "# which model do we wish to use?\n", + "model_name = 'lifespan_yeo17_22K_40sites'\n", + "site_names = 'site_ids_yeo17_40sites.txt'\n", + "\n", + "# where the analysis takes place\n", + "root_dir = '/content/braincharts'\n", + "\n", + "# where the data files live\n", + "data_dir = '/content/braincharts/docs'\n", + "\n", + "# where the models live\n", + "out_dir = os.path.join(root_dir, 'models', model_name)\n", + "\n", + "# load a set of site ids from this model. 
This must match the training data\n", + "with open(os.path.join(root_dir,'docs', site_names)) as f:\n", + " site_ids_tr = f.read().splitlines()" + ] + }, + { + "cell_type": "markdown", + "id": "8dbaebd7-4f86-47d8-82a5-1776eb96690f", + "metadata": { + "id": "8dbaebd7-4f86-47d8-82a5-1776eb96690f" + }, + "source": [ + "### Download test dataset" + ] + }, + { + "cell_type": "markdown", + "id": "3aab54a5-2579-48d8-a81b-bbd34cea1213", + "metadata": { + "id": "3aab54a5-2579-48d8-a81b-bbd34cea1213" + }, + "source": [ + "### Load test data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "262d429a-160b-4ba3-9ba4-9acc195bc644", + "metadata": { + "id": "262d429a-160b-4ba3-9ba4-9acc195bc644" + }, + "outputs": [], + "source": [ + "test_data = os.path.join(data_dir, 'OpenNeuroTransfer_yeo17_te.csv')\n", + "\n", + "df_te = pd.read_csv(test_data)\n", + "\n", + "# extract a list of unique site ids from the test set\n", + "site_ids_te = sorted(set(df_te['site'].to_list()))" + ] + }, + { + "cell_type": "markdown", + "id": "c636509a-8b12-43f1-811c-08cb22640be2", + "metadata": { + "id": "c636509a-8b12-43f1-811c-08cb22640be2" + }, + "source": [ + "### (Optional) Load adaptation data\n", + "\n", + "If the data you wish to make predictions for is not derived from the same scanning sites as those in the trainig set, it is necessary to learn the site effect so that we can account for it in the predictions. In order to do this in an unbiased way, we use a separate dataset, which we refer to as 'adaptation' data. This must contain data for all the same sites as in the test dataset and we assume these are coded in the same way, based on a the 'sitenum' column in the dataframe. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53551023-aff6-4934-ad2d-d77bc63c562d", + "metadata": { + "id": "53551023-aff6-4934-ad2d-d77bc63c562d" + }, + "outputs": [], + "source": [ + "adaptation_data = os.path.join(data_dir, 'OpenNeuroTransfer_yeo17_ad.csv')\n", + "\n", + "df_ad = pd.read_csv(adaptation_data)\n", + "\n", + "# extract a list of unique site ids from the test set\n", + "site_ids_ad = sorted(set(df_ad['site'].to_list()))\n", + "\n", + "if not all(elem in site_ids_ad for elem in site_ids_te):\n", + " print('Warning: some of the testing sites are not in the adaptation data')" + ] + }, + { + "cell_type": "markdown", + "id": "4f73e30e-c693-44b8-98c6-52b71b577ea8", + "metadata": { + "id": "4f73e30e-c693-44b8-98c6-52b71b577ea8", + "tags": [] + }, + "source": [ + "### Configure which models to fit\n", + "\n", + "Now, we configure which imaging derived phenotypes (IDPs) we would like to process. This is just a list of column names in the dataframe we have loaded above. \n", + "\n", + "We could load the whole set (i.e. all phenotypes for which we have models for ... " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b48e104c-cbac-4ae2-8377-cd3ff80162fd", + "metadata": { + "id": "b48e104c-cbac-4ae2-8377-cd3ff80162fd" + }, + "outputs": [], + "source": [ + "# load the list of idps for Yeo-17 parcellation network pairs\n", + "with open(os.path.join(root_dir,'docs','phenotypes_yeo17.txt')) as f:\n", + " idp_ids = f.read().splitlines()" + ] + }, + { + "cell_type": "markdown", + "id": "280731ad-47d8-43e2-8cb5-4eccfd9f3f81", + "metadata": { + "id": "280731ad-47d8-43e2-8cb5-4eccfd9f3f81" + }, + "source": [ + "... 
or alternatively, we could just specify a list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b74d75f-77a5-474a-9c9b-29aab1ce53a2", + "metadata": { + "id": "8b74d75f-77a5-474a-9c9b-29aab1ce53a2" + }, + "outputs": [], + "source": [ + "idp_ids = ['VisualB_TemporalParietal', 'SensorimotorA_SensorimotorB']" + ] + }, + { + "cell_type": "markdown", + "id": "56ee1f7f-8684-4f1c-b142-a68176407029", + "metadata": { + "id": "56ee1f7f-8684-4f1c-b142-a68176407029" + }, + "source": [ + "### Configure covariates \n", + "\n", + "Now, we configure some parameters to fit the model. First, we choose which columns of the pandas dataframe contain the covariates (age and sex). The site parameters are configured automatically later on by the `configure_design_matrix()` function, when we loop through the IDPs in the list\n", + "\n", + "The supplied coefficients are derived from a 'warped' Bayesian linear regression model, which uses a nonlinear warping function to model non-Gaussianity (`sinarcsinh`) plus a non-linear basis expansion (a cubic b-spline basis set with 5 knot points, which is the default value in the PCNtoolkit package). Since we are sticking with the default value, we do not need to specify any parameters for this, but we do need to specify the limits. We choose to pad the input by a few years either side of the input range. We will also set a couple of options that control the estimation of the model\n", + "\n", + "For further details about the likelihood warping approach, see the accompanying paper and [Fraza et al 2021](https://www.biorxiv.org/content/10.1101/2021.04.05.438429v1)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62312b8e-4972-4238-abf9-87d9bb33cc10", + "metadata": { + "id": "62312b8e-4972-4238-abf9-87d9bb33cc10" + }, + "outputs": [], + "source": [ + "# which data columns do we wish to use as covariates? \n", + "cols_cov = ['age','sex', 'mean_FD']\n", + "\n", + "# limits for cubic B-spline basis \n", + "xmin = -5 \n", + "xmax = 110\n", + "\n", + "# Absolute Z treshold above which a sample is considered to be an outlier (without fitting any model)\n", + "outlier_thresh = 7" + ] + }, + { + "cell_type": "markdown", + "id": "42bc1072-e9ed-4f2a-9fdd-cbd626a61542", + "metadata": { + "id": "42bc1072-e9ed-4f2a-9fdd-cbd626a61542" + }, + "source": [ + "### Make predictions\n", + "\n", + "This will make predictions for each IDP separately. This is done by extracting a column from the dataframe (i.e. specifying the IDP as the response variable) and saving it as a numpy array. Then, we configure the covariates, which is a numpy data array having the number of rows equal to the number of datapoints in the test set. The columns are specified as follows: \n", + "\n", + "- A global intercept (column of ones)\n", + "- The covariate columns (here age and sex, coded as 0=female/1=male)\n", + "- Dummy coded columns for the sites in the training set (one column per site)\n", + "- Columns for the basis expansion (seven columns for the default parameterisation)\n", + "\n", + "Once these are saved as numpy arrays in ascii format (as here) or (alternatively) in pickle format, these are passed as inputs to the `predict()` method in the PCNtoolkit normative modelling framework. These are written in the same format to the location specified by `idp_dir`. 
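For example, once the prediction loop below has finished, you can list what was written for a given IDP. This is just a sketch: `out_dir` and `idp_ids` are defined above, the exact set of files depends on whether site adaptation was needed, and the file names follow the ones loaded later in this notebook.\n",
    "\n",
    "```python\n",
    "import glob\n",
    "import os\n",
    "\n",
    "# inspect the outputs written for the first IDP (run this after the loop below)\n",
    "idp_dir = os.path.join(out_dir, idp_ids[0])\n",
    "\n",
    "# yhat_, ys2_ and Z_ files written by predict() with the 'predict' suffix used later on\n",
    "print(sorted(glob.glob(os.path.join(idp_dir, '*_predict.txt'))))\n",
    "\n",
    "# response, covariate and site files saved by the loop itself\n",
    "print(sorted(glob.glob(os.path.join(idp_dir, '*_te.txt'))))\n",
    "```\n",
    "\n",
    "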
At the end of this step, we have a set of predictions and Z-statistics for the test dataset that we can take forward to further analysis.\n", + "\n", + "Note that when we need to make predictions on new data, the procedure is more involved, since we need to prepare, process and store covariates, response variables and site ids for the adaptation data. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07b7471b-c334-464f-8273-b409b7acaac2", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "07b7471b-c334-464f-8273-b409b7acaac2", + "outputId": "76fddc99-bed3-4bfc-f663-c21b1f39be37", + "tags": [] + }, + "outputs": [], + "source": [ + "for idp_num, idp in enumerate(idp_ids): \n", + " print('Running IDP', idp_num, idp, ':')\n", + " idp_dir = os.path.join(out_dir, idp)\n", + " os.chdir(idp_dir)\n", + " \n", + " # extract and save the response variables for the test set\n", + " y_te = df_te[idp].to_numpy()\n", + " \n", + " # save the variables\n", + " resp_file_te = os.path.join(idp_dir, 'resp_te.txt') \n", + " np.savetxt(resp_file_te, y_te)\n", + " \n", + " # configure and save the design matrix\n", + " cov_file_te = os.path.join(idp_dir, 'cov_bspline_te.txt')\n", + " X_te = create_design_matrix(df_te[cols_cov], \n", + " site_ids = df_te['site'],\n", + " all_sites = site_ids_tr,\n", + " basis = 'bspline', \n", + " xmin = xmin, \n", + " xmax = xmax)\n", + " np.savetxt(cov_file_te, X_te)\n", + " \n", + " # check whether all sites in the test set are represented in the training set\n", + " if all(elem in site_ids_tr for elem in site_ids_te):\n", + " print('All sites are present in the training data')\n", + " \n", + " # just make predictions\n", + " yhat_te, s2_te, Z = predict(cov_file_te, \n", + " alg='blr', \n", + " respfile=resp_file_te, \n", + " model_path=os.path.join(idp_dir,'Models'))\n", + " else:\n", + " print('Some sites missing from the training data. 
Adapting model')\n", + " \n", + " # save the covariates for the adaptation data\n", + " X_ad = create_design_matrix(df_ad[cols_cov], \n", + " site_ids = df_ad['site'],\n", + " all_sites = site_ids_tr,\n", + " basis = 'bspline', \n", + " xmin = xmin, \n", + " xmax = xmax)\n", + " cov_file_ad = os.path.join(idp_dir, 'cov_bspline_ad.txt') \n", + " np.savetxt(cov_file_ad, X_ad)\n", + " \n", + " # save the responses for the adaptation data\n", + " resp_file_ad = os.path.join(idp_dir, 'resp_ad.txt') \n", + " y_ad = df_ad[idp].to_numpy()\n", + " np.savetxt(resp_file_ad, y_ad)\n", + " \n", + " # save the site ids for the adaptation data\n", + " sitenum_file_ad = os.path.join(idp_dir, 'sitenum_ad.txt') \n", + " site_num_ad = df_ad['sitenum'].to_numpy(dtype=int)\n", + " np.savetxt(sitenum_file_ad, site_num_ad)\n", + " \n", + " # save the site ids for the test data \n", + " sitenum_file_te = os.path.join(idp_dir, 'sitenum_te.txt')\n", + " site_num_te = df_te['sitenum'].to_numpy(dtype=int)\n", + " np.savetxt(sitenum_file_te, site_num_te)\n", + " \n", + " yhat_te, s2_te, Z = predict(cov_file_te, \n", + " alg = 'blr', \n", + " respfile = resp_file_te, \n", + " model_path = os.path.join(idp_dir,'Models'),\n", + " adaptrespfile = resp_file_ad,\n", + " adaptcovfile = cov_file_ad,\n", + " adaptvargroupfile = sitenum_file_ad,\n", + " testvargroupfile = sitenum_file_te)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1707c118-cb42-4a14-bfda-188693d8f5cd", + "metadata": {}, + "outputs": [], + "source": [ + "suffix = 'predict'\n", + "warp = 'WarpSinArcsinh'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1e633e3-ad24-4013-9843-ca7869291029", + "metadata": {}, + "outputs": [], + "source": [ + "# initialise dataframe we will use to store quantitative metrics \n", + "blr_metrics = pd.DataFrame(columns = ['eid', 'NLL', 'EV', 'Skew','Kurtosis'])\n", + "\n", + "for idp_num, idp in enumerate(idp_ids): \n", + " idp_dir = os.path.join(out_dir, idp)\n", + " \n", + " # load the predictions and true data. 
We use a custom function that ensures 2d arrays\n", + " # equivalent to: y = np.loadtxt(filename); y = y[:, np.newaxis]\n", + " yhat_te = load_2d(os.path.join(idp_dir, 'yhat_' + suffix + '.txt'))\n", + " s2_te = load_2d(os.path.join(idp_dir, 'ys2_' + suffix + '.txt'))\n", + " y_te = load_2d(os.path.join(idp_dir, 'resp_te.txt'))\n", + " \n", + " with open(os.path.join(idp_dir,'Models', 'NM_0_0_estimate.pkl'), 'rb') as handle:\n", + " nm = pickle.load(handle) \n", + " \n", + " # compute error metrics\n", + " if warp is None:\n", + " metrics = evaluate(y_te, yhat_te) \n", + " \n", + " else:\n", + " warp_param = nm.blr.hyp[1:nm.blr.warp.get_n_params()+1] \n", + " W = nm.blr.warp\n", + " \n", + " # warp predictions\n", + " med_te = W.warp_predictions(np.squeeze(yhat_te), np.squeeze(s2_te), warp_param)[0]\n", + " med_te = med_te[:, np.newaxis]\n", + " \n", + " # evaluation metrics\n", + " metrics = evaluate(y_te, med_te)\n", + " \n", + " \n", + " Z = np.loadtxt(os.path.join(idp_dir, 'Z_' + suffix + '.txt'))\n", + " [skew, sdskew, kurtosis, sdkurtosis, semean, sesd] = calibration_descriptives(Z)\n", + " \n", + " \n", + " blr_metrics.loc[len(blr_metrics)] = [idp, nm.neg_log_lik, metrics['EXPV'][0], \n", + " skew, kurtosis]\n", + " \n", + "blr_metrics.to_csv(os.path.join(out_dir, 'blr_transfer_metrics.csv'))" + ] + }, + { + "cell_type": "markdown", + "id": "75210821-ccb8-4bd2-82f3-641708811b21", + "metadata": { + "id": "75210821-ccb8-4bd2-82f3-641708811b21" + }, + "source": [ + "### Preparing dummy data for plotting\n", + "\n", + "Now, we plot the centiles of variation estimated by the normative model. \n", + "\n", + "We do this by making use of a set of dummy covariates that span the whole range of the input space (for age) for a fixed value of the other covariates (e.g. sex) so that we can make predictions for these dummy data points, then plot them. We configure these dummy predictions using the same procedure as we used for the real data. We can use the same dummy data for all the IDPs we wish to plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2d0743d8-28ca-4a14-8ef0-99bf40434b5b", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2d0743d8-28ca-4a14-8ef0-99bf40434b5b", + "outputId": "00343be9-866d-4c9d-f243-351cbe9e4231" + }, + "outputs": [], + "source": [ + "# which sex do we want to plot? \n", + "sex = 0 # 1 = male 0 = female\n", + "if sex == 1: \n", + " clr = 'blue';\n", + "else:\n", + " clr = 'red'\n", + "\n", + "# create dummy data for visualisation\n", + "print('configuring dummy data ...')\n", + "xx = np.arange(xmin, xmax, 0.5)\n", + "X0_dummy = np.zeros((len(xx), 2))\n", + "X0_dummy[:,0] = xx\n", + "X0_dummy[:,1] = sex\n", + "\n", + "# create the design matrix\n", + "X_dummy = create_design_matrix(X0_dummy, xmin=xmin, xmax=xmax, site_ids=None, all_sites=site_ids_tr)\n", + "new_col = np.zeros(230)\n", + "new_col2 = np.reshape(new_col, [230,1])\n", + "X_dummy = np.append(X_dummy, new_col2, 1)\n", + "\n", + "# save the dummy covariates\n", + "cov_file_dummy = os.path.join(out_dir,'cov_bspline_dummy_mean.txt')\n", + "np.savetxt(cov_file_dummy, X_dummy)" + ] + }, + { + "cell_type": "markdown", + "id": "126323a3-2270-4796-97c4-94629730ddf7", + "metadata": { + "id": "126323a3-2270-4796-97c4-94629730ddf7" + }, + "source": [ + "### Plotting the normative models\n", + "\n", + "Now we loop through the IDPs, plotting each one separately. 
The outputs of this step are a set of quantitative regression metrics for each IDP and a set of centile curves which we plot the test data against. \n", + "\n", + "This part of the code is relatively complex because we need to keep track of many quantities for the plotting. We also need to remember whether the data need to be warped or not. By default in PCNtoolkit, predictions in the form of `yhat, s2` are always in the warped (Gaussian) space. If we want predictions in the input (non-Gaussian) space, then we need to warp them with the inverse of the estimated warping function. This can be done using the function `nm.blr.warp.warp_predictions()`. \n", + "\n", + "**Note:** it is necessary to update the intercept for each of the sites. For purposes of visualisation, here we do this by adjusting the median of the data to match the dummy predictions, but note that all the quantitative metrics are estimated using the predictions that are adjusted properly using a learned offset (or adjusted using a hold-out adaptation set, as above). Note also that for the calibration data we require at least two data points of the same sex in each site to be able to estimate the variance. Of course, in a real example, you would want many more than just two since we need to get a reliable estimate of the variance for each site. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cdd68cc6-212b-4149-b86a-24e842078e1a", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "cdd68cc6-212b-4149-b86a-24e842078e1a", + "outputId": "f25fc8bb-3a7c-41f0-ebd7-60f39ebaf7bd", + "tags": [] + }, + "outputs": [], + "source": [ + "sns.set(style='whitegrid')\n", + "\n", + "for idp_num, idp in enumerate(idp_ids): \n", + " print('Running IDP', idp_num, idp, ':')\n", + " idp_dir = os.path.join(out_dir, idp)\n", + " os.chdir(idp_dir)\n", + " \n", + " # load the true data points\n", + " yhat_te = load_2d(os.path.join(idp_dir, 'yhat_predict.txt'))\n", + " s2_te = load_2d(os.path.join(idp_dir, 'ys2_predict.txt'))\n", + " y_te = load_2d(os.path.join(idp_dir, 'resp_te.txt'))\n", + " \n", + " # set up the covariates for the dummy data\n", + " print('Making predictions with dummy covariates (for visualisation)')\n", + " yhat, s2 = predict(cov_file_dummy, \n", + " alg = 'blr', \n", + " respfile = None, \n", + " model_path = os.path.join(idp_dir,'Models'), \n", + " outputsuffix = '_dummy')\n", + " \n", + " # load the normative model\n", + " with open(os.path.join(idp_dir,'Models', 'NM_0_0_estimate.pkl'), 'rb') as handle:\n", + " nm = pickle.load(handle) \n", + " \n", + " # get the warp and warp parameters\n", + " W = nm.blr.warp\n", + " warp_param = nm.blr.hyp[1:nm.blr.warp.get_n_params()+1] \n", + " \n", + " # first, we warp predictions for the true data and compute evaluation metrics\n", + " med_te = W.warp_predictions(np.squeeze(yhat_te), np.squeeze(s2_te), warp_param)[0]\n", + " med_te = med_te[:, np.newaxis]\n", + " print('metrics:', evaluate(y_te, med_te))\n", + " \n", + " # then, we warp dummy predictions to create the plots\n", + " med, pr_int = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2), warp_param)\n", + " \n", + " # extract the different variance components to visualise\n", + " beta, junk1, junk2 = nm.blr._parse_hyps(nm.blr.hyp, X_dummy)\n", + " s2n = 1/beta # variation (aleatoric uncertainty)\n", + " s2s = s2-s2n # modelling uncertainty (epistemic uncertainty)\n", + " \n", + " # plot the data points\n", + " y_te_rescaled_all = np.zeros_like(y_te)\n", + " 
for sid, site in enumerate(site_ids_te):\n", + " # plot the true test data points \n", + " if all(elem in site_ids_tr for elem in site_ids_te):\n", + " # all data in the test set are present in the training set\n", + " \n", + " # first, we select the data points belonging to this particular site\n", + " idx = np.where(np.bitwise_and(X_te[:,2] == sex, X_te[:,sid+len(cols_cov)+1] !=0))[0]\n", + " if len(idx) == 0:\n", + " print('No data for site', sid, site, 'skipping...')\n", + " continue\n", + " \n", + " # then directly adjust the data\n", + " idx_dummy = np.bitwise_and(X_dummy[:,1] > X_te[idx,1].min(), X_dummy[:,1] < X_te[idx,1].max())\n", + " y_te_rescaled = y_te[idx] - np.median(y_te[idx]) + np.median(med[idx_dummy])\n", + " else:\n", + " # we need to adjust the data based on the adaptation dataset \n", + " \n", + " # first, select the data point belonging to this particular site\n", + " idx = np.where(np.bitwise_and(X_te[:,2] == sex, (df_te['site'] == site).to_numpy()))[0]\n", + " \n", + " # load the adaptation data\n", + " y_ad = load_2d(os.path.join(idp_dir, 'resp_ad.txt'))\n", + " X_ad = load_2d(os.path.join(idp_dir, 'cov_bspline_ad.txt'))\n", + " idx_a = np.where(np.bitwise_and(X_ad[:,2] == sex, (df_ad['site'] == site).to_numpy()))[0]\n", + " if len(idx) < 2 or len(idx_a) < 2:\n", + " print('Insufficent data for site', sid, site, 'skipping...')\n", + " continue\n", + " \n", + " # adjust and rescale the data\n", + " y_te_rescaled, s2_rescaled = nm.blr.predict_and_adjust(nm.blr.hyp, \n", + " X_ad[idx_a,:], \n", + " np.squeeze(y_ad[idx_a]), \n", + " Xs=None, \n", + " ys=np.squeeze(y_te[idx]))\n", + " # plot the (adjusted) data points\n", + " plt.scatter(X_te[idx,1], y_te_rescaled, s=4, color=clr, alpha = 0.1)\n", + " \n", + " # plot the median of the dummy data\n", + " plt.plot(xx, med, clr)\n", + " \n", + " # fill the gaps in between the centiles\n", + " junk, pr_int25 = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2), warp_param, percentiles=[0.25,0.75])\n", + " junk, pr_int95 = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2), warp_param, percentiles=[0.05,0.95])\n", + " junk, pr_int99 = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2), warp_param, percentiles=[0.01,0.99])\n", + " plt.fill_between(xx, pr_int25[:,0], pr_int25[:,1], alpha = 0.1,color=clr)\n", + " plt.fill_between(xx, pr_int95[:,0], pr_int95[:,1], alpha = 0.1,color=clr)\n", + " plt.fill_between(xx, pr_int99[:,0], pr_int99[:,1], alpha = 0.1,color=clr)\n", + " \n", + " # make the width of each centile proportional to the epistemic uncertainty\n", + " junk, pr_int25l = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2-0.5*s2s), warp_param, percentiles=[0.25,0.75])\n", + " junk, pr_int95l = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2-0.5*s2s), warp_param, percentiles=[0.05,0.95])\n", + " junk, pr_int99l = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2-0.5*s2s), warp_param, percentiles=[0.01,0.99])\n", + " junk, pr_int25u = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2+0.5*s2s), warp_param, percentiles=[0.25,0.75])\n", + " junk, pr_int95u = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2+0.5*s2s), warp_param, percentiles=[0.05,0.95])\n", + " junk, pr_int99u = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2+0.5*s2s), warp_param, percentiles=[0.01,0.99]) \n", + " plt.fill_between(xx, pr_int25l[:,0], pr_int25u[:,0], alpha = 0.3,color=clr)\n", + " plt.fill_between(xx, pr_int95l[:,0], pr_int95u[:,0], alpha = 0.3,color=clr)\n", + " plt.fill_between(xx, pr_int99l[:,0], pr_int99u[:,0], alpha = 
0.3,color=clr)\n", + " plt.fill_between(xx, pr_int25l[:,1], pr_int25u[:,1], alpha = 0.3,color=clr)\n", + " plt.fill_between(xx, pr_int95l[:,1], pr_int95u[:,1], alpha = 0.3,color=clr)\n", + " plt.fill_between(xx, pr_int99l[:,1], pr_int99u[:,1], alpha = 0.3,color=clr)\n", + "\n", + " # plot actual centile lines\n", + " plt.plot(xx, pr_int25[:,0],color=clr, linewidth=0.5)\n", + " plt.plot(xx, pr_int25[:,1],color=clr, linewidth=0.5)\n", + " plt.plot(xx, pr_int95[:,0],color=clr, linewidth=0.5)\n", + " plt.plot(xx, pr_int95[:,1],color=clr, linewidth=0.5)\n", + " plt.plot(xx, pr_int99[:,0],color=clr, linewidth=0.5)\n", + " plt.plot(xx, pr_int99[:,1],color=clr, linewidth=0.5)\n", + " \n", + " plt.xlabel('Age')\n", + " plt.ylabel(idp) \n", + " plt.title(idp)\n", + " plt.xlim((0,90))\n", + " plt.savefig(os.path.join(idp_dir, 'centiles_' + str(sex)), bbox_inches='tight')\n", + " plt.show()\n", + " \n", + "os.chdir(out_dir)" + ] + }, + { + "cell_type": "markdown", + "id": "88d2dbc0-e82f-4af5-91eb-dc8aa60f6ba7", + "metadata": { + "id": "88d2dbc0-e82f-4af5-91eb-dc8aa60f6ba7" + }, + "source": [ + "The deviation scores are output as a text file in separate folders. We want to summarize the deviation scores across all models estimates so we can organize them into a single file, and merge the deviation scores into the original data file. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3fb0ced-ed44-487c-86b8-07b9fc04d64e", + "metadata": { + "id": "e3fb0ced-ed44-487c-86b8-07b9fc04d64e" + }, + "outputs": [], + "source": [ + "! mkdir deviation_scores" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "571f549e-9edd-4f8b-a6b3-d76cd23609f0", + "metadata": { + "id": "571f549e-9edd-4f8b-a6b3-d76cd23609f0" + }, + "outputs": [], + "source": [ + "! 
for i in *; do if [[ -e ${i}/Z_predict.txt ]]; then cp ${i}/Z_predict.txt deviation_scores/${i}_Z_predict.txt; fi; done" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f63da6c-91e8-4852-91a7-a4e8bc9d9f31", + "metadata": { + "id": "9f63da6c-91e8-4852-91a7-a4e8bc9d9f31" + }, + "outputs": [], + "source": [ + "z_dir = '/content/braincharts/models/' + model_name + '/deviation_scores/'\n", + "\n", + "filelist = [name for name in os.listdir(z_dir)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8791195b-09a9-4251-8fd7-35e80a028d2f", + "metadata": { + "id": "8791195b-09a9-4251-8fd7-35e80a028d2f" + }, + "outputs": [], + "source": [ + "os.chdir(z_dir)\n", + "Z_df = pd.concat([pd.read_csv(item, names=[item[:-4]]) for item in filelist], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1054959-dd17-4c1b-b1db-56cf849ecae2", + "metadata": { + "id": "f1054959-dd17-4c1b-b1db-56cf849ecae2" + }, + "outputs": [], + "source": [ + "df_te.reset_index(inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ab00be4-d9c8-49aa-b407-f69946ca2d6c", + "metadata": { + "id": "6ab00be4-d9c8-49aa-b407-f69946ca2d6c" + }, + "outputs": [], + "source": [ + "Z_df['sub_id'] = df_te['sub_id']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6185b6-d9d7-4651-bbab-2cfba6c46963", + "metadata": { + "id": "9f6185b6-d9d7-4651-bbab-2cfba6c46963" + }, + "outputs": [], + "source": [ + "df_te_Z = pd.merge(df_te, Z_df, on='sub_id', how='inner')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae932714-60c3-4a36-8b72-cd9086a25761", + "metadata": { + "id": "ae932714-60c3-4a36-8b72-cd9086a25761" + }, + "outputs": [], + "source": [ + "df_te_Z.to_csv('OpenNeuroTransfer_yeo17_deviation_scores.csv', index=False)" + ] + } + ], + "metadata": { + "colab": { + "name": "apply_normative_models.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/scripts/fit_normative_models_yeo17.ipynb b/scripts/fit_normative_models_yeo17.ipynb new file mode 100644 index 0000000..5f12955 --- /dev/null +++ b/scripts/fit_normative_models_yeo17.ipynb @@ -0,0 +1,366 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4b64f505-ad16-437a-94de-2646f35ae55f", + "metadata": {}, + "source": [ + "## Estimating lifespan normative models\n", + "\n", + "This notebook provides a complete walkthrough for an analysis of normative modelling in a large sample as described in the accompanying paper. Note that this script is provided principally for completeness (e.g. to assist in fitting normative models to new datasets). All pre-estimated normative models are already provided.\n", + "\n", + "First, if necessary, we install PCNtoolkit (note: this tutorial requires at least version 0.20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84ec2ca6-c0a2-4abf-8f05-29edc9e0fa24", + "metadata": {}, + "outputs": [], + "source": [ + "! 
pip install pcntoolkit==0.26" + ] + }, + { + "cell_type": "markdown", + "id": "909c3b45-ad46-4e6d-8732-dc5ac68488c6", + "metadata": {}, + "source": [ + "Then we import the required libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "83c494d3-6ebd-4cde-aff0-8fc9344374dd", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "import pickle\n", + "from matplotlib import pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "from pcntoolkit.normative import estimate, predict, evaluate\n", + "from pcntoolkit.util.utils import compute_MSLL, create_design_matrix\n", + "from nm_utils import calibration_descriptives, remove_bad_subjects, load_2d" + ] + }, + { + "cell_type": "markdown", + "id": "9822cc19-48e9-428b-8c5e-e059fd2d23f7", + "metadata": {}, + "source": [ + "Now, we configure the locations in which the data are stored. You will need to configure this for your specific installation\n", + "\n", + "**Notes:** \n", + "- The data are assumed to be in CSV format and will be loaded as pandas dataframes\n", + "- Generally the raw data will be in a different location to the analysis\n", + "- The data can have arbitrary columns but some are required by the script, i.e. 'age', 'sex' and 'site', plus the phenotypes you wish to estimate (see below)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7da01c88-7033-498b-a811-79ad58e8c17a", + "metadata": {}, + "outputs": [], + "source": [ + "# where the raw data are stored\n", + "data_dir = '/data'\n", + "\n", + "# where the analysis takes place\n", + "root_dir = os.path.join(data_dir,'braincharts')\n", + "out_dir = os.path.join(root_dir,'models','lifespan_yeo17_17K_40sites') # Train/test split model used for evaluating\n", + "#out_dir = os.path.join(root_dir,'models','lifespan_yeo17_22K_40sites') # full model used for transfering\n", + "\n", + "# create the output directory if it does not already exist\n", + "os.makedirs(out_dir, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "id": "01141f19-a960-4823-baad-8604975304c3", + "metadata": {}, + "source": [ + "Now we load the data. We will load one pandas dataframe for the training set and one dataframe for the test set. We also configrure a list of site ids." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "850fee6b-421f-41d9-8fd6-7e1dafbf0e9f", + "metadata": {}, + "outputs": [], + "source": [ + "df_tr = pd.read_csv(os.path.join(data_dir,'lifespan_controls_yeo17_tr_0.csv'), low_memory=False) \n", + "df_te = pd.read_csv(os.path.join(data_dir,'lifespan_controls_yeo17_te_0.csv'), low_memory=False)\n", + "\n", + "# extract a list of unique site ids from the training set\n", + "site_ids = sorted(set(df_tr['site'].to_list()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7e6114f-280d-422d-9b0e-3f245330b0f8", + "metadata": {}, + "outputs": [], + "source": [ + "# All data is in training set. This model is used to transfer model to new datasets.\n", + "# Uncomment below lines and run this block when training the full model (lifespan_yeo17_22K_40sites).\n", + "\n", + "#df = pd.merge(df_tr, df_te, how='outer')\n", + "#df_tr = df\n", + "#df_te = df" + ] + }, + { + "cell_type": "markdown", + "id": "29f9593a-d3c9-4d08-a877-8794203c0001", + "metadata": { + "tags": [] + }, + "source": [ + "### Configure which models to fit\n", + "\n", + "Next, we load the image derived phenotypes (IDPs) which we will process in this analysis. 
This is effectively just a list of columns in your dataframe. Here we estimate normative models for the left hemisphere, right hemisphere and cortical structures." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7438ef7e-9340-4f13-8d57-816918923705", + "metadata": {}, + "outputs": [], + "source": [ + "# load the idps to process\n", + "with open(os.path.join(root_dir,'docs','phenotypes_yeo17.txt')) as f:\n", + " idp_ids = f.read().splitlines()\n", + "\n", + "# we could also just specify a list of IDPs\n", + "#idp_ids = ['VisualB_TemporalParietal', 'SensorimotorA_SensorimotorB']" + ] + }, + { + "cell_type": "markdown", + "id": "5d791db6-8fe5-450c-88eb-84a390b8753a", + "metadata": {}, + "source": [ + "### Configure model parameters\n", + "\n", + "Now, we configure some parameters for the regression model we use to fit the normative model. Here we will use a 'warped' Bayesian linear regression model. To model non-Gaussianity, we select a sin arcsinh warp and to model non-linearity, we stick with the default value for the basis expansion (a cubic b-spline basis set with 5 knot points). Since we are sticking with the default value, we do not need to specify any parameters for this, but we do need to specify the limits. We choose to pad the input by a few years either side of the input range. We will also set a couple of options that control the estimation of the model\n", + "\n", + "For further details about the likelihood warping approach, see [Fraza et al 2021](https://www.biorxiv.org/content/10.1101/2021.04.05.438429v1)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e44e257c-676e-49d8-89ec-657e506c3b74", + "metadata": {}, + "outputs": [], + "source": [ + "# which data columns do we wish to use as covariates? \n", + "cols_cov = ['age','sex', 'mean_FD']\n", + "\n", + "# which warping function to use? We can set this to None in order to fit a vanilla Gaussian noise model\n", + "warp = 'WarpSinArcsinh'\n", + "\n", + "# limits for cubic B-spline basis \n", + "xmin = -5 \n", + "xmax = 110\n", + "\n", + "# Do we want to force the model to be refit every time? \n", + "force_refit = True\n", + "\n", + "# Absolute Z treshold above which a sample is considered to be an outlier (without fitting any model)\n", + "outlier_thresh = 7" + ] + }, + { + "cell_type": "markdown", + "id": "896842d7-8913-4137-9d86-4757c42bcf1b", + "metadata": {}, + "source": [ + "### Fit the models\n", + "\n", + "Now we fit the models. This involves looping over the IDPs we have selected. We will use a module from PCNtoolkit to set up the design matrices, containing the covariates, fixed effects for site and nonlinear basis expansion. 
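If you would like to see what such a design matrix looks like before running the full loop, you can build one directly. A small sketch using the variables defined above (`X_demo` is just a throwaway name, and the settings mirror those used in the loop below):\n",
    "\n",
    "```python\n",
    "# build a design matrix for the training data and inspect its shape\n",
    "X_demo = create_design_matrix(df_tr[cols_cov],\n",
    "                              site_ids = df_tr['site'],\n",
    "                              all_sites = site_ids,\n",
    "                              basis = 'bspline',\n",
    "                              xmin = xmin,\n",
    "                              xmax = xmax)\n",
    "\n",
    "print('rows (subjects):', X_demo.shape[0])\n",
    "print('columns (intercept + covariates + site dummies + spline basis):', X_demo.shape[1])\n",
    "```\n",
    "\n",
    "The column count should correspond to a global intercept, the covariate columns, one dummy-coded column per site and the columns of the B-spline basis expansion.\n",
    "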
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4e9b50c-574b-4e2c-a511-cc444db4393e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "for idp_num, idp in enumerate(idp_ids): \n", + " print('Running IDP', idp_num, idp, ':')\n", + " \n", + " # set output dir \n", + " idp_dir = os.path.join(out_dir, idp)\n", + " os.makedirs(os.path.join(idp_dir), exist_ok=True)\n", + " os.chdir(idp_dir)\n", + " \n", + " # extract the response variables for training and test set\n", + " y_tr = df_tr[idp].to_numpy() \n", + " y_te = df_te[idp].to_numpy()\n", + " \n", + " # remove gross outliers and implausible values\n", + " yz_tr = (y_tr - np.mean(y_tr)) / np.std(y_tr)\n", + " yz_te = (y_te - np.mean(y_te)) / np.std(y_te)\n", + " nz_tr = np.abs(yz_tr) < outlier_thresh\n", + " nz_te = np.abs(yz_te) < outlier_thresh\n", + " y_tr = y_tr[nz_tr]\n", + " y_te = y_te[nz_te]\n", + " \n", + " # write out the response variables for training and test\n", + " resp_file_tr = os.path.join(idp_dir, 'resp_tr.txt')\n", + " resp_file_te = os.path.join(idp_dir, 'resp_te.txt') \n", + " np.savetxt(resp_file_tr, y_tr)\n", + " np.savetxt(resp_file_te, y_te)\n", + " \n", + " # configure the design matrix\n", + " X_tr = create_design_matrix(df_tr[cols_cov].loc[nz_tr], \n", + " site_ids = df_tr['site'].loc[nz_tr],\n", + " basis = 'bspline', \n", + " xmin = xmin, \n", + " xmax = xmax)\n", + " X_te = create_design_matrix(df_te[cols_cov].loc[nz_te], \n", + " site_ids = df_te['site'].loc[nz_te], \n", + " all_sites=site_ids,\n", + " basis = 'bspline', \n", + " xmin = xmin, \n", + " xmax = xmax)\n", + "\n", + " # configure and save the covariates\n", + " cov_file_tr = os.path.join(idp_dir, 'cov_bspline_tr.txt')\n", + " cov_file_te = os.path.join(idp_dir, 'cov_bspline_te.txt')\n", + " np.savetxt(cov_file_tr, X_tr)\n", + " np.savetxt(cov_file_te, X_te)\n", + "\n", + " if not force_refit and os.path.exists(os.path.join(idp_dir, 'Models', 'NM_0_0_estimate.pkl')):\n", + " print('Making predictions using a pre-existing model...')\n", + " suffix = 'predict'\n", + " \n", + " # Make prdictsion with test data\n", + " predict(cov_file_te, \n", + " alg='blr', \n", + " respfile=resp_file_te, \n", + " model_path=os.path.join(idp_dir,'Models'),\n", + " outputsuffix=suffix)\n", + " else:\n", + " print('Estimating the normative model...')\n", + " estimate(cov_file_tr, resp_file_tr, testresp=resp_file_te, \n", + " testcov=cov_file_te, alg='blr', optimizer = 'l-bfgs-b', \n", + " savemodel=True, warp=warp, warp_reparam=True)\n", + " suffix = 'estimate'\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "925f77cf-c873-4047-91ac-50b9571704fd", + "metadata": {}, + "source": [ + "### Compute error metrics\n", + "\n", + "In this section we compute the following error metrics for all IDPs (all evaluated on the test set):\n", + "\n", + "- Negative log likelihood (NLL)\n", + "- Explained variance (EV)\n", + "- Mean standardized log loss (MSLL)\n", + "- Bayesian information Criteria (BIC)\n", + "- Skew and Kurtosis of the Z-distribution" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e9d7500-4f46-4ee1-9756-81758ae5b1d1", + "metadata": {}, + "outputs": [], + "source": [ + "# initialise dataframe we will use to store quantitative metrics \n", + "blr_metrics = pd.DataFrame(columns = ['eid', 'NLL', 'EV', 'MSLL', 'BIC','Skew','Kurtosis'])\n", + "\n", + "for idp_num, idp in enumerate(idp_ids): \n", + " idp_dir = os.path.join(out_dir, idp)\n", + " \n", + " # load the predictions and true data. 
We use a custom function that ensures 2d arrays\n", + " # equivalent to: y = np.loadtxt(filename); y = y[:, np.newaxis]\n", + " yhat_te = load_2d(os.path.join(idp_dir, 'yhat_' + suffix + '.txt'))\n", + " s2_te = load_2d(os.path.join(idp_dir, 'ys2_' + suffix + '.txt'))\n", + " y_te = load_2d(os.path.join(idp_dir, 'resp_te.txt'))\n", + " \n", + " with open(os.path.join(idp_dir,'Models', 'NM_0_0_estimate.pkl'), 'rb') as handle:\n", + " nm = pickle.load(handle) \n", + " \n", + " # compute error metrics\n", + " if warp is None:\n", + " metrics = evaluate(y_te, yhat_te) \n", + " \n", + " # compute MSLL manually as a sanity check\n", + " y_tr_mean = np.array( [[np.mean(y_tr)]] )\n", + " y_tr_var = np.array( [[np.var(y_tr)]] )\n", + " MSLL = compute_MSLL(y_te, yhat_te, s2_te, y_tr_mean, y_tr_var) \n", + " else:\n", + " warp_param = nm.blr.hyp[1:nm.blr.warp.get_n_params()+1] \n", + " W = nm.blr.warp\n", + " \n", + " # warp predictions\n", + " med_te = W.warp_predictions(np.squeeze(yhat_te), np.squeeze(s2_te), warp_param)[0]\n", + " med_te = med_te[:, np.newaxis]\n", + " \n", + " # evaluation metrics\n", + " metrics = evaluate(y_te, med_te)\n", + " \n", + " # compute MSLL manually\n", + " y_te_w = W.f(y_te, warp_param)\n", + " y_tr_w = W.f(y_tr, warp_param)\n", + " y_tr_mean = np.array( [[np.mean(y_tr_w)]] )\n", + " y_tr_var = np.array( [[np.var(y_tr_w)]] )\n", + " MSLL = compute_MSLL(y_te_w, yhat_te, s2_te, y_tr_mean, y_tr_var) \n", + " \n", + " Z = np.loadtxt(os.path.join(idp_dir, 'Z_' + suffix + '.txt'))\n", + " [skew, sdskew, kurtosis, sdkurtosis, semean, sesd] = calibration_descriptives(Z)\n", + " \n", + " BIC = len(nm.blr.hyp) * np.log(y_tr.shape[0]) + 2 * nm.neg_log_lik\n", + " \n", + " blr_metrics.loc[len(blr_metrics)] = [idp, nm.neg_log_lik, metrics['EXPV'][0], \n", + " MSLL[0], BIC, skew, kurtosis]\n", + " \n", + "display(blr_metrics)\n", + "\n", + "blr_metrics.to_csv(os.path.join(out_dir,'blr_metrics.csv'))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}