SysBioChalmers
diff --git a/‎.gitattributes‎
Lines changed: 5 additions & 0 deletions b/‎.gitattributes‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 47 additions & 0 deletions b/‎.gitignore‎
Lines changed: 47 additions & 0 deletions
diff --git a/‎LICENSE.md‎
Lines changed: 541 additions & 0 deletions b/‎LICENSE.md‎
Lines changed: 541 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 7 additions & 1 deletion b/‎README.md‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎code/DataConstrains.m‎
Lines changed: 25 additions & 0 deletions b/‎code/DataConstrains.m‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎code/analyzeUsage.m‎
Lines changed: 45 additions & 0 deletions b/‎code/analyzeUsage.m‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎code/boxplotEnzymeUsage.R‎
Lines changed: 53 additions & 0 deletions b/‎code/boxplotEnzymeUsage.R‎
Lines changed: 53 additions & 0 deletions
diff --git a/‎code/customGECKO/constrainEnzymes.m‎
Lines changed: 155 additions & 0 deletions b/‎code/customGECKO/constrainEnzymes.m‎
Lines changed: 155 additions & 0 deletions
@@ -0,0 +1,5 @@
+*	text=auto
+*.m	text diff=matlab
+.gitattributes	export-ignore
+.gitignore	export-ignore
+.github	export-ignore
@@ -0,0 +1,47 @@
+# Mac OS X
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+# MATLAB
+*.asv
+
+# R
+.Rhistory
+.RData
+
+# Binaries
+*.docx
+*.doc
+*.xlsx
+*.xls
+
+# Temporary (Office) files
+~$*
+scrap/
+
+# Cloned repositories
+code/GECKO
+code/ecModels
+code/yeast-GEM
@@ -1 +1,7 @@
-"# overflow" 
+## Instructions
+The `code` folder contains a number of scripts to reconstruct the models and replicate the analysis. Note that the user should have [RAVEN 2.4.0](https://github.com/SysBioChalmers/RAVEN/releases) or later installed.
+
+- `prepareEnvironment.m`: run this script to prepare MATLAB for model reconstruction and analysis. This includes (1) cloning the correct GECKO version; (2) if required, cloning yeast-GEM and reconstructing ecYeast-GEM 8.1.3 [also already distributed with this repository]; (3) loading all measured flux and proteomics data; (4) setting parameters for GECKO.
+- `generateProtModels.m`: generate the five condition-specific proteome-constrained ec-models and store these in the models subdirectory. The enzyme abundances that were flexibilized to allow growth at the measured dilution rate are written to `results/modelGeneration`. The models are constrained for glucose uptake, the growth rate is set to the dilution rate, and the objective function is set to minimization of the unmeasured protein pool usage. Results from this FBA are written to `results/modelSimulation`.
+- `ribosome.m`: add ribosomal subunits to the ec-models. The average abundances of the ribosomal subunits are plotted in `results/modelGeneration`, to identify the "core" ribosome to be included in the model. The subunit abundances that were flexibilized to allow growth at the measured dilution rate are also written to this subdirectory.
+- `analyzeUsage.m`: run the same FBA as in `generateProtModels.m` and summarize the enzyme usages in `results/enzymeUsage`, where the capacity and absolute usages are stored separately, together with a summary for each subsystem.
@@ -0,0 +1,25 @@
+function model = DataConstrains(model,compounds,bounds,flexBounds)
+% DataConstrains
+%
+%   Constrains the exchange reactions of the given compounds with measured
+%   exchange fluxes.
+%
+%   model       Model with a rxnNames field; bounds are set via setParam.
+%   compounds   Cell array with compound names. The reaction looked up is
+%               '<compound> exchange', or '<compound> exchange (reversible)'
+%               for oxygen (case-insensitive match).
+%   bounds      Measured exchange flux for each compound.
+%   flexBounds  Scaling factors: flexBounds(1)*bounds(i) becomes the upper
+%               bound and, if a second element is given,
+%               flexBounds(2)*bounds(i) becomes the lower bound.
+%
+%   model       Model with the updated bounds.
+%
+%   Usage: model = DataConstrains(model,compounds,bounds,flexBounds)
+%
+
+%Nothing to constrain: only print the trailing blank line
+if isempty(compounds)
+    disp(' ')
+    return
+end
+
+disp('Constraining byproducts exchange fluxes with fermentation data')
+nCompounds = length(compounds);
+for k = 1:nCompounds
+    metName = compounds{k};
+    %Build the name of the exchange reaction for this compound
+    if strcmpi(metName,'oxygen')
+        exchName = [metName ' exchange (reversible)'];
+    else
+        exchName = [metName ' exchange'];
+    end
+    rxnPos = find(strcmpi(model.rxnNames,exchName));
+    %Report and move on when the model has no such reaction
+    if isempty(rxnPos)
+        disp(['No exchange rxn for ' metName ' was found in ecModel'])
+        continue
+    end
+    disp([metName ' exchange has been constrained to: ' num2str(bounds(k)) ' [mmol/gDw h]'])
+    %Scale the measured value to allow some flexibility
+    model = setParam(model,'ub',rxnPos,flexBounds(1)*bounds(k));
+    if numel(flexBounds)>1
+        model = setParam(model,'lb',rxnPos,flexBounds(2)*bounds(k));
+    end
+end
+disp(' ')
+end
@@ -0,0 +1,45 @@
+% Summarize the enzyme usage of the five condition-specific ec-models.
+% Prerequisites: the models were constructed by generateProtModels,
+% ribosome.m was run to add the ribosomal subunits, and the struct flux
+% (field conds holds the condition names) is present in the workspace.
+
+load('../models/ecModel_P_CN4.mat')
+load('../models/ecModel_P_CN22.mat')
+load('../models/ecModel_P_CN38.mat')
+load('../models/ecModel_P_CN75.mat')
+load('../models/ecModel_P_hGR.mat')
+
+% Collect the loaded models in the order CN4, CN22, CN38, CN75, hGR
+ecModels(1:5) = {ecModelP_CN4, ecModelP_CN22, ecModelP_CN38, ...
+    ecModelP_CN75, ecModelP_hGR};
+
+%% Simulate each model and get the enzyme usages
+for i=1:5
+    disp(['Now testing: ' flux.conds{i}])
+    sol{i} = solveLP(ecModels{i});
+    [absUsage{i}, capUsage{i}, UB{i}, protName{i}] = ...
+        enzymeUsage(ecModels{i},sol{i}.x,true);
+    % Write the complete flux distribution of this condition to file
+    printFluxes(ecModels{i},sol{i}.x,false,0,...
+        fullfile('..','results','modelSimulation',['allFluxes_' flux.conds{i},'.txt']),...
+        '%rxnID\t%rxnName\t%eqn\t%flux\n');
+end
+
+%% Prepare output
+% Columns 1-3: identifiers; 4-8: capacity usage; 9-13: absolute usage;
+% 14-18: upper bounds (one column per condition, numbers as text with 3
+% significant digits).
+clear out
+out(:,1)=ecModels{1}.enzymes;
+out(:,2)=ecModels{1}.enzGenes;
+out(:,3)=ecModels{1}.enzNames;
+for i=1:5
+    out(:,3+i)  = strtrim(cellstr(num2str(capUsage{i},3)));
+    out(:,8+i)  = strtrim(cellstr(num2str(absUsage{i},3)));
+    out(:,13+i) = strtrim(cellstr(num2str(UB{i},3)));
+end
+
+%% Write all usages to file
+head={'protID','geneID','protName',...
+    'capUse_CN4','capUse_CN22','capUse_CN38','capUse_CN75','capUse_hGR',...
+    'absUse_CN4','absUse_CN22','absUse_CN38','absUse_CN75','absUse_hGR',...
+    'UB_CN4','UB_CN22','UB_CN38','UB_CN75','UB_hGR'};
+out=cell2table(out,'VariableNames',head);
+writetable(out,fullfile('..','results','enzymeUsage','enzymeUsages.txt'),'Delimiter','\t')
@@ -0,0 +1,53 @@
+# Plot the capacity usage of enzymes in the ec-models, grouped by GO term
+#install.packages("tidyverse") # Install tidyverse if required
+library(tidyverse)
+# Adjust to the correct directory
+setwd("C:/Work/GitHub/overflow/results/enzymeUsage")
+
+# Read the usage table: columns 1-3 identify the protein, columns 4-8 hold
+# the capacity usage per condition. Proteins whose usage sums to zero are
+# dropped and the remaining usages are expressed as percentages.
+capUse <- read.delim('enzymeUsages.txt')[, 1:8]
+capSum <- rowSums(capUse[, 4:8])
+capUse <- capUse[capSum != 0, ]
+capUse[, 4:8] <- 100 * capUse[, 4:8]
+
+# GO term annotation as obtained from Uniprot
+GO <- read.delim('../../data/selectedAnnotation.txt', stringsAsFactors = FALSE)
+
+# Keep only annotated proteins and attach their GO term
+capUse <- capUse[capUse$protID %in% GO$Entry, ]
+capUse$GOterm <- GO$system[match(capUse$protID, GO$Entry)]
+colnames(capUse) <- gsub('capUse_', '', colnames(capUse), fixed = TRUE)
+
+# Store the rounded table
+capUse <- mutate_if(capUse, is.numeric, round, digits = 3)
+write_delim(capUse, '../../results/enzymeUsage/capUsage.txt', delim = '\t')
+
+# Long format: one row per protein and condition, with fixed factor order
+goLevels <- c('Glycolysis','TCA cycle','ETC','PP shunt','THF cycle','Ribosome','Nitrogen metabolism','Amino acid metabolism')
+condLevels <- c('CN4','CN22','CN38','CN75','hGR')
+capUse <- capUse %>% gather(key = 'Condition', value = 'Usage', 4:8)
+capUse$GOterm <- factor(capUse$GOterm, levels = goLevels)
+capUse$Condition <- factor(capUse$Condition, levels = condLevels)
+
+# Layers shared by both figures
+usageLayers <- list(
+  geom_boxplot(lwd = 0.35),
+  scale_color_manual(values = c('#CBBBA0','#1D1D1B','#1D71B8','#878787')),
+  facet_grid(. ~ GOterm),
+  labs(x = '', y = 'Capacity usage (%)'),
+  theme_classic(),
+  theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
+        text = element_text(size = 7),
+        line = element_line(size = 0.15),
+        strip.background = element_blank(),
+        axis.line = element_line(size = 0.15),
+        legend.position = 'none')
+)
+
+# Main figure
+plot1 <- capUse[capUse$GOterm %in% c('Glycolysis','TCA cycle','ETC','Ribosome'), ]
+ggplot(plot1, aes(x = Condition, y = Usage, color = GOterm)) + usageLayers
+ggsave("selectedGOtermUsage.pdf", width = 10, height = 4.5, units = 'cm')
+
+# Supplementary figure
+plot2 <- capUse[capUse$GOterm %in% c('PP shunt','THF cycle','Nitrogen metabolism','Amino acid metabolism'), ]
+ggplot(plot2, aes(x = Condition, y = Usage, color = GOterm)) + usageLayers
+ggsave("supplementGOtermUsage.pdf", width = 10, height = 4.5, units = 'cm')
+
+
@@ -0,0 +1,155 @@
+ function [model,enzUsages,modifications,GAM,massCoverage] = constrainEnzymes(model,f,GAM,Ptot,pIDs,data,gRate,c_UptakeExp,parameters)
+% constrainEnzymes
+%
+%   Main function for overlaying proteomics data on an enzyme-constrained
+%   model. If chosen, also scales the protein content, optimizes GAM, and
+%   flexibilizes the proteomics data.
+%
+%   model           ecModel.
+%   f               (Opt) Estimated mass fraction of enzymes in model. If
+%                   omitted or empty it is computed with measureAbundance.
+%   GAM             (Opt) Growth-associated maintenance value. If not
+%                   provided it is left empty and handed to scaleBioMass,
+%                   which is only called when no proteomics data is given
+%                   (the original header states it is then fitted to
+%                   chemostat data).
+%   Ptot            (Opt) Total protein content, provide if desired content
+%                   is different from the one reported in getModelParameters [gProt/gDw]
+%   pIDs            (Opt) Protein IDs from proteomics data.
+%   data            (Opt) Protein abundances from proteomics data [mmol/gDW].
+%   gRate           (Opt) Experimental growth rate at which the proteomics
+%                   data were obtained [1/h]
+%   c_UptakeExp     (Opt) Experimentally measured glucose uptake rate
+%                   [mmol/gDW h].
+%   parameters      (Opt) Struct with model parameters; the fields sigma,
+%                   c_source and (if Ptot is not given) Ptot are read. If
+%                   omitted, it is obtained by calling getModelParameters
+%                   from the parent folder.
+%
+%   model           ecModel with calibrated enzyme usage upper bounds
+%   enzUsages       Calculated enzyme usages after final calibration
+%                   (enzyme_i demand/enzyme_i upper bound)
+%   modifications   Table with all the modified values
+%                   (Protein ID/old value/Flexibilized value)
+%   GAM             GAM value as provided, or as returned by scaleBioMass
+%   massCoverage    Ratio between measured and total mass of protein in the model
+%
+%   Usage: [model,enzUsages,modifications,GAM,massCoverage] = constrainEnzymes(model,f,GAM,Ptot,pIDs,data,gRate,c_UptakeExp,parameters)
+%
+%   Benjamin J. Sanchez. Last update 2018-12-11
+%   Ivan Domenzain.      Last update 2020-03-02
+%
+
+%Get model parameters. Without a parameters argument the function steps
+%into the parent folder to call getModelParameters and then returns to
+%limit_proteins.
+%NOTE(review): this assumes the current folder is a limit_proteins folder
+%located next to getModelParameters - confirm for this custom copy.
+if nargin<9
+    cd ..
+    parameters = getModelParameters;
+    cd limit_proteins
+end
+sigma      = parameters.sigma;
+c_source   = parameters.c_source;
+
+%Compute f if not provided (omitted or passed as empty):
+if nargin < 2
+    [f,~] = measureAbundance(model.enzymes);
+else
+    if isempty(f)
+       [f,~] = measureAbundance(model.enzymes);
+    end
+end
+%Leave GAM empty if not provided (will be fitted later):
+if nargin < 3
+    GAM = [];
+end
+%Load Ptot if not provided:
+if nargin < 4
+    Ptot = parameters.Ptot;
+end
+%No UB will be changed if no data is available -> pool = all enzymes(FBAwMC)
+%NOTE(review): with exactly five arguments (pIDs but no data) the variable
+%data is never assigned before it is used below.
+if nargin < 5
+    pIDs = cell(0,1);
+    data = zeros(0,1);
+end
+%Remove zeros or negative values (they are replaced by NaN)
+data = cleanDataset(data);
+%Assign concentrations as UBs [mmol/gDW]:
+model.concs = nan(size(model.enzymes));      %OBS: min value is zero!!
+disp('Matching data to enzymes in model...')
+%For every enzyme only the first matching protein ID (case-insensitive) is
+%used; the match flag blocks later duplicates.
+for i = 1:length(model.enzymes)
+    match = false;
+    for j = 1:length(pIDs)
+        if strcmpi(pIDs{j},model.enzymes{i}) && ~match
+        	model.concs(i) = data(j)*model.MWs(i); %g/gDW
+            %Set the measured abundance as UB of the enzyme exchange rxn
+            rxn_name       = ['prot_' model.enzymes{i} '_exchange'];
+            pos            = strcmpi(rxn_name,model.rxns);
+            model.ub(pos)  = data(j);
+            match          = true;
+        end
+    end
+end
+%Count mass of measured enzymes. Entries still NaN (no match, or a value
+%removed by cleanDataset) count as non-measured:
+measured       = ~isnan(model.concs);
+concs_measured = model.concs(measured);
+Pmeasured      = sum(concs_measured);
+%Get protein content in biomass pseudoreaction:
+Pbase = sumProtein(model);
+if Pmeasured > 0
+    %Expected total enzyme concentration
+    enzymeConc=Ptot*f;
+    %Non-measured part will be pooled
+    Ppool=enzymeConc-Pmeasured;
+    %Expressed relative to Pbase, because fs*Pbase is passed to constrainPool
+    fs=Ppool/Pbase;
+else
+    fs = f*sigma;
+end
+%Constrain the rest of enzymes with the pool assumption (skipped if the
+%model already contains a prot_pool_exchange reaction):
+if sum(strcmp(model.rxns,'prot_pool_exchange')) == 0
+    model = constrainPool(model,~measured,full(fs*Pbase));
+end
+%After cleanDataset every entry is positive or NaN, so this sum is zero
+%only for an empty dataset
+if sum(data)==0
+    %Modify protein/carb content and GAM:
+    [model,GAM] = scaleBioMass(model,Ptot,GAM);
+end
+%Display some metrics:
+disp(['Total protein amount measured = '     num2str(Pmeasured)              ' g/gDW'])
+disp(['Total enzymes measured = '            num2str(sum(measured))          ' enzymes'])
+disp(['Enzymes in model with 0 g/gDW = '     num2str(sum(concs_measured==0)) ' enzymes'])
+disp(['Total protein amount not measured = ' num2str(Ptot - Pmeasured)       ' g/gDW'])
+disp(['Total enzymes not measured = '        num2str(sum(~measured))         ' enzymes'])
+disp(['Total protein in model = '            num2str(Ptot)                   ' g/gDW'])
+enzUsages = [];
+%Flexibilization only runs when gRate and c_UptakeExp are both provided.
+%The protein pool is updated before and after flexibilizing.
+if nargin > 7
+    model     = updateProtPool(model,Ptot,f*sigma);
+    [tempModel,enzUsages,modifications] = flexibilizeProteins(model,gRate,c_UptakeExp,c_source);
+    model     = updateProtPool(tempModel,Ptot,f*sigma);
+end
+massCoverage = Pmeasured/Ptot;
+%Without usages, return empty tables so both outputs are always defined;
+%otherwise plot the usage distribution
+if isempty(enzUsages)
+    enzUsages      = table({},zeros(0,1),'VariableNames',{'prot_IDs' 'usage'});
+    modifications  = table({},zeros(0,1),zeros(0,1),zeros(0,1),'VariableNames',{'protein_IDs' 'previous_values' 'modified_values' 'flex_mass'});
+else
+     plotHistogram(enzUsages.usage,'Enzyme usage [-]',[0,1],'Enzyme usages','usages')
+end
+%Plot histogram (if there are measurements) - currently disabled:
+%plotHistogram(concs_measured,'Protein amount [mg/gDW]',[1e-3,1e3],'Modelled Protein abundances','abundances')
+end
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+function plotHistogram(variable,xlabelStr,xlimits,titleStr,option)
+% plotHistogram
+%
+%   Draws a histogram of the given values in a new figure. Nothing is
+%   plotted when the values do not sum to a positive number.
+%
+%   variable    Numeric array, or cell array convertible with cell2mat.
+%   xlabelStr   Label for the x-axis.
+%   xlimits     Two-element vector with the x-axis limits.
+%   titleStr    Figure title.
+%   option      'abundances' (case-insensitive): values are multiplied by
+%               1e3 and binned on a log-scaled axis with centers
+%               10.^(-3:0.5:3). Any other value: linear bins with centers
+%               0:0.05:1.
+%
+%   Usage: plotHistogram(variable,xlabelStr,xlimits,titleStr,option)
+%
+
+%Convert cell input to a numeric array. The result must be assigned back,
+%otherwise sum() below fails on the cell array.
+if iscell(variable)
+    variable = cell2mat(variable);
+end
+if sum(variable) > 0
+    %Replace exact zeros by a tiny positive value
+    variable(variable==0) = 1E-15;
+    figure
+    if strcmpi(option,'abundances')
+        hist(variable*1e3,10.^(-3:0.5:3))
+        set(gca,'xscale','log')
+    else
+        hist(variable,(0:0.05:1))
+    end
+    xlim(xlimits)
+    xlabel(xlabelStr)
+    ylabel('Frequency');
+    title(titleStr)
+end
+end
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+function data = cleanDataset(data)
+% cleanDataset
+%
+%   Replaces all zero or negative values in data by NaN.
+%
+%   data    Numeric array (vector or matrix).
+%
+%   data    Same array with every element <= 0 set to NaN. Existing NaN
+%           entries are left untouched.
+%
+%   Usage: data = cleanDataset(data)
+%
+
+%Logical indexing covers every element, also for non-vector input, where
+%a loop over 1:length(data) would stop after max(size(data)) elements.
+data(data<=0) = NaN;
+end