Skip to content

Commit 61568d1

Browse files
authored
Merge pull request #1 from SysBioChalmers/devel
Code and results for publication
2 parents 734b987 + c41af6f commit 61568d1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+413660
-2931
lines changed

.gitattributes

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
* text=auto
2+
*.m text diff=matlab
3+
.gitattributes export-ignore
4+
.gitignore export-ignore
5+
.github export-ignore

.gitignore

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Mac OS X
2+
.DS_Store
3+
.AppleDouble
4+
.LSOverride
5+
6+
# Icon must end with two \r
7+
Icon
8+
9+
# Thumbnails
10+
._*
11+
12+
# Files that might appear in the root of a volume
13+
.DocumentRevisions-V100
14+
.fseventsd
15+
.Spotlight-V100
16+
.TemporaryItems
17+
.Trashes
18+
.VolumeIcon.icns
19+
20+
# Directories potentially created on remote AFP share
21+
.AppleDB
22+
.AppleDesktop
23+
Network Trash Folder
24+
Temporary Items
25+
.apdisk
26+
27+
# MATLAB
28+
*.asv
29+
30+
# R
31+
.Rhistory
32+
.RData
33+
34+
# Binaries
35+
*.docx
36+
*.doc
37+
*.xlsx
38+
*.xls
39+
40+
# Temporary (Office) files
41+
~$*
42+
scrap/
43+
44+
# Cloned repositories
45+
code/GECKO
46+
code/ecModels
47+
code/yeast-GEM

LICENSE.md

Lines changed: 541 additions & 0 deletions
Large diffs are not rendered by default.

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,7 @@
1-
"# overflow"
1+
## Instructions
2+
The `code` folder contains a number of scripts to reconstruct the models and replicate the analysis. Note that the user should have [RAVEN 2.4.0](https://github.com/SysBioChalmers/RAVEN/releases) or later installed.
3+
4+
- `prepareEnvironment.m`: run this script to prepare MATLAB for model reconstruction and analysis. This includes (1) cloning the correct GECKO version; (2) if required cloning yeast-GEM and reconstructing ecYeast-GEM 8.1.3 [also already distributed with this repository]; (3) load all measured flux and proteomics data; (4) set parameters for GECKO.
5+
- `generateProtModels.m`: generate the five condition specific proteome-constrained ec-models, and store these in the models subdirectory. In `results/modelGeneration` is written which enzyme abundances were flexibilized to allow growth at the measured dilution rate. The models are constrained for glucose uptake, growth rate is set to dilution rate and the objective function is set to minimization of the unmeasured protein pool usage. Results from this FBA is written to `results/modelSimulation`.
6+
- `ribosome.m`: add ribosomal subunits to the ec-models. In `results/modelGeneration` is plotted the average abundances of the ribosomal subunits, to identify the "core" ribosome to be included in the model. Also written in this subdirectory is which subunit abundances were flexibilized to allow growth at the measured dilution rate.
7+
- `analyzeUsage.m`: run same FBA as in `generateProtModels` and summarize the enzyme usages in `results/enzymeUsage`, where the capacity and absolute usages are stored separately, together with a summary for each subsystem.

code/DataConstrains.m

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
function model = DataConstrains(model,compounds,bounds,flexBounds)
2+
if ~isempty(compounds)
3+
disp('Constraining byproducts exchange fluxes with fermentation data')
4+
for i=1:length(compounds)
5+
%Get exchange rxn index
6+
if ~strcmpi(compounds{i},'oxygen')
7+
rxnName = [compounds{i} ' exchange'];
8+
else
9+
rxnName = [compounds{i} ' exchange (reversible)'];
10+
end
11+
BPindex = find(strcmpi(model.rxnNames,rxnName));
12+
if ~isempty(BPindex)
13+
disp([compounds{i} ' exchange has been constrained to: ' num2str(bounds(i)) ' [mmol/gDw h]'])
14+
%Allow some flexibility
15+
model = setParam(model,'ub',BPindex,flexBounds(1)*bounds(i));
16+
if numel(flexBounds)>1
17+
model = setParam(model,'lb',BPindex,flexBounds(2)*bounds(i));
18+
end
19+
else
20+
disp(['No exchange rxn for ' compounds{i} ' was found in ecModel'])
21+
end
22+
end
23+
end
24+
disp(' ')
25+
end

code/analyzeUsage.m

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
% Analyze enzymeUsage per subsystem
2+
% Assumed is that models are already constructed by generateProtModels
3+
% and ribosome.m is run to add ribosomal subunits.
4+
5+
load('../models/ecModel_P_CN4.mat')
6+
load('../models/ecModel_P_CN22.mat')
7+
load('../models/ecModel_P_CN38.mat')
8+
load('../models/ecModel_P_CN75.mat')
9+
load('../models/ecModel_P_hGR.mat')
10+
11+
ecModels{1}=ecModelP_CN4;
12+
ecModels{2}=ecModelP_CN22;
13+
ecModels{3}=ecModelP_CN38;
14+
ecModels{4}=ecModelP_CN75;
15+
ecModels{5}=ecModelP_hGR;
16+
17+
%% Get enzymes usages to each reaction
18+
for i=1:5
19+
disp(['Now testing: ' flux.conds{i}])
20+
sol{i} = solveLP(ecModels{i});
21+
[absUsage{i}, capUsage{i}, UB{i}, protName{i}]=enzymeUsage(ecModels{i},sol{i}.x,true);
22+
printFluxes(ecModels{i},sol{i}.x,false,0,fullfile('..','results','modelSimulation',['allFluxes_' flux.conds{i},'.txt']),'%rxnID\t%rxnName\t%eqn\t%flux\n');
23+
end
24+
25+
%% Prepare output
26+
clear out
27+
out(:,1)=ecModels{1}.enzymes;
28+
out(:,2)=ecModels{1}.enzGenes;
29+
out(:,3)=ecModels{1}.enzNames;
30+
for i=1:5
31+
out(:,3+i)=strtrim(cellstr(num2str(capUsage{i},3)));
32+
end
33+
for i=1:5
34+
out(:,8+i)=strtrim(cellstr(num2str(absUsage{i},3)));
35+
end
36+
for i=1:5
37+
out(:,13+i)=strtrim(cellstr(num2str(UB{i},3)));
38+
end
39+
40+
%% All usage per subSystem
41+
head={'protID','geneID','protName','capUse_CN4','capUse_CN22','capUse_CN38','capUse_CN75',...
42+
'capUse_hGR','absUse_CN4','absUse_CN22','absUse_CN38','absUse_CN75','absUse_hGR',...
43+
'UB_CN4','UB_CN22','UB_CN38','UB_CN75','UB_hGR'};
44+
out=cell2table(out,'VariableNames',head);
45+
writetable(out,fullfile('..','results','enzymeUsage','enzymeUsages.txt'),'Delimiter','\t')

code/boxplotEnzymeUsage.R

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# This script takes the enzyme usage data from ec-models and plots this in various ways
2+
#install.packages("tidyverse") # Install tidyverse if required
3+
library(tidyverse)
4+
# Adjust to the correct directory
5+
setwd("C:/Work/GitHub/overflow/results/enzymeUsage")
6+
7+
# Load usage information and remove proteins with always zero usage
8+
capUse <- read.delim('enzymeUsages.txt')
9+
capUse <- capUse[,1:8]
10+
capSum <- rowSums(capUse[,4:8])
11+
capUse <- capUse[!capSum==0,]
12+
capUse[,4:8] <- capUse[,4:8]*100
13+
14+
# Plot based on GO term annotation as obtained from Uniprot (first rearrange data)
15+
GO <- read.delim('../../data/selectedAnnotation.txt', stringsAsFactors = F)
16+
17+
# Only keep data from relevant GO terms
18+
capUse <- capUse[capUse$protID %in% GO$Entry,]
19+
idx <- match(capUse$protID, GO$Entry)
20+
capUse$GOterm <- GO$system[idx]
21+
colnames(capUse) <- gsub('capUse_','',colnames(capUse))
22+
23+
capUse <- capUse %>% mutate_if(is.numeric, round, digits = 3)
24+
write_delim(capUse,'../../results/enzymeUsage/capUsage.txt',delim = '\t')
25+
26+
capUse <- gather(capUse, 'Condition', 'Usage', 4:8)
27+
capUse$GOterm <- factor(capUse$GOterm, levels=c('Glycolysis','TCA cycle','ETC','PP shunt','THF cycle','Ribosome','Nitrogen metabolism','Amino acid metabolism'))
28+
capUse$Condition <- factor(capUse$Condition, levels=c('CN4','CN22','CN38','CN75','hGR'))
29+
30+
plot1<-capUse[capUse$GOterm %in% c('Glycolysis','TCA cycle','ETC','Ribosome'),]
31+
ggplot(plot1, aes(x = Condition, y = Usage, color=GOterm)) +
32+
geom_boxplot(lwd = 0.35) +
33+
scale_color_manual(values=c('#CBBBA0','#1D1D1B','#1D71B8','#878787')) +
34+
facet_grid(. ~ GOterm) +
35+
labs(x = '', y = 'Capacity usage (%)') +
36+
theme_classic() +
37+
theme(axis.text.x=element_text(angle = 90, vjust = 0.5), text = element_text(size=7),
38+
line = element_line(size=0.15), strip.background = element_blank(),
39+
axis.line = element_line(size=0.15), legend.position='none')
40+
ggsave("selectedGOtermUsage.pdf", width=10, height=4.5, units='cm')
41+
42+
plot2<-capUse[capUse$GOterm %in% c('PP shunt','THF cycle','Nitrogen metabolism','Amino acid metabolism'),]
43+
ggplot(plot2, aes(x = Condition, y = Usage, color=GOterm)) +
44+
geom_boxplot(lwd = 0.35) +
45+
scale_color_manual(values=c('#CBBBA0','#1D1D1B','#1D71B8','#878787')) +
46+
facet_grid(. ~ GOterm) +
47+
labs(x = '', y = 'Capacity usage (%)') +
48+
theme_classic() +
49+
theme(axis.text.x=element_text(angle = 90, vjust = 0.5), text = element_text(size=7),
50+
line = element_line(size=0.15), strip.background = element_blank(),
51+
axis.line = element_line(size=0.15), legend.position='none')
52+
ggsave("supplementGOtermUsage.pdf", width=10, height=4.5, units='cm')
53+
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
function [model,enzUsages,modifications,GAM,massCoverage] = constrainEnzymes(model,f,GAM,Ptot,pIDs,data,gRate,c_UptakeExp,parameters)
2+
% constrainEnzymes
3+
%
4+
% Main function for overlaying proteomics data on an enzyme-constrained
5+
% model. If chosen, also scales the protein content, optimizes GAM, and
6+
% flexibilizes the proteomics data.
7+
%
8+
% model ecModel.
9+
% f (Opt) Estimated mass fraction of enzymes in model.
10+
% GAM (Opt) Growth-associated maintenance value. If not
11+
% provided, it will be fitted to chemostat data.
12+
% Ptot (Opt) Total protein content, provide if desired content
13+
% is different from the one reported in getModelParameters [gProt/gDw]
14+
% pIDs (Opt) Protein IDs from proteomics data.
15+
% data (Opt) Protein abundances from proteomics data [mmol/gDW].
16+
% gRate (Opt) Experimental growth rate at which the proteomics
17+
% data were obtained [1/h]
18+
% c_UptakeExp (Opt) Experimentally measured glucose uptake rate
19+
% [mmol/gDW h].
20+
%
21+
% model ecModel with calibrated enzyme usage upper bounds
22+
% enzUsages Calculated enzyme usages after final calibration
23+
% (enzyme_i demand/enzyme_i upper bound)
24+
% modifications Table with all the modified values
25+
% (Protein ID/old value/Flexibilized value)
26+
% GAM Fitted GAM value for the ecModel
27+
% massCoverage Ratio between measured and total mass of protein in the model
28+
%
29+
% Usage: [model,enzUsages,modifications, GAM,massCoverage] = constrainEnzymes(model,f,GAM,Ptot,pIDs,data,gRate,c_UptakeExp)
30+
%
31+
% Benjamin J. Sanchez. Last update 2018-12-11
32+
% Ivan Domenzain. Last update 2020-03-02
33+
%
34+
35+
%get model parameters
36+
if nargin<9
37+
cd ..
38+
parameters = getModelParameters;
39+
cd limit_proteins
40+
end
41+
sigma = parameters.sigma;
42+
c_source = parameters.c_source;
43+
44+
%Compute f if not provided:
45+
if nargin < 2
46+
[f,~] = measureAbundance(model.enzymes);
47+
else
48+
if isempty(f)
49+
[f,~] = measureAbundance(model.enzymes);
50+
end
51+
end
52+
%Leave GAM empty if not provided (will be fitted later):
53+
if nargin < 3
54+
GAM = [];
55+
end
56+
%Load Ptot if not provided:
57+
if nargin < 4
58+
Ptot = parameters.Ptot;
59+
end
60+
%No UB will be changed if no data is available -> pool = all enzymes(FBAwMC)
61+
if nargin < 5
62+
pIDs = cell(0,1);
63+
data = zeros(0,1);
64+
end
65+
%Remove zeros or negative values
66+
data = cleanDataset(data);
67+
%Assign concentrations as UBs [mmol/gDW]:
68+
model.concs = nan(size(model.enzymes)); %OBS: min value is zero!!
69+
disp('Matching data to enzymes in model...')
70+
for i = 1:length(model.enzymes)
71+
match = false;
72+
for j = 1:length(pIDs)
73+
if strcmpi(pIDs{j},model.enzymes{i}) && ~match
74+
model.concs(i) = data(j)*model.MWs(i); %g/gDW
75+
rxn_name = ['prot_' model.enzymes{i} '_exchange'];
76+
pos = strcmpi(rxn_name,model.rxns);
77+
model.ub(pos) = data(j);
78+
match = true;
79+
end
80+
end
81+
end
82+
%Count mass of non-measured enzymes:
83+
measured = ~isnan(model.concs);
84+
concs_measured = model.concs(measured);
85+
Pmeasured = sum(concs_measured);
86+
%Get protein content in biomass pseudoreaction:
87+
Pbase = sumProtein(model);
88+
if Pmeasured > 0
89+
%Expected total enzyme concentration
90+
enzymeConc=Ptot*f;
91+
%Non-measured part will be pooled
92+
Ppool=enzymeConc-Pmeasured;
93+
fs=Ppool/Pbase;
94+
else
95+
fs = f*sigma;
96+
end
97+
%Constrain the rest of enzymes with the pool assumption:
98+
if sum(strcmp(model.rxns,'prot_pool_exchange')) == 0
99+
model = constrainPool(model,~measured,full(fs*Pbase));
100+
end
101+
if sum(data)==0
102+
%Modify protein/carb content and GAM:
103+
[model,GAM] = scaleBioMass(model,Ptot,GAM);
104+
end
105+
%Display some metrics:
106+
disp(['Total protein amount measured = ' num2str(Pmeasured) ' g/gDW'])
107+
disp(['Total enzymes measured = ' num2str(sum(measured)) ' enzymes'])
108+
disp(['Enzymes in model with 0 g/gDW = ' num2str(sum(concs_measured==0)) ' enzymes'])
109+
disp(['Total protein amount not measured = ' num2str(Ptot - Pmeasured) ' g/gDW'])
110+
disp(['Total enzymes not measured = ' num2str(sum(~measured)) ' enzymes'])
111+
disp(['Total protein in model = ' num2str(Ptot) ' g/gDW'])
112+
enzUsages = [];
113+
if nargin > 7
114+
model = updateProtPool(model,Ptot,f*sigma);
115+
[tempModel,enzUsages,modifications] = flexibilizeProteins(model,gRate,c_UptakeExp,c_source);
116+
model = updateProtPool(tempModel,Ptot,f*sigma);
117+
end
118+
massCoverage = Pmeasured/Ptot;
119+
if isempty(enzUsages)
120+
enzUsages = table({},zeros(0,1),'VariableNames',{'prot_IDs' 'usage'});
121+
modifications = table({},zeros(0,1),zeros(0,1),zeros(0,1),'VariableNames',{'protein_IDs' 'previous_values' 'modified_values' 'flex_mass'});
122+
else
123+
plotHistogram(enzUsages.usage,'Enzyme usage [-]',[0,1],'Enzyme usages','usages')
124+
end
125+
%Plot histogram (if there are measurements):
126+
%plotHistogram(concs_measured,'Protein amount [mg/gDW]',[1e-3,1e3],'Modelled Protein abundances','abundances')
127+
end
128+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
129+
function plotHistogram(variable,xlabelStr,xlimits,titleStr,option)
130+
if iscell(variable)
131+
cell2mat(variable);
132+
end
133+
if sum(variable) > 0
134+
variable(variable==0) = 1E-15;
135+
figure
136+
if strcmpi(option,'abundances')
137+
hist(variable*1e3,10.^(-3:0.5:3))
138+
set(gca,'xscale','log')
139+
else
140+
hist(variable,(0:0.05:1))
141+
end
142+
xlim(xlimits)
143+
xlabel(xlabelStr)
144+
ylabel('Frequency');
145+
title(titleStr)
146+
end
147+
end
148+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
149+
function data = cleanDataset(data)
150+
for i=1:length(data)
151+
if data(i)<=0
152+
data(i) = NaN;
153+
end
154+
end
155+
end

0 commit comments

Comments
 (0)