From cff2a9e32df0389edf52226169ba68ef27ba6dcc Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Mon, 3 Jun 2024 01:20:16 -0500 Subject: [PATCH] Checking in fixes to support phenotype simulations --- modelseedpy/core/msgapfill.py | 28 ++++++++---- modelseedpy/core/msgrowthphenotypes.py | 62 +++++++++++++++++++++++--- modelseedpy/core/msmodelutl.py | 30 ++++++++++++- modelseedpy/fbapkg/gapfillingpkg.py | 33 +++++++------- modelseedpy/fbapkg/kbasemediapkg.py | 8 +++- 5 files changed, 125 insertions(+), 36 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 8335878a..84f36d34 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -123,6 +123,8 @@ def test_gapfill_database(self, media, target=None, before_filtering=True): self.gfpkgmgr.getpkg("KBaseMediaPkg").build_package(media) if self.gfpkgmgr.getpkg("GapfillingPkg").test_gapfill_database(): return True + if self.gfpkgmgr.getpkg("GapfillingPkg").test_solution.status == 'infeasible': + return False gf_sensitivity = {} if target != "rxn00062_c0": gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) @@ -150,7 +152,7 @@ def test_gapfill_database(self, media, target=None, before_filtering=True): ) return False - def prefilter(self,test_conditions=None,growth_conditions=[],use_prior_filtering=True): + def prefilter(self,test_conditions=None,growth_conditions=[],use_prior_filtering=True,base_filter_only=False): """Prefilters the database by removing any reactions that break specified ATP tests Parameters ---------- @@ -167,7 +169,8 @@ def prefilter(self,test_conditions=None,growth_conditions=[],use_prior_filtering self.gfpkgmgr.getpkg("GapfillingPkg").filter_database_based_on_tests( self.test_conditions, growth_conditions=growth_conditions, - base_filter=base_filter + base_filter=base_filter, + base_filter_only=base_filter_only ) gf_filter = self.gfpkgmgr.getpkg("GapfillingPkg").modelutl.get_attributes( "gf_filter", {} @@ -407,7 +410,8 @@ def run_multi_gapfill( check_for_growth=True, gapfilling_mode="Sequential", run_sensitivity_analysis=True, - integrate_solutions=True + integrate_solutions=True, + remove_unneeded_reactions=True ): """Run gapfilling across an array of media conditions ultimately using different integration policies: simultaneous gapfilling, independent gapfilling, cumulative gapfilling Parameters @@ -437,8 +441,9 @@ def run_multi_gapfill( self.model = cobra.io.json.from_json(cobra.io.json.to_json(self.model)) self.mdlutl = MSModelUtil.get(self.model) #Setting the default minimum objective - if not default_minimum_objective: + if default_minimum_objective == None: default_minimum_objective = self.default_minimum_objective + self.gfpkgmgr.getpkg("GapfillingPkg").parameters["minimum_obj"] = default_minimum_objective #Checking that each media to ensure gapfilling works before filtering for media in media_list: currtarget = target @@ -498,7 +503,7 @@ def run_multi_gapfill( solution_dictionary[item] = self.integrate_gapfill_solution( solution, cumulative_solution=cumulative_solution, - remove_unneeded_reactions=True, + remove_unneeded_reactions=remove_unneeded_reactions, check_for_growth=check_for_growth, gapfilling_mode=gapfilling_mode ) @@ -544,6 +549,8 @@ def run_multi_gapfill( self.gfpkgmgr.getpkg("GapfillingPkg").compute_gapfilling_penalties(reaction_scores=self.reaction_scores) self.gfpkgmgr.getpkg("GapfillingPkg").build_gapfilling_objective_function() #Running sensitivity analysis once on the cumulative solution for all media + with open("datacache/solutions.json", 'w') as f: + json.dump(solution_dictionary,f,indent=4,skipkeys=True) if run_sensitivity_analysis: logger.info( "Gapfilling sensitivity analysis running" @@ -618,7 +625,7 @@ def integrate_gapfill_solution( gapfilling_mode : Cumulative, Independent, Simultaneous Specify what the gapfilling mode is because this determines how integration is performed """ - logger.debug(f"Initial solution: {str(solution)}") + logger.info(f"Initial solution: {str(solution)}") original_objective = self.mdlutl.model.objective self.mdlutl.model.objective = solution["target"] self.mdlutl.model.objective.direction = "max" @@ -643,7 +650,7 @@ def integrate_gapfill_solution( #Clearing current bounds because we only want to add reaction in the direction it was gapfilled in rxn.upper_bound = 0 rxn.lower_bound = 0 - logger.debug(f"integrating rxn: {item[0]}") + logger.info(f"integrating rxn: {item[0]}") rxn = self.model.reactions.get_by_id(item[0]) #Setting genes if the reaction has no genes if len(rxn.genes) == 0: @@ -681,6 +688,7 @@ def integrate_gapfill_solution( new_cumulative_reactions.append([item[0], item[1],item[2]]) #Testing the full cumulative solution to see which reactions are needed for current media/target full_solution = cumulative_solution + new_cumulative_reactions + logger.info(f"Full solution: {str(full_solution)}") #Setting up structure to store the finalized solution for this media/target current_media_target_solution = {"growth":0,"media":solution["media"],"target":solution["target"],"minobjective":solution["minobjective"],"binary_check":solution["binary_check"] ,"new":{},"reversed":{}} #If gapfilling is independent, we only check the specific solution @@ -693,6 +701,7 @@ def integrate_gapfill_solution( cumulative_solution.append(item) #elif not remove_unneeded_reactions and not self.mdlutl.find_item_in_solution(cumulative_solution,item): # cumulative_solution.append(item) + logger.info(f"Cumulative media target solution: {str(current_media_target_solution)}") else: unneeded = self.mdlutl.test_solution(full_solution,[solution["target"]],[solution["media"]],[solution["minobjective"]],remove_unneeded_reactions,do_not_remove_list=cumulative_solution)#Returns reactions in input solution that are not needed for growth for item in cumulative_solution: @@ -704,12 +713,13 @@ def integrate_gapfill_solution( cumulative_solution.append(item) #elif not remove_unneeded_reactions: # cumulative_solution.append(item) - logger.debug(f"Unneeded: {str(unneeded)}") + logger.info(f"Unneeded: {str(unneeded)}") + logger.info(f"Cumulative: {str(cumulative_gapfilling)}") #Checking that the final integrated model grows if check_for_growth: self.mdlutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(solution["media"]) current_media_target_solution["growth"] = self.mdlutl.model.slim_optimize() - logger.debug(f"Growth: {str(current_media_target_solution['growth'])} {solution['media'].id}") + logger.info(f"Growth: {str(current_media_target_solution['growth'])} {solution['media'].id}") # Adding the gapfilling solution data to the model, which is needed for saving the model in KBase self.mdlutl.add_gapfilling(solution) # Testing which gapfilled reactions are needed to produce each reactant in the objective function diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index 5e8b07f3..a390de44 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -13,6 +13,7 @@ logging.INFO ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO +zero_threshold = 0.0000001 class MSGrowthPhenotype: def __init__( @@ -62,9 +63,10 @@ def simulate( multiplier=3, add_missing_exchanges=False, save_fluxes=False, - pfba=False, + save_reaction_list=False, ignore_experimental_data=False, - baseline_objective=0.01 + baseline_objective=0.01, + flux_coefficients=None, ): """Simulates a single phenotype Parameters @@ -145,10 +147,52 @@ def simulate( # Optimizing model solution = modelutl.model.optimize() output["objective_value"] = solution.objective_value - if solution.objective_value > 0 and pfba: - solution = cobra.flux_analysis.pfba(modelutl.model) - if save_fluxes: - output["fluxes"] = solution.fluxes + if solution.objective_value > 0: + if flux_coefficients == None: + solution = cobra.flux_analysis.pfba(modelutl.model) + else: + #modelutl.printlp(lpfilename="lpfiles/gapfill.lp") + if '1_objc' in modelutl.model.constraints: + constraint = modelutl.model.constraints['1_objc'] + modelutl.model.remove_cons_vars([constraint]) + modelutl.pkgmgr.getpkg("ObjConstPkg").build_package( + 0.2*output["objective_value"], None + ) + coefobj = modelutl.model.problem.Objective(0, direction="min") + modelutl.model.objective = coefobj + obj_coef = {} + for rxn in flux_coefficients: + rxnid = rxn + direction = "=" + if rxn[0:1] == ">" or rxn[0:1] == "<": + direction = rxn[0:1] + rxnid = rxn[1:] + if rxnid in modelutl.model.reactions: + rxnobj = modelutl.model.reactions.get_by_id(rxnid) + if direction == ">" or direction == "=": + obj_coef[rxnobj.forward_variable] = flux_coefficients[rxn] + if direction == "<" or direction == "=": + obj_coef[rxnobj.reverse_variable] = flux_coefficients[rxn] + coefobj.set_linear_coefficients(obj_coef) + solution = modelutl.model.optimize() + modelutl.pkgmgr.getpkg("ObjConstPkg").clear() + if save_reaction_list: + output["reactions"] = [] + if save_fluxes: + output["fluxes"] = solution.fluxes + output["gapfill_count"] = 0 + output["reaction_count"] = 0 + for reaction in modelutl.model.reactions: + if reaction.id in solution.fluxes: + flux = solution.fluxes[reaction.id] + if abs(flux) > zero_threshold: + output["reaction_count"] += 1 + if reaction.id[0:3] != "bio" and reaction.id[0:3] != "EX_" and reaction.id[0:3] != "DM_" and len(reaction.genes) == 0: + output["gapfill_count"] += 1 + if save_reaction_list and flux > zero_threshold: + output["reactions"].append(">"+reaction.id) + elif save_reaction_list: + output["reactions"].append("<"+reaction.id) # Determining phenotype class if output["objective_value"] >= output["baseline_objective"] * multiplier: @@ -424,10 +468,12 @@ def simulate_phenotypes( multiplier=3, add_missing_exchanges=False, save_fluxes=False, + save_reaction_list=False, gapfill_negatives=False, msgapfill=None, test_conditions=None, - ignore_experimental_data=False + ignore_experimental_data=False, + flux_coefficients=None ): """Simulates all the specified phenotype conditions and saves results Parameters @@ -471,7 +517,9 @@ def simulate_phenotypes( multiplier, add_missing_exchanges, save_fluxes, + save_reaction_list=save_reaction_list, ignore_experimental_data=ignore_experimental_data, + flux_coefficients=flux_coefficients ) datahash[pheno.id] = result data["Class"].append(result["class"]) diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index 2cf26995..014cd616 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -378,6 +378,7 @@ def set_objective_from_phenotype(self,phenotype,missing_transporters=[],create_m for reaction in self.model.reactions: if reaction.objective_coefficient != 0: reaction.objective_coefficient = -1*reaction.objective_coefficient + self.model.objective.direction = 'max' return str(self.model.objective) ################################################################################# @@ -427,6 +428,12 @@ def add_transport_and_exchange_for_metabolite(self, met,direction="=",prefix="tr transport.name = "Charge-nuetral transport for " + met.name transport.add_metabolites(stoich) transport.annotation["sbo"] = "SBO:0000185" + transport.upper_bound = 0 + transport.lower_bound = 0 + if direction == ">" or direction == "=": + transport.upper_bound = 1000 + if direction == "<" or direction == "=": + transport.lower_bound = -1000 self.model.add_reactions([transport]) self.add_exchanges_for_metabolites([exmet],0,1000) return transport @@ -786,7 +793,8 @@ def test_solution(self,solution,targets,medias,thresholds=[0.1],remove_unneeded_ solution = self.convert_solution_to_list(solution) #Processing solution in standardized format for item in solution: - rxn_id = item[0] + rxn_id = item[0] + other_original_bound = None rxnobj = self.model.reactions.get_by_id(rxn_id) #Testing all media and target and threshold combinations to see if the reaction is needed needed = False @@ -800,9 +808,15 @@ def test_solution(self,solution,targets,medias,thresholds=[0.1],remove_unneeded_ #This has to happen after media is applied in case the reaction is an exchange if item[1] == ">": original_bound = rxnobj.upper_bound + if rxnobj.lower_bound > 0: + other_original_bound = rxnobj.lower_bound + rxnobj.lower_bound = 0 rxnobj.upper_bound = 0 else: original_bound = rxnobj.lower_bound + if rxnobj.upper_bound < 0: + other_original_bound = rxnobj.upper_bound + rxnobj.upper_bound = 0 rxnobj.lower_bound = 0 #Computing the objective value objective = self.model.slim_optimize() @@ -818,7 +832,7 @@ def test_solution(self,solution,targets,medias,thresholds=[0.1],remove_unneeded_ ) #If the reaction isn't needed for any media and target combinations, add it to the unneeded list if not needed: - unneeded.append([rxn_id, item[1], item[2],original_bound]) + unneeded.append([rxn_id, item[1], item[2],original_bound,other_original_bound]) logger.info( rxn_id + item[1] @@ -830,16 +844,24 @@ def test_solution(self,solution,targets,medias,thresholds=[0.1],remove_unneeded_ #Restore the reaction if it is needed if item[1] == ">": rxnobj.upper_bound = original_bound + if other_original_bound != None: + rxnobj.lower_bound = other_original_bound else: rxnobj.lower_bound = original_bound + if other_original_bound != None: + rxnobj.upper_bound = other_original_bound if not remove_unneeded_reactions: #Restoring the bounds on the unneeded reactions for item in unneeded: rxnobj = self.model.reactions.get_by_id(item[0]) if item[1] == ">": rxnobj.upper_bound = item[3] + if item[4] != None: + rxnobj.lower_bound = item[4] else: rxnobj.lower_bound = item[3] + if item[4] != None: + rxnobj.upper_bound = item[4] else: #Do not restore bounds on unneeded reactions and remove reactions from model if their bounds are zero removed_rxns = [] @@ -848,8 +870,12 @@ def test_solution(self,solution,targets,medias,thresholds=[0.1],remove_unneeded_ if self.find_item_in_solution(do_not_remove_list,item): if item[1] == ">": rxnobj.upper_bound = item[3] + if item[4] != None: + rxnobj.lower_bound = item[4] else: rxnobj.lower_bound = item[3] + if item[4] != None: + rxnobj.upper_bound = item[4] elif rxnobj.lower_bound == 0 and rxnobj.upper_bound == 0 and not self.find_item_in_solution(do_not_remove_list,item,ignore_dir=True): removed_rxns.append(rxnobj) if len(removed_rxns) > 0: diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index c7a0b595..c795d327 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -716,16 +716,16 @@ def run_test_conditions(self, condition_list, solution=None, max_iterations=10): def test_gapfill_database(self): self.reset_objective_minimum(0,False) self.model.objective = self.original_objective - solution = self.model.optimize() + self.test_solution = self.model.optimize() logger.info( "Objective with gapfill database:" - + str(solution.objective_value) + + str(self.test_solution.objective_value) + "; min objective:" + str(self.parameters["minimum_obj"]) ) self.reset_objective_minimum(self.parameters["minimum_obj"]) self.model.objective = self.parameters["gfobj"] - if solution.objective_value < self.parameters["minimum_obj"]: + if self.test_solution.objective_value < self.parameters["minimum_obj"] or self.test_solution.status == 'infeasible': return False return True @@ -745,7 +745,7 @@ def reset_objective_minimum(self, min_objective,reset_params=True): if min_objective < 0: self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].ub = min_objective - def filter_database_based_on_tests(self,test_conditions,growth_conditions=[],base_filter=None,base_target="rxn00062_c0"): + def filter_database_based_on_tests(self,test_conditions,growth_conditions=[],base_filter=None,base_target="rxn00062_c0",base_filter_only=False): #Saving the current media current_media = self.current_media() #Clearing element uptake constraints @@ -768,18 +768,19 @@ def filter_database_based_on_tests(self,test_conditions,growth_conditions=[],bas else: rxnobj.lower_bound = 0 # Filtering the database of any reactions that violate the specified tests - filetered_list = [] - with self.model: - rxnlist = [] - for reaction in self.model.reactions: - if reaction.id in self.gapfilling_penalties: - if "reverse" in self.gapfilling_penalties[reaction.id]: - rxnlist.append([reaction, "<"]) - if "forward" in self.gapfilling_penalties[reaction.id]: - rxnlist.append([reaction, ">"]) - filtered_list = self.modelutl.reaction_expansion_test( - rxnlist, test_conditions - ) + filtered_list = [] + if not base_filter_only: + with self.model: + rxnlist = [] + for reaction in self.model.reactions: + if reaction.id in self.gapfilling_penalties: + if "reverse" in self.gapfilling_penalties[reaction.id]: + rxnlist.append([reaction, "<"]) + if "forward" in self.gapfilling_penalties[reaction.id]: + rxnlist.append([reaction, ">"]) + filtered_list = self.modelutl.reaction_expansion_test( + rxnlist, test_conditions + ) #Adding base filter reactions to model if base_filter != None: gf_filter_att = self.modelutl.get_attributes("gf_filter", {}) diff --git a/modelseedpy/fbapkg/kbasemediapkg.py b/modelseedpy/fbapkg/kbasemediapkg.py index 7e53a0af..92525b30 100644 --- a/modelseedpy/fbapkg/kbasemediapkg.py +++ b/modelseedpy/fbapkg/kbasemediapkg.py @@ -50,8 +50,12 @@ def build_package( # First initializing all exchanges to default uptake and excretion exchange_list = self.modelutl.exchange_list() for reaction in exchange_list: - reaction.lower_bound = -1 * self.parameters["default_uptake"] - reaction.upper_bound = self.parameters["default_excretion"] + if -1 * self.parameters["default_uptake"] > reaction.upper_bound: + reaction.upper_bound = self.parameters["default_excretion"] + reaction.lower_bound = -1 * self.parameters["default_uptake"] + else: + reaction.lower_bound = -1 * self.parameters["default_uptake"] + reaction.upper_bound = self.parameters["default_excretion"] # Now constraining exchanges for specific compounds specified in the media if self.parameters["media"]: