-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from mpecchi/add_my_figure
Separated plotting functions, fixed bugs, added lower level testing
- Loading branch information
Showing
34 changed files
with
5,010 additions
and
3,035 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,220 @@ | ||
# %% Import necessary libraries | ||
import pathlib as plib # Used for handling file and directory paths | ||
from gcms_data_analysis import Project | ||
from gcms_data_analysis.plotting import plot_ave_std | ||
|
||
# Define the folder path where your data is located. Change this path to where you've stored your data files. | ||
# folder_path = plib.Path(plib.Path(__file__).parent, "example\data") | ||
folder_path = plib.Path( | ||
r"C:\Users\mp933\OneDrive - Cornell University\Python\gcms_data_analysis\tests\data_minimal_case" | ||
) | ||
# folder_path: plib.Path = plib.Path( | ||
# r"C:\Users\mp933\OneDrive - Cornell University\Python\GCMS\NNDNDD" | ||
# ) | ||
# Set global configurations for the Project class. | ||
# These configurations affect all instances of the class. | ||
Project.set_folder_path( | ||
folder_path | ||
) # Set the base folder path for the project's data files | ||
Project.set_plot_grid(False) # Disable grid lines in plots for a cleaner look | ||
Project.set_plot_font("Sans") # Set the font style for plots to 'Sans' | ||
Project.set_auto_save_to_excel(False) | ||
# Initialize a Project instance to manage and analyze GCMS data | ||
gcms = Project() | ||
|
||
# Load metadata from a user-provided 'files_info.xlsx' file, or generate it from .txt GC-MS files if not provided | ||
files_info = gcms.load_files_info() | ||
# Load individual GCMS .txt files as pandas DataFrames | ||
files = gcms.load_all_files() | ||
files = gcms.add_iupac_to_files() | ||
list_of_all_compounds = gcms.create_list_of_all_compounds() | ||
files, is_files_deriv = gcms.apply_calibration_to_files() | ||
samples_info, samples_info_std = gcms.create_samples_info() | ||
samples, samples_std = gcms.create_samples_from_files() | ||
|
||
params = [ | ||
"height", | ||
"area", | ||
"area_if_undiluted", | ||
"conc_vial_mg_L", | ||
"conc_vial_if_undiluted_mg_L", | ||
"fraction_of_sample_fr", | ||
"fraction_of_feedstock_fr", | ||
] | ||
for param in params: | ||
_ = gcms.create_files_param_report(param) | ||
_ = gcms.create_files_param_aggrrep(param) | ||
|
||
_, _ = gcms.create_samples_param_report(param) | ||
_, _ = gcms.create_samples_param_aggrrep(param) | ||
|
||
# %% | ||
for param in params: | ||
print(f"'{param}': ") | ||
print_checked_df_to_script_text_with_arrays(gcms.files_reports[param]) | ||
# %% | ||
|
||
for param in params: | ||
print(f"'{param}': ") | ||
print_checked_df_to_script_text_with_arrays(gcms.files_aggrreps[param]) | ||
# %% | ||
for param in params: | ||
print(f"'{param}': ") | ||
print_checked_df_to_script_text_with_arrays(gcms.samples_reports[param]) | ||
# %% | ||
for param in params: | ||
print(f"'{param}': ") | ||
print_checked_df_to_script_text_with_arrays(gcms.samples_reports_std[param]) | ||
# %% | ||
|
||
for param in params: | ||
print(f"'{param}': ") | ||
print_checked_df_to_script_text_with_arrays(gcms.samples_aggrreps[param]) | ||
# %% | ||
|
||
for param in params: | ||
print(f"'{param}': ") | ||
print_checked_df_to_script_text_with_arrays(gcms.samples_aggrreps_std[param]) | ||
# %% | ||
|
||
|
||
# Load classification codes and mass fractions for functional groups from a provided file | ||
class_code_frac = gcms.load_class_code_frac() | ||
|
||
# Load calibration data for standard and derivatized samples, and determine if they are derivatized | ||
calibrations, is_calibr_deriv = gcms.load_calibrations() | ||
# c1, c2 = calibrations["calibration"], calibrations["deriv_calibration"] | ||
|
||
# Generate a comprehensive list of all compounds found across samples | ||
list_of_all_compounds = gcms.create_list_of_all_compounds() | ||
|
||
# Similarly, create a list of all derivatized compounds found across samples | ||
list_of_all_deriv_compounds = gcms.create_list_of_all_deriv_compounds() | ||
|
||
# Load properties for standard and derivatized compounds from provided files | ||
compounds_properties = gcms.create_compounds_properties() | ||
deriv_compounds_properties = gcms.create_deriv_compounds_properties() | ||
|
||
# Flag indicating whether new compounds have been added, triggering a need to regenerate properties data | ||
new_files_with_new_compounds_added = False | ||
if new_files_with_new_compounds_added: | ||
compounds_properties = gcms.create_compounds_properties() | ||
deriv_compounds_properties = gcms.create_deriv_compounds_properties() | ||
|
||
# Apply calibration data to all loaded files, adjusting compound concentrations based on calibration curves | ||
files, is_files_deriv = gcms.apply_calibration_to_files() | ||
|
||
# Extract specific files for detailed analysis or further operations | ||
f11, f22, f33 = files["A_1"], files["Ader_1"], files["B_1"] | ||
|
||
# # Add statistical information to the files_info DataFrame, such as mean, median, and standard deviation for each file | ||
files_info = gcms.add_stats_to_files_info() | ||
|
||
# Create a samples_info DataFrame without applying calibration data, for initial analysis | ||
samples_info_0 = gcms.create_samples_info() | ||
|
||
# Create samples and their standard deviations from the files, storing the results in dictionaries | ||
samples, samples_std = gcms.create_samples_from_files() | ||
s1, s2, s3 = samples["A"], samples["Ader"], samples["B"] | ||
sd1, sd2, sd3 = samples_std["A"], samples_std["Ader"], samples_std["B"] | ||
|
||
# Generate reports for specific parameters (e.g., concentration, mass fraction) for files and samples | ||
rep_files_conc = gcms.create_files_param_report(param="conc_vial_mg_L") | ||
rep_files_fr = gcms.create_files_param_report(param="fraction_of_sample_fr") | ||
rep_samples_conc, rep_samples_conc_std = gcms.create_samples_param_report( | ||
param="conc_vial_mg_L" | ||
) | ||
rep_samples_fr, rep_samples_fr_std = gcms.create_samples_param_report( | ||
param="fraction_of_sample_fr" | ||
) | ||
|
||
# Generate aggregated reports based on functional groups for files and samples, for specific parameters | ||
agg_files_conc = gcms.create_files_param_aggrrep(param="conc_vial_mg_L") | ||
agg_files_fr = gcms.create_files_param_aggrrep(param="fraction_of_sample_fr") | ||
agg_samples_conc, agg_samples_conc_std = gcms.create_samples_param_aggrrep( | ||
param="conc_vial_mg_L" | ||
) | ||
agg_samples_fr, agg_samples_fr_std = gcms.create_samples_param_aggrrep( | ||
param="fraction_of_sample_fr" | ||
) | ||
|
||
# Plotting results based on the generated reports, allowing for visual comparison of average values and standard deviations | ||
# Plot results for individual files or samples based | ||
|
||
plot_ave_std( | ||
gcms, | ||
param="fraction_of_sample_fr", | ||
min_y_thresh=0, | ||
files_or_samples="files", | ||
legend_location="outside", | ||
only_samples_to_plot=["A_1", "A_2", "Ader_1", "Ader_2"], # y_lim=[0, 5000] | ||
) | ||
# plot results bases on aggreport | ||
plot_ave_std( | ||
gcms, | ||
param="fraction_of_sample_fr", | ||
aggr=True, | ||
files_or_samples="files", | ||
min_y_thresh=0.01, | ||
y_lim=[0, 0.5], | ||
color_palette="Set2", | ||
) | ||
|
||
plot_ave_std( | ||
gcms, | ||
param="fraction_of_sample_fr", | ||
min_y_thresh=0, | ||
legend_location="outside", | ||
only_samples_to_plot=["A", "Ader"], # y_lim=[0, 5000] | ||
) | ||
# plot results bases on aggreport | ||
plot_ave_std( | ||
gcms, | ||
param="fraction_of_sample_fr", | ||
aggr=True, | ||
min_y_thresh=0.01, | ||
y_lim=[0, 0.5], | ||
color_palette="Set2", | ||
) | ||
|
||
# %% | ||
# import pickle | ||
|
||
# folder_path: plib.Path = plib.Path(r"C:\Users\mp933\Desktop\New folder") | ||
# pickle_path: plib.Path = plib.Path(folder_path, "pickle_object.pkl") | ||
# with open(pickle_path, "wb") as output_file: | ||
# pickle.dump(gcms, output_file) | ||
# %% | ||
# import pickle | ||
# import pathlib as plib # Used for handling file and directory paths | ||
# from gcms_data_analysis import ( | ||
# Project, | ||
# ) # Import the Project class from the gcms_data_analysis package | ||
|
||
# folder_path: plib.Path = plib.Path(r"C:\Users\mp933\Desktop\New folder") | ||
# pickle_path: plib.Path = plib.Path(folder_path, "pickle_object.pkl") | ||
# with open(pickle_path, "rb") as input_file: | ||
# gcms: Project = pickle.load(input_file) | ||
# from gcms_data_analysis.plotting import plot_pave_std | ||
|
||
# # %% | ||
# myfig = plot_pave_std( | ||
# gcms, | ||
# files_or_samples="files", | ||
# width=12, | ||
# height=5, | ||
# legend_location="outside", | ||
# y_lim=[0, 100], | ||
# ) | ||
# # %% | ||
# myfig = plot_pave_std( | ||
# gcms, | ||
# files_or_samples="samples", | ||
# width=6, | ||
# height=6, | ||
# legend_location="best", | ||
# y_lim=[0, 100], | ||
# min_y_thresh=10, | ||
# ) | ||
|
||
# # %% |
Binary file added
BIN
+10.5 KB
example/name_to_properties/data_name_to_properties/checked_compounds_properties.xlsx
Binary file not shown.
Binary file renamed
BIN
+11.4 KB
..._set/classifications_codes_fractions.xlsx → ...ties/classifications_codes_fractions.xlsx
Binary file not shown.
Oops, something went wrong.