diff --git a/README.md b/README.md index 9f87bb5..6184313 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,6 @@ + +![CGEM Logo](https://github.com/jrolf/cgem/blob/main/cgem/images/CGEM_LOGO.png) + # Collaborative Generalized Effects Modeling (CGEM): A Comprehensive Overview ## Introduction diff --git a/README0.md b/archive/README0.md similarity index 100% rename from README0.md rename to archive/README0.md diff --git a/build/lib/cgem/__init__.py b/build/lib/cgem/__init__.py index 7430897..cf60087 100644 --- a/build/lib/cgem/__init__.py +++ b/build/lib/cgem/__init__.py @@ -2,5 +2,7 @@ import numpy as np import pandas as pd -from .models1 import * +from .terms import * +from .models import * +#. \ No newline at end of file diff --git a/cgem.egg-info/PKG-INFO b/cgem.egg-info/PKG-INFO index d43eba5..b02792a 100644 --- a/cgem.egg-info/PKG-INFO +++ b/cgem.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: cgem -Version: 0.0.7 +Version: 0.0.8 Summary: CGEM: Collaborative Generalized Effects Modeling Home-page: https://github.com/jrolf/cgem Author: James A. Rolfsen diff --git a/cgem.egg-info/SOURCES.txt b/cgem.egg-info/SOURCES.txt index 284cf2d..f114dbb 100644 --- a/cgem.egg-info/SOURCES.txt +++ b/cgem.egg-info/SOURCES.txt @@ -1,7 +1,8 @@ README.md setup.py cgem/__init__.py -cgem/models1.py +cgem/models.py +cgem/terms.py cgem.egg-info/PKG-INFO cgem.egg-info/SOURCES.txt cgem.egg-info/dependency_links.txt diff --git a/cgem/__init__.py b/cgem/__init__.py index 7430897..cf60087 100644 --- a/cgem/__init__.py +++ b/cgem/__init__.py @@ -2,5 +2,7 @@ import numpy as np import pandas as pd -from .models1 import * +from .terms import * +from .models import * +#. 
\ No newline at end of file diff --git a/cgem/models1.py b/cgem/models.py similarity index 99% rename from cgem/models1.py rename to cgem/models.py index 04e7cbb..e037e52 100644 --- a/cgem/models1.py +++ b/cgem/models.py @@ -934,7 +934,7 @@ def evaluation(self, eq_str1="y=m*x+b", solve_for='x', dfname='df1', tvars=[]): es = self.evaluation_string(eq_str1, solve_for, dfname, tvars) return eval(es) - def fit(self, n_epochs=50): + def fit(self, n_epochs=50,verbose=False): # Creates the initial version of the Transient Effects DataFrame: self.initialize_tdf() # << self.TDF is created. @@ -947,8 +947,8 @@ def fit(self, n_epochs=50): #R2 = max(round(r2_score(actuals, preds), 5), 0.00001) for epoch_num in range(1,n_epochs+1): - if epoch_num % 1 == 0: # Adjust this condition for controlling the print frequency - print(f"\n{'#' * 50}\nLearning Epoch: {epoch_num + 1}") + if verbose==True and epoch_num % 1 == 0: # Adjust this condition for controlling the print frequency + print(f"\n{'#' * 50}\nLearning Epoch: {epoch_num}") # Initial Evaluation yhat1 = self.evaluation(self.TrueForm, self.YVar, 'self.TDF', tvars=self.TermList + [self.YVar]) @@ -1014,11 +1014,11 @@ def fit(self, n_epochs=50): } self.epoch_logs.append(elog) - if epoch_num % 1 == 0: # Adjust this condition for controlling the print frequency + if verbose==True and epoch_num % 1 == 0: # Adjust this condition for controlling the print frequency print(f"{'-' * 50}\nRMSE 1: {rmse1}\nRMSE 2: {rmse2}\nDELTA: {rmse2 - rmse1}") print(f"RSQ 1: {rsq1}\nRSQ 2: {rsq2}\nDELTA: {rsq2 - rsq1}\n{'-' * 50}") - print('Done.') + print('CGEM model fitting complete. 
('+str(epoch_num)+' epochs)') def initialize_tdf(self): """ diff --git a/cgem/terms.py b/cgem/terms.py new file mode 100644 index 0000000..6d01a1e --- /dev/null +++ b/cgem/terms.py @@ -0,0 +1,194 @@ + +############################################################################# +############################################################################# + +Notes = ''' + +**Collaborative Generalized Effects Modeling (CGEM): A Comprehensive Overview** + +### What is CGEM? + +Collaborative Generalized Effects Modeling (CGEM) is an advanced statistical modeling framework that marks a significant evolution in the realm of data analysis and predictive modeling. It stands out in its ability to handle complex, real-world scenarios that are often encountered in business analytics, scientific research, and other domains where data relationships are intricate and multifaceted. CGEM's main strength lies in its innovative approach to model construction, which blends traditional statistical methods with modern machine learning techniques. + +### Defining Characteristics of CGEM + +1. **Formulaic Flexibility**: CGEM is characterized by its unparalleled formulaic freedom. Unlike conventional models constrained by linear or additive structures, CGEM allows for the creation of models with any mathematical form. This includes linear, non-linear, multiplicative, exponential, and more intricate relationships, providing a canvas for data scientists to model the real complexity found in data. + +2. **Generalization of Effects**: In CGEM, the concept of an 'effect' is broadly defined. An effect can be as straightforward as a constant or a linear term, or as complex as the output from a machine learning algorithm like a neural network or a random forest. This generalization enables the seamless integration of diverse methodologies within a single coherent model, offering a more holistic view of the data. + +3. 
**Iterative Convergence and Refinement**: The methodology operates through an iterative process, focusing on achieving a natural and efficient convergence of terms. This iterative refinement ensures that each effect in the model is appropriately calibrated, thus avoiding common pitfalls like overfitting or the disproportionate influence of particular variables. + +4. **Causal Coherence**: CGEM places a strong emphasis on maintaining causally coherent relationships. This principle ensures that the model's outputs are not just statistically significant but also meaningful and interpretable in the context of real-world scenarios. It is a crucial aspect that distinguishes CGEM from many other data modeling approaches. + +5. **Integration with Machine Learning**: Uniquely, CGEM is designed to incorporate machine learning models as effects within its framework. This integration allows for leveraging the predictive power of machine learning while maintaining the interpretability and structural integrity of traditional statistical models. + +### Underlying Principles Making CGEM Uniquely Powerful + +- **Versatility in Model Design**: CGEM's formulaic flexibility allows it to adapt to various data types and relationships, making it applicable in diverse fields from marketing to environmental science. + +- **Holistic Data Representation**: By allowing for a wide range of effects, CGEM can represent complex datasets more completely, capturing nuances that simpler models might miss. + +- **Balanced Complexity and Interpretability**: While it can incorporate complex machine learning models, CGEM also maintains a level of interpretability that is often lost in more black-box approaches. + +- **Focus on Causality**: By ensuring that models are causally coherent, CGEM bridges the gap between correlation and causation, a critical factor in making sound decisions based on model outputs. 
+ +- **Adaptive Learning and Refinement**: The iterative nature of CGEM enables it to refine its parameters continually, leading to models that are both robust and finely tuned to the data. + +### Conclusion + +CGEM represents a significant leap in statistical modeling, offering a sophisticated, flexible, and powerful tool for understanding and predicting complex data relationships. Its unique blend of formulaic freedom, generalization of effects, and focus on causal coherence makes it an invaluable resource in the data scientist's toolkit, particularly in an era where data complexity and volume are ever-increasing. + +''' + +############################################################################# +############################################################################# + +import numpy as np +import pandas as pd +import pandas_ta as ta + +from random import shuffle, choice +import random,time,os,io,requests,datetime +import json,hmac,hashlib,base64,pickle +from collections import defaultdict as defd +from heapq import nlargest +from copy import deepcopy + +from scipy import signal +from scipy.stats import entropy +from scipy.constants import convert_temperature +from scipy.interpolate import interp1d +#from scipy.ndimage.filters import uniform_filter1d + +#https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html +#https://scikit-learn.org/stable/auto_examples/ensemble/plot_forest_importances.html#sphx-glr-auto-examples-ensemble-plot-forest-importances-py +from sklearn.ensemble import ExtraTreesClassifier as ETC +from sklearn.ensemble import ExtraTreesRegressor as ETR +from sklearn.ensemble import BaggingClassifier as BGC +from sklearn.ensemble import GradientBoostingClassifier as GBC +from sklearn.ensemble import GradientBoostingRegressor as GBR +from sklearn.neural_network import MLPRegressor as MLP +from sklearn.linear_model import LinearRegression as OLS +from sklearn.preprocessing import LabelBinarizer as LBZ +from 
def clean_shape(X, y):
    """
    Coerce features X and target y into the array layouts scikit-learn expects.

    Parameters:
        X: array-like, list, DataFrame, or Series - input features
        y: array-like, list, DataFrame, or Series - target values

    Returns:
        X2, y2: X as a 2D array and y as a flattened 1D array, each passed
        through scikit-learn's check_array validation.
    """
    pandas_types = (pd.DataFrame, pd.Series)

    # Features: unwrap pandas containers, otherwise build an ndarray.
    features = X.values if isinstance(X, pandas_types) else np.array(X)
    # A flat vector is treated as a single feature, one row per element.
    if features.ndim == 1:
        features = features.reshape(-1, 1)

    # Target: same unwrapping, then flatten to 1D.
    raw_target = y.values if isinstance(y, pandas_types) else np.array(y)
    target = raw_target.ravel()

    # Validate X first, then y (y is allowed to stay 1D).
    return check_array(features), check_array(target, ensure_2d=False)
[ + "Notes = '''\n", + "\n", + "## Add the locations of your python Libraries if you have multiple locations:\n", + "\n", + "import sys\n", + "new_paths = [\n", + " \"/Users/jar/Library/Python/3.8/bin\",\n", + " \"/Users/jar/Library/Python/3.8/lib/python/site-packages\", \n", + "]\n", + "for p in new_paths:\n", + " if p not in sys.path: \n", + " sys.path = [p]+sys.path \n", + "\n", + "'''" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "e14731ea", + "metadata": {}, + "outputs": [], + "source": [ + "#############################################################################\n", + "#############################################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "429a6601", + "metadata": {}, + "outputs": [], + "source": [ + "### INSTALL THE CGEM MODULE:\n", + "# pip install --upgrade cgem\n", + "# pip show cgem" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a7b63d2f", + "metadata": {}, + "outputs": [], + "source": [ + "from cgem import *" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "647747bc", + "metadata": {}, + "outputs": [], + "source": [ + "#############################################################################\n", + "#############################################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "38b2f4a6", + "metadata": {}, + "outputs": [], + "source": [ + "Task = '''\n", + "\n", + "Create a simple causal simulation to generate a dataset\n", + "that can be used to conduct a computational proof of CGEM.\n", + "\n", + "'''\n", + "def gen_artificial_data_v1(size=10000):\n", + " \"\"\"\n", + " Generate an artificial dataset representing a causal system.\n", + "\n", + " Parameters:\n", + " size (int): Number of data points to generate.\n", + "\n", + " Returns:\n", + " pandas.DataFrame: A DataFrame with the generated data.\n", + " \"\"\"\n", + " global 
cats,effs\n", + " # Generating random values for the variables\n", + " reg_var_a = np.random.normal(10, 3, size)\n", + " reg_var_b = np.random.normal(12, 4, size)\n", + " reg_var_c = np.random.normal(15, 5, size)\n", + "\n", + " # Calculating the effect based on the variables\n", + " effect_x = 20.0 + (1.0 * reg_var_a) + (1.5 * reg_var_b) + (2.0 * reg_var_c)\n", + "\n", + " # Defining categories and their corresponding effects\n", + " cats = list(\"ABCDEFGHIJ\")\n", + " effs = np.around(np.linspace(0.5, 1.4, len(cats)), 2)\n", + " cat2effect = {cat: round(eff, 4) for cat, eff in zip(cats, effs)}\n", + "\n", + " # Generating categorical variable and its effect\n", + " cat_var_d = np.array([choice(cats) for _ in range(size)])\n", + " cat_effect_d = np.array([cat2effect[c] for c in cat_var_d])\n", + "\n", + " # Adding a noise effect\n", + " noise_effect = np.random.uniform(0.90, 1.10, size)\n", + "\n", + " # Calculating the target variable\n", + " target_var_z = ((effect_x) * cat_effect_d) * noise_effect\n", + "\n", + " # Constructing the dataframe\n", + " df = pd.DataFrame({\n", + " 'TGT_Z': target_var_z,\n", + " 'REG_A': reg_var_a,\n", + " 'REG_B': reg_var_b,\n", + " 'REG_C': reg_var_c,\n", + " 'CAT_D': cat_var_d\n", + " })\n", + "\n", + " return df\n", + "\n", + "#------------------------------------------------\n", + "\n", + "DF1 = gen_artificial_data_v1(size=10000)\n", + "DF2 = gen_artificial_data_v1(size=10000) \n", + "\n", + "#------------------------------------------------" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "3acfd774", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TGT_ZREG_AREG_BREG_CCAT_D
056.1957866.24814712.37355416.175011D
149.19409216.53851315.85486311.959708B
2116.99198612.02218712.71059619.066333I
367.9898169.24355214.77721918.810220C
454.96116010.45442820.68013318.433032B
544.3925949.9886827.36304013.539356B
675.04441710.32256017.9550788.957982G
765.1761738.16934711.80788021.397151C
852.91879010.2679269.28275613.684600D
981.18467915.74298313.93274015.751626E
\n", + "
" + ], + "text/plain": [ + " TGT_Z REG_A REG_B REG_C CAT_D\n", + "0 56.195786 6.248147 12.373554 16.175011 D\n", + "1 49.194092 16.538513 15.854863 11.959708 B\n", + "2 116.991986 12.022187 12.710596 19.066333 I\n", + "3 67.989816 9.243552 14.777219 18.810220 C\n", + "4 54.961160 10.454428 20.680133 18.433032 B\n", + "5 44.392594 9.988682 7.363040 13.539356 B\n", + "6 75.044417 10.322560 17.955078 8.957982 G\n", + "7 65.176173 8.169347 11.807880 21.397151 C\n", + "8 52.918790 10.267926 9.282756 13.684600 D\n", + "9 81.184679 15.742983 13.932740 15.751626 E" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DF1.head(10) " + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "c8d94987", + "metadata": {}, + "outputs": [], + "source": [ + "#############################################################################\n", + "#############################################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "9da343d5", + "metadata": {}, + "outputs": [], + "source": [ + "### MASTER EFFECTS FORMULA: \n", + "Formula = \"TGT_Z = CAT_D_EFF * LIN_REG_EFF\"\n", + "\n", + "### TERMS PARAMETERS:\n", + "tparams = {\n", + " \"CAT_D_EFF\": {\n", + " 'model': \"CatRegModel()\", \n", + " 'xvars': ['CAT_D'],\n", + " 'ival' : 10,\n", + " },\n", + " \"LIN_REG_EFF\": {\n", + " 'model': \"OLS()\", \n", + " 'xvars': ['REG_A','REG_B','REG_C'],\n", + " 'ival' : 10,\n", + " } \n", + "} " + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "dbe1786e", + "metadata": {}, + "outputs": [], + "source": [ + "model = CGEM() \n", + "model.load_df(DF1) \n", + "model.define_form(Formula) \n", + "model.define_terms(tparams) " + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "4fd8ce8c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + 
"##################################################\n", + "Learning Epoch: 1\n", + "--------------------------------------------------\n", + "RMSE 1: 36.54713079260933\n", + "RMSE 2: 31.28781419363407\n", + "DELTA: -5.259316598975261\n", + "RSQ 1: -1.0303183556525064\n", + "RSQ 2: -0.4880174142276028\n", + "DELTA: 0.5423009414249036\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 2\n", + "--------------------------------------------------\n", + "RMSE 1: 31.28781419363407\n", + "RMSE 2: 26.998834140188247\n", + "DELTA: -4.2889800534458224\n", + "RSQ 1: -0.4880174142276028\n", + "RSQ 2: -0.10801997030543387\n", + "DELTA: 0.37999744392216894\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 3\n", + "--------------------------------------------------\n", + "RMSE 1: 26.998834140188247\n", + "RMSE 2: 23.491602642353833\n", + "DELTA: -3.5072314978344146\n", + "RSQ 1: -0.10801997030543387\n", + "RSQ 2: 0.1611528014541571\n", + "DELTA: 0.269172771759591\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 4\n", + "--------------------------------------------------\n", + "RMSE 1: 23.491602642353833\n", + "RMSE 2: 20.61257638906548\n", + "DELTA: -2.8790262532883517\n", + "RSQ 1: 0.1611528014541571\n", + "RSQ 2: 0.35416418934661686\n", + "DELTA: 0.19301138789245975\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 5\n", + "--------------------------------------------------\n", + "RMSE 1: 20.61257638906548\n", + "RMSE 2: 18.237185031710226\n", + "DELTA: -2.375391357355255\n", + "RSQ 1: 0.35416418934661686\n", + "RSQ 2: 0.49443945317697924\n", + "DELTA: 0.14027526383036237\n", + 
"--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 6\n", + "--------------------------------------------------\n", + "RMSE 1: 18.237185031710226\n", + "RMSE 2: 16.264938975942428\n", + "DELTA: -1.9722460557677977\n", + "RSQ 1: 0.49443945317697924\n", + "RSQ 2: 0.5978737327900269\n", + "DELTA: 0.10343427961304763\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 7\n", + "--------------------------------------------------\n", + "RMSE 1: 16.264938975942428\n", + "RMSE 2: 14.615392149713143\n", + "DELTA: -1.649546826229285\n", + "RSQ 1: 0.5978737327900269\n", + "RSQ 2: 0.6753028242184651\n", + "DELTA: 0.07742909142843823\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 8\n", + "--------------------------------------------------\n", + "RMSE 1: 14.615392149713143\n", + "RMSE 2: 13.224678752298788\n", + "DELTA: -1.3907133974143555\n", + "RSQ 1: 0.6753028242184651\n", + "RSQ 2: 0.7341554089839795\n", + "DELTA: 0.05885258476551436\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 9\n", + "--------------------------------------------------\n", + "RMSE 1: 13.224678752298788\n", + "RMSE 2: 12.042565954525402\n", + "DELTA: -1.1821127977733852\n", + "RSQ 1: 0.7341554089839795\n", + "RSQ 2: 0.7795573449265298\n", + "DELTA: 0.045401935942550375\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 10\n", + "--------------------------------------------------\n", + "RMSE 1: 12.042565954525402\n", + "RMSE 2: 11.02983096537045\n", + "DELTA: -1.0127349891549517\n", + "RSQ 1: 0.7795573449265298\n", + "RSQ 2: 
0.8150751480190875\n", + "DELTA: 0.0355178030925577\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 11\n", + "--------------------------------------------------\n", + "RMSE 1: 11.02983096537045\n", + "RMSE 2: 10.156014566843528\n", + "DELTA: -0.8738163985269232\n", + "RSQ 1: 0.8150751480190875\n", + "RSQ 2: 0.8432151133626915\n", + "DELTA: 0.028139965343603968\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 12\n", + "--------------------------------------------------\n", + "RMSE 1: 10.156014566843528\n", + "RMSE 2: 9.39756941703513\n", + "DELTA: -0.7584451498083968\n", + "RSQ 1: 0.8432151133626915\n", + "RSQ 2: 0.8657579267306693\n", + "DELTA: 0.02254281336797781\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 13\n", + "--------------------------------------------------\n", + "RMSE 1: 9.39756941703513\n", + "RMSE 2: 8.736240960196044\n", + "DELTA: -0.6613284568390867\n", + "RSQ 1: 0.8657579267306693\n", + "RSQ 2: 0.8839869687855524\n", + "DELTA: 0.01822904205488307\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 14\n", + "--------------------------------------------------\n", + "RMSE 1: 8.736240960196044\n", + "RMSE 2: 8.157813781533669\n", + "DELTA: -0.5784271786623751\n", + "RSQ 1: 0.8839869687855524\n", + "RSQ 2: 0.8988408571022244\n", + "DELTA: 0.014853888316672004\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 15\n", + "--------------------------------------------------\n", + "RMSE 1: 8.157813781533669\n", + "RMSE 2: 7.651105495666542\n", + "DELTA: -0.506708285867127\n", + 
"RSQ 1: 0.8988408571022244\n", + "RSQ 2: 0.9110172247932146\n", + "DELTA: 0.012176367690990175\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 16\n", + "--------------------------------------------------\n", + "RMSE 1: 7.651105495666542\n", + "RMSE 2: 7.207126070098342\n", + "DELTA: -0.4439794255681999\n", + "RSQ 1: 0.9110172247932146\n", + "RSQ 2: 0.921044606183616\n", + "DELTA: 0.01002738139040149\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 17\n", + "--------------------------------------------------\n", + "RMSE 1: 7.207126070098342\n", + "RMSE 2: 6.818512561390101\n", + "DELTA: -0.3886135087082412\n", + "RSQ 1: 0.921044606183616\n", + "RSQ 2: 0.9293297128561846\n", + "DELTA: 0.008285106672568543\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 18\n", + "--------------------------------------------------\n", + "RMSE 1: 6.818512561390101\n", + "RMSE 2: 6.4790839358812145\n", + "DELTA: -0.3394286255088863\n", + "RSQ 1: 0.9293297128561846\n", + "RSQ 2: 0.93619058272619\n", + "DELTA: 0.006860869870005448\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 19\n", + "--------------------------------------------------\n", + "RMSE 1: 6.4790839358812145\n", + "RMSE 2: 6.183495113598347\n", + "DELTA: -0.2955888222828671\n", + "RSQ 1: 0.93619058272619\n", + "RSQ 2: 0.9418799994210839\n", + "DELTA: 0.005689416694893867\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 20\n", + "--------------------------------------------------\n", + "RMSE 1: 6.183495113598347\n", + "RMSE 2: 
5.9270259565839485\n", + "DELTA: -0.2564691570143989\n", + "RSQ 1: 0.9418799994210839\n", + "RSQ 2: 0.9466012335730766\n", + "DELTA: 0.004721234151992659\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 21\n", + "--------------------------------------------------\n", + "RMSE 1: 5.9270259565839485\n", + "RMSE 2: 5.705406403871447\n", + "DELTA: -0.2216195527125011\n", + "RSQ 1: 0.9466012335730766\n", + "RSQ 2: 0.9505198808702766\n", + "DELTA: 0.00391864729720004\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 22\n", + "--------------------------------------------------\n", + "RMSE 1: 5.705406403871447\n", + "RMSE 2: 5.514758096823613\n", + "DELTA: -0.19064830704783464\n", + "RSQ 1: 0.9505198808702766\n", + "RSQ 2: 0.9537714256800116\n", + "DELTA: 0.0032515448097349964\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 23\n", + "--------------------------------------------------\n", + "RMSE 1: 5.514758096823613\n", + "RMSE 2: 5.351497257302443\n", + "DELTA: -0.1632608395211701\n", + "RSQ 1: 0.9537714256800116\n", + "RSQ 2: 0.9564680441084052\n", + "DELTA: 0.002696618428393638\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 24\n", + "--------------------------------------------------\n", + "RMSE 1: 5.351497257302443\n", + "RMSE 2: 5.212324961597968\n", + "DELTA: -0.13917229570447454\n", + "RSQ 1: 0.9564680441084052\n", + "RSQ 2: 0.9587028069472536\n", + "DELTA: 0.002234762838848381\n", + "--------------------------------------------------\n", + "\n", + "##################################################\n", + "Learning Epoch: 25\n", + 
"--------------------------------------------------\n", + "RMSE 1: 5.212324961597968\n", + "RMSE 2: 5.094213624125657\n", + "DELTA: -0.11811133747231128\n", + "RSQ 1: 0.9587028069472536\n", + "RSQ 2: 0.9605531914692769\n", + "DELTA: 0.0018503845220232407\n", + "--------------------------------------------------\n", + "CGEM model fitting complete. (25 epochs)\n" + ] + } + ], + "source": [ + "model.fit(25,verbose=True); " + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "3f03aefe", + "metadata": {}, + "outputs": [], + "source": [ + "#############################################################################\n", + "#############################################################################" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "c199064e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CrosVal R-Squared: 0.96857\n" + ] + } + ], + "source": [ + "preds = model.predict(DF2) \n", + "actuals = DF2['TGT_Z'].values\n", + "r2 = model.calc_r2(actuals,preds) \n", + "print('CrosVal R-Squared:',round(r2,5)) " + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "b25448d8", + "metadata": {}, + "outputs": [], + "source": [ + "#############################################################################\n", + "#############################################################################\n", + "#############################################################################\n", + "#############################################################################" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d1319cd", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "312c6e8c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aad167ff", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "b1fc8824", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e748490", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f82fa1ab", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e49516f4", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7021e6c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ff1e66a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6dc41998", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3cfff16d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eaab9587", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "097a786c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0fda8e85", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4184d25a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ba2eb95", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11270db7", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98816a56", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5ba935f8", + 
"metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/images/CGEM_LOGO.jpeg b/images/CGEM_LOGO.jpeg new file mode 100644 index 0000000..3bc2a1d Binary files /dev/null and b/images/CGEM_LOGO.jpeg differ diff --git a/images/CGEM_LOGO.png b/images/CGEM_LOGO.png new file mode 100644 index 0000000..404cdd0 Binary files /dev/null and b/images/CGEM_LOGO.png differ diff --git a/images/JAMES_ROLFSEN.jpeg b/images/JAMES_ROLFSEN.jpeg new file mode 100644 index 0000000..bd64293 Binary files /dev/null and b/images/JAMES_ROLFSEN.jpeg differ diff --git a/setup.py b/setup.py index d692407..9224ab9 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( # Basic package information: name ='cgem', - version ='0.0.7', + version ='0.0.8', packages=find_packages(), # Automatically find packages in the directory # Dependencies: