import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

# Input files: one patient-survey export per hospital, parsed as CSV.
file1 = "Hospital1.txt"
file2 = "Hospital2.txt"

# ---------------------------------------------------------------------------
# Hospital 1: load the survey data
# ---------------------------------------------------------------------------
# Only the read itself sits inside the try so that the handlers below catch
# load failures and nothing else.
try:
    data1 = pd.read_csv(file1)
except FileNotFoundError:
    print(f"Error: '{file1} ' not found. Check the filename and path.")
    # Every later step needs data1; stop here instead of falling through
    # and crashing further down with an unrelated-looking NameError.
    sys.exit(1)
except Exception as e:
    print("An error occurred while reading the file:", e)
    sys.exit(1)
else:
    print("Yah yee the file loaded successfully!\n ")
    print(data1.head())

# ---------------------------------------------------------------------------
# Hospital 1: summary statistics
# ---------------------------------------------------------------------------
# NOTE(review): the column labels used below start with a space, presumably
# because the header row separates names with ", " -- confirm against the
# data file before changing them.
# Summing " Readmission" yields a patient count only if the column is coded
# 0/1 -- TODO confirm.
num_readmitted = data1[" Readmission"].sum()

avg_staff = data1[" StaffSatisfaction"].mean()
avg_clean = data1[" CleanlinessSatisfaction"].mean()
avg_food = data1[" FoodSatisfaction"].mean()
avg_comfort = data1[" ComfortSatisfaction"].mean()
avg_comm = data1[" CommunicationSatisfaction"].mean()

print("-- Patient Satisfaction Summary --")
print(f"Number of Patients Readmitted: {num_readmitted} ")
print(f"Average Staff Satisfaction: {avg_staff:.2f} ")
print(f"Average Cleanliness Satisfaction {avg_clean:.2f} ")
print(f"Average Food Satisfaction: {avg_food:.2f} ")
print(f"Average Comfort Satisfaction: {avg_comfort:.2f} ")
print(f"Average Communication Satisfaction: {avg_comm:.2f} ")

# ---------------------------------------------------------------------------
# Hospital 2: load the survey data
# ---------------------------------------------------------------------------
# Only the read itself sits inside the try so that the handlers below catch
# load failures and nothing else.
try:
    data2 = pd.read_csv(file2)
except FileNotFoundError:
    print(f"Error: '{file2} ' not found. Check the filename and path.")
    # Every later hospital-2 step needs data2; stop here instead of falling
    # through and crashing further down with a NameError.
    sys.exit(1)
except Exception as e:
    print("An error occurred while reading the file:", e)
    sys.exit(1)
else:
    print("Yah yee the file loaded successfully!\n ")
    # Preview the frame that was just loaded (hospital 2).
    print(data2.head())

# ---------------------------------------------------------------------------
# Hospital 2: summary statistics
# ---------------------------------------------------------------------------
# NOTE(review): the column labels used below start with a space, presumably
# because the header row separates names with ", " -- confirm against the
# data file before changing them.
# Summing " Readmission" yields a patient count only if the column is coded
# 0/1 -- TODO confirm.
num_readmitted2 = data2[" Readmission"].sum()

avg_staff2 = data2[" StaffSatisfaction"].mean()
avg_clean2 = data2[" CleanlinessSatisfaction"].mean()
avg_food2 = data2[" FoodSatisfaction"].mean()
avg_comfort2 = data2[" ComfortSatisfaction"].mean()
avg_comm2 = data2[" CommunicationSatisfaction"].mean()

print("-- Patient Satisfaction Summary --")
print(f"Number of Patients Readmitted: {num_readmitted2} ")
print(f"Average Staff Satisfaction: {avg_staff2:.2f} ")
print(f"Average Cleanliness Satisfaction {avg_clean2:.2f} ")
print(f"Average Food Satisfaction: {avg_food2:.2f} ")
print(f"Average Comfort Satisfaction: {avg_comfort2:.2f} ")
print(f"Average Communication Satisfaction: {avg_comm2:.2f} ")

# ---------------------------------------------------------------------------
# Hospital 1: logistic regression of readmission on overall satisfaction
# ---------------------------------------------------------------------------
satisfaction_columns = [
    " StaffSatisfaction",
    " CleanlinessSatisfaction",
    " FoodSatisfaction",
    " ComfortSatisfaction",
    " CommunicationSatisfaction",
]

# Per-patient overall score: row-wise mean of the five survey ratings.
data1["OverallSatisfaction"] = data1[satisfaction_columns].mean(axis=1)
print(data1[["PatientID", "OverallSatisfaction", " Readmission"]].head())

# Single-feature design matrix (double brackets keep it 2-D) and the target.
X1 = data1[["OverallSatisfaction"]].values
y1 = data1[" Readmission"].values

# Standardise the feature before fitting; the fitted coefficient is therefore
# expressed per standard deviation of overall satisfaction, not per raw point.
scaler1 = StandardScaler()
X_scaled1 = scaler1.fit_transform(X1)

log_reg1 = LogisticRegression()
log_reg1.fit(X_scaled1, y1)

# With one feature there is exactly one weight to report.
coef = log_reg1.coef_[0][0]
intercept = log_reg1.intercept_[0]

print("-- Logistic Regression Coefficeints --")
print(f"Intercept: {intercept:.3f} ")
print(f"Coefficient for Overall Satisfaction: {coef:.3f} ")

# The sign of the weight gives the direction of the association.
if coef < 0:
    direction = "Higher satisfaction is associated with LOWER probability of readmission."
elif coef > 0:
    direction = "Higher satisfaction is associated with HIGHER probability of readmission."
else:
    direction = "No apparaent association between satisfaction and readmission."

print("\n Interpretation:")
print(direction)

# Evaluate the fitted curve on 100 evenly spaced scores spanning the observed
# range. The grid goes through the same scaler the model was trained with,
# but is plotted on the original (unscaled) axis.
x_range1 = np.linspace(
    data1["OverallSatisfaction"].min(),
    data1["OverallSatisfaction"].max(),
    100,
).reshape(-1, 1)
x_range_scaled1 = scaler1.transform(x_range1)
y_prob1 = log_reg1.predict_proba(x_range_scaled1)[:, 1]

plt.figure(figsize=(8, 5))
plt.scatter(
    data1["OverallSatisfaction"],
    data1[" Readmission"],
    label="Observed Data",
    alpha=0.7,
)
plt.plot(x_range1, y_prob1, label="Logistic Regression Curve", linewidth=2)

plt.xlabel("Overall Satisfaction")
plt.ylabel("Probability of Readmission (1 = Yes)")
plt.title("Logistic Regression: Readmission vs Overall Satisfaction")
plt.legend()
plt.grid(True)
plt.show()

# Predictions are made on the same rows the model was fitted on, so the
# figures below are in-sample, not held-out, performance.
y_pred1 = log_reg1.predict(X_scaled1)
print("-- Classification Report --")
print(classification_report(y1, y_pred1, zero_division=0))
print("-- Confusion Matrix --")
print(confusion_matrix(y1, y_pred1))

# ---------------------------------------------------------------------------
# Hospital 2: logistic regression of readmission on overall satisfaction
# ---------------------------------------------------------------------------
satisfaction_columns = [
    " StaffSatisfaction",
    " CleanlinessSatisfaction",
    " FoodSatisfaction",
    " ComfortSatisfaction",
    " CommunicationSatisfaction",
]

# Per-patient overall score: row-wise mean of the five survey ratings.
data2["OverallSatisfaction"] = data2[satisfaction_columns].mean(axis=1)
print(data2[["PatientID", "OverallSatisfaction", " Readmission"]].head())

# Single-feature design matrix (double brackets keep it 2-D) and the target.
X2 = data2[["OverallSatisfaction"]].values
y2 = data2[" Readmission"].values

# Standardise the feature before fitting; the fitted coefficient is therefore
# expressed per standard deviation of overall satisfaction, not per raw point.
scaler2 = StandardScaler()
X_scaled2 = scaler2.fit_transform(X2)

log_reg2 = LogisticRegression()
log_reg2.fit(X_scaled2, y2)

# With one feature there is exactly one weight to report.
coef2 = log_reg2.coef_[0][0]
intercept2 = log_reg2.intercept_[0]

# Report and interpret the parameters of the hospital-2 model fitted above.
print("-- Logistic Regression Coefficeints --")
print(f"Intercept: {intercept2:.3f} ")
print(f"Coefficient for Overall Satisfaction: {coef2:.3f} ")

# The sign of the weight gives the direction of the association.
if coef2 < 0:
    direction = "Higher satisfaction is associated with LOWER probability of readmission."
elif coef2 > 0:
    direction = "Higher satisfaction is associated with HIGHER probability of readmission."
else:
    direction = "No apparaent association between satisfaction and readmission."

print("\n Interpretation:")
print(direction)

# Evaluate the fitted curve on 100 evenly spaced scores spanning the observed
# range. The grid goes through the same scaler the model was trained with,
# but is plotted on the original (unscaled) axis.
x_range2 = np.linspace(
    data2["OverallSatisfaction"].min(),
    data2["OverallSatisfaction"].max(),
    100,
).reshape(-1, 1)
x_range_scaled2 = scaler2.transform(x_range2)
y_prob2 = log_reg2.predict_proba(x_range_scaled2)[:, 1]

plt.figure(figsize=(8, 5))
plt.scatter(
    data2["OverallSatisfaction"],
    data2[" Readmission"],
    label="Observed Data",
    alpha=0.7,
)
plt.plot(x_range2, y_prob2, label="Logistic Regression Curve", linewidth=2)

plt.xlabel("Overall Satisfaction")
plt.ylabel("Probability of Readmission (1 = Yes)")
plt.title("Logistic Regression: Readmission vs Overall Satisfaction")
plt.legend()
plt.grid(True)
plt.show()

# Predictions are made on the same rows the model was fitted on, so the
# figures below are in-sample, not held-out, performance.
y_pred2 = log_reg2.predict(X_scaled2)
print("-- Classification Report --")
print(classification_report(y2, y_pred2, zero_division=0))
print("-- Confusion Matrix --")
print(confusion_matrix(y2, y_pred2))
# 0 commit comments