@@ -81,16 +81,14 @@ def find_admissions_by_age_sex(pcn_mapping: DataFrame, age_sex_ref_df: DataFrame
8181 admissions_by_age_sex_df: DataFrame containing admissions by age and sex.
8282 """
8383
84- age_band_pcn_joined_df = pcn_mapping .crossJoin (age_sex_ref_df )
85-
84+ age_band_pcn_joined_df = pcn_mapping .crossJoin (age_sex_ref_df ).distinct ()
8685 admissions_by_age_sex_df = (
8786 age_band_pcn_joined_df
8887 .join (admissions_data , ['PRACTICE_CODE' , 'AGE' , 'SEX' , 'PCN_CODE' ], how = 'left' )
8988 .groupBy ('PCN_CODE' , 'AGE_FIVE_YEAR' ).sum ('FAE_EMERGENCY' )
9089 .withColumnRenamed ('sum(FAE_EMERGENCY)' ,'admissions' )
9190 .fillna (0 , 'admissions' )
9291 .orderBy ('PCN_CODE' , 'AGE_FIVE_YEAR' ))
93-
9492 return admissions_by_age_sex_df
9593
9694
@@ -106,17 +104,15 @@ def find_admissions_by_pcn(pcn_map_df: DataFrame, admissions_data_df: DataFrame)
106104 Returns:
107105 df: DataFrame containing admissions by PCN.
108106 """
109-
110107 df = (
111108 pcn_map_df
112- .join (admissions_data_df , on = ['PCN_CODE' , 'PCN_NAME ' ], how = 'left' )
113- .groupBy ('PCN_CODE' , 'PCN_NAME' )
109+ .join (admissions_data_df , on = ['PRACTICE_CODE' , 'PCN_CODE ' ], how = 'left' )
110+ .groupBy ('PCN_CODE' )
114111 .sum ('FAE_EMERGENCY' )
115112 .withColumnRenamed ('sum(FAE_EMERGENCY)' ,'admissions' )
116113 .fillna (0 , 'admissions' )
117114 .orderBy ('PCN_CODE' )
118115 )
119-
120116 return df
121117
122118
@@ -155,7 +151,7 @@ def generate_standardised_numerator(db_name: str, table_name: str, acsc_indicato
155151 .withColumn ('stan_sum' , (F .col ('admissions' ) * F .col ('eng_population_by_age_sex' )) / F .col ('banded_pcn_pop' ))
156152 .groupBy ('PCN_CODE' , 'eng_population' ).agg (F .sum ('stan_sum' )).withColumnRenamed ('sum(stan_sum)' ,'stan_sum' )
157153 .withColumn ('DSR' , F .col ('stan_sum' ) * (1 / F .col ('eng_population' )))
158- )
154+ )
159155
160156 pcn_list_size_totals = pcn_list_size .groupBy ('PCN_CODE' ).sum ('banded_pcn_pop' ).withColumnRenamed ('sum(banded_pcn_pop)' ,'pcn_pop' )
161157
@@ -167,5 +163,5 @@ def generate_standardised_numerator(db_name: str, table_name: str, acsc_indicato
167163 .withColumn ('ATTRIBUTE_ID' , F .lit (acsc_indicator_code ))
168164 .orderBy ('PCN_CODE' )
169165 )
170-
171- return standardised_numerator
166+
167+ return standardised_numerator
0 commit comments