|
9 | 9 | # DATA LOADING |
10 | 10 | ####################################### |
11 | 11 |
|
12 | | -st.set_page_config(layout='wide') |
| 12 | +st.set_page_config(layout='wide') |
| 13 | + |
| 14 | +@st.cache_data # Caching data loading functions |
| 15 | +def load_data(url): |
| 16 | + return pd.read_csv(url) |
13 | 17 |
|
14 | 18 | # Loading data files from the 'streamlit' directory |
15 | | -df = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2020.csv') |
16 | | -df2018 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2018.csv') |
17 | | -full_data2018 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2018.csv') |
18 | | -full_data2019 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2019.csv') |
19 | | -full_df2020 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2020.csv') |
20 | | -df2019 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2019.csv') |
21 | | -df2021 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2021.csv') |
22 | | -df2022 = pd.read_csv('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2022.csv') |
| 19 | +df = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2020.csv') |
| 20 | +df2018 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2018.csv') |
| 21 | +full_data2018 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2018.csv') |
| 22 | +full_data2019 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2019.csv') |
| 23 | +full_df2020 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/survey_results_sample_2020.csv') |
| 24 | +df2019 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2019.csv') |
| 25 | +df2021 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2021.csv') |
| 26 | +df2022 = load_data('https://raw.githubusercontent.com/Recode-Hive/Stackoverflow-Analysis/main/streamlit/df2022.csv') |
23 | 27 |
|
24 | 28 | # Filter the 2020 dataframe |
25 | 29 | df2020 = df[df['SalaryUSD'] < 200000] |
|
109 | 113 | } |
110 | 114 | df_ai.replace(short_mapping, inplace=True) |
111 | 115 |
|
| 116 | +@st.cache_data |
112 | 117 | def mean_salary(df): |
113 | 118 | mean_salary = df[df['SalaryUSD'] <= 1000000]['SalaryUSD'].mean() |
114 | 119 | df.loc[df['SalaryUSD'] > 1000000, 'SalaryUSD'] = mean_salary |
115 | 120 | return df |
116 | 121 |
|
117 | 122 | # Function to create value count plots for each column |
| 123 | +@st.cache_data |
118 | 124 | def plot_value_counts(column_name): |
119 | 125 | colors = ['skyblue', 'yellow'] |
120 | 126 | fig = px.bar(df_ai[column_name].value_counts().reset_index(), x='index', y=column_name, color_discrete_sequence=[random.choice(colors)]) |
|
0 commit comments