Skip to content

Commit

Permalink
Merge pull request #50 from Chameleon-company/YoriMaster-patch-2
Browse files Browse the repository at this point in the history
Add files via upload
  • Loading branch information
jamesdaviesdeveloper authored Sep 27, 2024
2 parents ba3e8ad + 0ee2f35 commit 25a9803
Show file tree
Hide file tree
Showing 15 changed files with 266,737 additions and 0 deletions.
24 changes: 24 additions & 0 deletions personal-work/yulin-zhuang/Check data integrity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import pandas as pd


def remove_negative_total_kwh(df):
    """Split *df* on the sign of 'Total kWh'.

    Returns a tuple ``(cleaned, negatives)`` where ``cleaned`` keeps rows
    with 'Total kWh' >= 0 and ``negatives`` holds the offending records
    (negative readings are meter/data errors, not real sessions).
    """
    negatives = df[df['Total kWh'] < 0]
    cleaned = df[df['Total kWh'] >= 0]
    return cleaned, negatives


def has_missing_values(df):
    """Return True if any cell in *df* is null/NaN."""
    return bool(df.isnull().any().any())


def main():
    # Load the merged three-year usage dataset.
    df = pd.read_csv('EVCS_Usage_Three_Years.csv')

    # Report and drop records with negative 'Total kWh'.
    df_cleaned, negative_total_kwh = remove_negative_total_kwh(df)
    print(f"Found {len(negative_total_kwh)} records with negative 'Total kWh':\n", negative_total_kwh)

    # Check for missing values in the cleaned data.
    if has_missing_values(df_cleaned):
        print("The dataset contains missing values.")
    else:
        print("The dataset has no missing values in any cell.")

    # Save the cleaned data to a new CSV file.
    df_cleaned.to_csv('EVCS_Usage_Three_Years_Cleaned.csv', index=False)

    print("Data wrangling successful")


if __name__ == '__main__':
    main()
32 changes: 32 additions & 0 deletions personal-work/yulin-zhuang/Combine the datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import pandas as pd

# Canonical column names for the merged dataset.
COLUMNS = ['_id', 'CP ID', 'Connector', 'Start Date', 'Start Time', 'End Date',
           'End Time', 'Total kWh', 'Site', 'Model']

# The three yearly source files shipped with this folder.
DEFAULT_INPUT_FILES = [
    'EVCS Usage_Sep16_Aug17_PerthandKinross.csv',
    'EVCS Usage_Sep17_Aug18_PerthandKinross.csv',
    'EVCS Usage_Sep18_Aug19_PerthandKinross.csv',
]


def combine_usage_files(paths, columns=COLUMNS):
    """Merge the yearly EVCS usage CSVs into one normalized DataFrame.

    Each file's own header row is skipped and replaced by *columns*.
    Dates are normalized to YYYY-MM-DD and times (input HH:MM) to
    HH:MM:SS; unparseable values become NaN via ``errors='coerce'``.
    The '_id' column is reassigned sequentially from 1 so the merged
    file has unique ids.
    """
    frames = [pd.read_csv(p, skiprows=1, header=None) for p in paths]
    combined = pd.concat(frames, ignore_index=True)
    combined.columns = columns

    # Normalize dates to YYYY-MM-DD.
    for col in ('Start Date', 'End Date'):
        combined[col] = pd.to_datetime(combined[col], errors='coerce').dt.strftime('%Y-%m-%d')

    # Normalize times to HH:MM:SS (source data uses HH:MM).
    for col in ('Start Time', 'End Time'):
        combined[col] = pd.to_datetime(combined[col], format='%H:%M', errors='coerce').dt.strftime('%H:%M:%S')

    # Reassign _id sequentially from 1.
    combined['_id'] = range(1, len(combined) + 1)
    return combined


def main():
    combined_df = combine_usage_files(DEFAULT_INPUT_FILES)

    # Save the combined DataFrame to a CSV file.
    combined_df.to_csv('EVCS_Usage_Three_Years.csv', index=False)

    print("Combined DF Shape:", combined_df.shape)
    print("Combined DF Head:", combined_df.head())


if __name__ == '__main__':
    main()
66,665 changes: 66,665 additions & 0 deletions personal-work/yulin-zhuang/EVCS_Usage_Three_Years.csv

Large diffs are not rendered by default.

66,473 changes: 66,473 additions & 0 deletions personal-work/yulin-zhuang/EVCS_Usage_Three_Years_Cleaned_Negative_Value.csv

Large diffs are not rendered by default.

66,644 changes: 66,644 additions & 0 deletions personal-work/yulin-zhuang/EVCS_Usage_With_Google_Coordinates.csv

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import os
import time

import pandas as pd

# Placeholder site used for internal testing; it has no real location.
TEST_SITE = "***TEST SITE*** Charge Your Car HQ"


def get_coordinates_google(gmaps, site):
    """Geocode *site* (within Perth and Kinross, Scotland) via *gmaps*.

    Returns a (latitude, longitude) tuple, or (None, None) when the site
    cannot be geocoded or the API call fails.
    """
    try:
        geocode_result = gmaps.geocode(f"{site}, Perth and Kinross, Scotland")
        if geocode_result:
            location = geocode_result[0]['geometry']['location']
            return location['lat'], location['lng']
        return None, None
    except Exception as e:
        # Best-effort: report the failure and continue with unknown coordinates.
        print(f"Error fetching {site}: {e}")
        return None, None


def main():
    # Imported lazily so the pure geocoding helper above can be reused and
    # tested without the third-party googlemaps package installed.
    import googlemaps

    # Load the merged usage data.
    df = pd.read_csv('EVCS_Usage_Three_Years.csv')

    # Step 1: Collect unique site names so each site is geocoded only once.
    unique_sites = df['Site'].unique()

    # Step 2: Set up the Google Maps Geocoding client.
    # SECURITY: the API key used to be hard-coded here; read it from the
    # environment instead so credentials are never committed to source control.
    api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
    if not api_key:
        raise SystemExit("Set the GOOGLE_MAPS_API_KEY environment variable before running.")
    gmaps = googlemaps.Client(key=api_key)

    # Step 3: Build a site -> (lat, lon) dictionary, skipping the test site.
    site_coordinates = {}
    for site in unique_sites:
        if site != TEST_SITE:
            site_coordinates[site] = get_coordinates_google(gmaps, site)
            time.sleep(1)  # Pause to avoid exceeding the API rate limit.

    # Step 4: Create latitude and longitude columns in the original dataframe.
    df['Latitude'] = df['Site'].map(lambda site: site_coordinates.get(site, (None, None))[0])
    df['Longitude'] = df['Site'].map(lambda site: site_coordinates.get(site, (None, None))[1])

    # Step 5: Remove rows belonging to the test site.
    df = df[df['Site'] != TEST_SITE]

    # Step 6: Save the updated dataframe to a new CSV.
    df.to_csv('EVCS_Usage_With_Google_Coordinates.csv', index=False)

    print("Data cleaned and Google Maps coordinates added successfully!")


if __name__ == '__main__':
    main()
78 changes: 78 additions & 0 deletions personal-work/yulin-zhuang/Main dash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""Dash dashboard for EV charging usage in Perth and Kinross: station map,
charging-time histogram, charging-amount pie chart, amount-vs-time heatmap,
and a daily energy-usage line chart."""
import dash
from dash import dcc, html
import plotly.express as px
import pandas as pd

# Load and preprocess data (output of the Google-geocoding script).
df = pd.read_csv('EVCS_Usage_With_Google_Coordinates.csv')

# Remove rows with negative charging amounts (bad meter readings).
df = df[df['Total kWh'] >= 0]

# Remove rows for the internal test station (not a real location).
df = df[df['Site'] != '***TEST SITE*** Charge Your Car HQ']

# Convert 'Start Time' to datetime and extract the hour.
# NOTE: this REPLACES the 'Start Time' column with an integer hour 0-23;
# everything below uses it as an hour-of-day value.
df['Start Time'] = pd.to_datetime(df['Start Time'], format='%H:%M:%S').dt.hour

# Calculate daily energy usage (total kWh per calendar day).
df['Start Date'] = pd.to_datetime(df['Start Date'], format='%Y-%m-%d')
daily_energy = df.groupby(df['Start Date'].dt.date)['Total kWh'].sum().reset_index()
daily_energy.columns = ['Date', 'Total kWh']

# Calculate the frequency (number of charging sessions) of each site.
site_freq = df.groupby('Site').size().reset_index(name='Frequency')

# Merge frequency data back to the original dataframe so every row carries
# its site's session count (drives marker size/colour on the map).
df = pd.merge(df, site_freq, on='Site')

# Map for charging station locations and usage frequency.
map_fig = px.scatter_mapbox(df, lat="Latitude", lon="Longitude", hover_name="Site", hover_data=["Frequency"],
                            size="Frequency", color="Frequency",
                            title="EV Charging Station Usage Frequency",
                            color_continuous_scale=px.colors.sequential.Plasma, size_max=15, zoom=10)
map_fig.update_layout(mapbox_style="open-street-map")

# Histogram of charging start hours, with the mean start hour marked.
avg_hour = df['Start Time'].mean()
hist_fig = px.histogram(df, x='Start Time', nbins=24, title='Charging Time Histogram (24-Hour Format)', labels={'Start Time': 'Hour of the Day'})
hist_fig.update_xaxes(title_text='Hour of the Day', tickvals=list(range(24)), ticktext=[f'{i}:00' for i in range(24)])
hist_fig.update_yaxes(title_text='Frequency')
hist_fig.add_vline(x=avg_hour, line_dash='dash', line_color='red', annotation_text='Average Hour', annotation_position='top left')

# Pie chart of charging-amount distribution, bucketed into 20 kWh bins.
# NOTE(review): the last interval is [120, 140) labelled '120+'; sessions
# of 140 kWh or more fall outside the bins and are dropped — confirm intended.
bins = [0, 20, 40, 60, 80, 100, 120, 140]
labels = ['0-20', '20-40', '40-60', '60-80', '80-100', '100-120', '120+']
df['Charging Amount Bin'] = pd.cut(df['Total kWh'], bins=bins, labels=labels, right=False)
pie_fig = px.pie(df, names='Charging Amount Bin', title='Charging Amount Distribution', labels={'Charging Amount Bin': 'Charging Amount Bin'})

# Daily energy usage line chart.
daily_energy_fig = px.line(daily_energy, x='Date', y='Total kWh', title='Daily Energy Usage (Total kWh)', labels={'Date': 'Date', 'Total kWh': 'Total kWh'})

# Heatmap of charging amount vs. start hour, weighted by session count.
heatmap_data = df.groupby(['Start Time', 'Total kWh']).size().reset_index(name='Count')
heatmap_fig = px.density_heatmap(heatmap_data, x='Start Time', y='Total kWh', z='Count',
                                 color_continuous_scale='OrRd',  # orange-to-red gradient: light = low frequency, dark = high
                                 title='Charging Amount vs. Charging Time Heatmap',
                                 labels={'Start Time': 'Hour of the Day', 'Total kWh': 'Charging Amount (kWh)', 'Count': 'Frequency'})
heatmap_fig.update_xaxes(title_text='Hour of the Day', tickvals=list(range(24)), ticktext=[f'{i}:00' for i in range(24)])
heatmap_fig.update_yaxes(title_text='Charging Amount (kWh)')

# Initialize Dash app
app = dash.Dash(__name__)

# Define layout: map on top, pie/heatmap side by side, then the two
# full-width charts; every graph takes half the viewport height.
app.layout = html.Div([
    html.H1('EV Charging Dashboard'),
    dcc.Graph(figure=map_fig, style={'height': '50vh'}),  # Map at the top
    html.Div([
        dcc.Graph(figure=pie_fig, style={'width': '50%','height': '50vh'}),  # Pie chart on the left
        dcc.Graph(figure=heatmap_fig, style={'width': '50%','height': '50vh'})  # Heatmap on the right
    ], style={'display': 'flex', 'flex-direction': 'row'}),
    dcc.Graph(figure=daily_energy_fig, style={'height': '50vh'}),  # Daily energy usage chart
    dcc.Graph(figure=hist_fig, style={'height': '50vh'})  # Histogram at the bottom
], style={'padding': '10px'})

# Run the development server (only when executed directly, not on import).
if __name__ == '__main__':
    app.run_server(debug=True)
33 changes: 33 additions & 0 deletions personal-work/yulin-zhuang/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
File and folder description

Dataset (raw data folder):
66,000 electric vehicle charging usage data in Perth and Kinross, Scotland
EVCS Usage_Sep16_Aug17_PerthandKinross.csv
EVCS Usage_Sep17_Aug18_PerthandKinross.csv
EVCS Usage_Sep18_Aug19_PerthandKinross.csv

Single-function tests:
1. Charging time analysis: Usage Analyst.py
2. Charging kWh analysis: kWh analyze.py

Dataset merging:
Combine the datasets.py
#Merge the three copies of data and redistribute the index values

Data integrity check:
Check data integrity.py
Checks for negative 'Total kWh' records (and removes them) and verifies the dataset contains no null values

Merged dataset:
EVCS_Usage_Three_Years.csv

Dataset used by Dash:
EVCS_Usage_Three_Years_Cleaned_Negative_Value.csv

Get coordinates from the Google Maps API:
Generate site location using Google Map API.py
#Please supply your own API key before running — keys must not be committed to source control

Dash dashboard:
Main dash.py
#I built a Dash dashboard to analyze electric vehicle charging data, including a map for station locations, a histogram for charging times, pie charts, and a heatmap to display energy usage patterns and station frequencies.
Loading

0 comments on commit 25a9803

Please sign in to comment.