-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #50 from Chameleon-company/YoriMaster-patch-2
Add files via upload
- Loading branch information
Showing
15 changed files
with
266,737 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import pandas as pd


def remove_negative_kwh(df):
    """Return the rows of *df* whose 'Total kWh' value is non-negative.

    Negative energy readings are treated as bad records and dropped.
    """
    return df[df['Total kWh'] >= 0]


def has_missing_values(df):
    """Return True if any cell anywhere in *df* is null/NaN."""
    return bool(df.isnull().any().any())


def main():
    """Load the merged usage data, drop bad rows, and save a cleaned copy."""
    # Load the CSV file
    df = pd.read_csv('EVCS_Usage_Three_Years.csv')

    # Report records with negative 'Total kWh' before removing them
    negative_total_kwh = df[df['Total kWh'] < 0]
    print(f"Found {len(negative_total_kwh)} records with negative 'Total kWh':\n", negative_total_kwh)

    # Remove records with negative 'Total kWh'
    df_cleaned = remove_negative_kwh(df)

    # Check for missing values anywhere in the cleaned data
    if has_missing_values(df_cleaned):
        print("The dataset contains missing values.")
    else:
        print("The dataset has no missing values in any cell.")

    # Save the cleaned data to a new CSV file
    df_cleaned.to_csv('EVCS_Usage_Three_Years_Cleaned.csv', index=False)

    print("Data wrangling succeeded")


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import pandas as pd

# Canonical column names shared by the three yearly Perth & Kinross exports.
COLUMNS = ['_id', 'CP ID', 'Connector', 'Start Date', 'Start Time',
           'End Date', 'End Time', 'Total kWh', 'Site', 'Model']


def combine_usage_frames(frames, columns=None):
    """Concatenate yearly usage frames into one normalized DataFrame.

    Applies the canonical column names, normalizes date columns to
    YYYY-MM-DD and time columns to HH:MM:SS (unparseable values become
    NaN), and reassigns '_id' sequentially from 1 so the merged result
    has unique ids.
    """
    combined = pd.concat(frames, ignore_index=True)

    # Redefine column names (the raw files were read header-less).
    combined.columns = columns if columns is not None else COLUMNS

    # Convert 'Start Date' and 'End Date' to datetime format (YYYY-MM-DD).
    # NOTE(review): the source data is Scottish, so the raw dates are
    # presumably dd/mm/yyyy — consider dayfirst=True; verify against the
    # raw CSV files before changing.
    combined['Start Date'] = pd.to_datetime(combined['Start Date'], errors='coerce').dt.strftime('%Y-%m-%d')
    combined['End Date'] = pd.to_datetime(combined['End Date'], errors='coerce').dt.strftime('%Y-%m-%d')

    # Convert 'Start Time' and 'End Time' to time format (HH:MM:SS).
    combined['Start Time'] = pd.to_datetime(combined['Start Time'], format='%H:%M', errors='coerce').dt.strftime('%H:%M:%S')
    combined['End Time'] = pd.to_datetime(combined['End Time'], format='%H:%M', errors='coerce').dt.strftime('%H:%M:%S')

    # Reassign _id sequentially from 1.
    combined['_id'] = range(1, len(combined) + 1)
    return combined


def main():
    """Read the three yearly CSVs, merge them, and write the combined file."""
    files = ['EVCS Usage_Sep16_Aug17_PerthandKinross.csv',
             'EVCS Usage_Sep17_Aug18_PerthandKinross.csv',
             'EVCS Usage_Sep18_Aug19_PerthandKinross.csv']
    # skiprows=1 / header=None discards each file's own header row; the
    # canonical names in COLUMNS are applied after concatenation.
    frames = [pd.read_csv(f, skiprows=1, header=None) for f in files]

    combined_df = combine_usage_frames(frames)

    # Save the combined DataFrame to a CSV file.
    combined_df.to_csv('EVCS_Usage_Three_Years.csv', index=False)

    print("Combined DF Shape:", combined_df.shape)
    print("Combined DF Head:", combined_df.head())


if __name__ == '__main__':
    main()
66,665 changes: 66,665 additions & 0 deletions
66,665
personal-work/yulin-zhuang/EVCS_Usage_Three_Years.csv
Large diffs are not rendered by default.
Oops, something went wrong.
66,473 changes: 66,473 additions & 0 deletions
66,473
personal-work/yulin-zhuang/EVCS_Usage_Three_Years_Cleaned_Negative_Value.csv
Large diffs are not rendered by default.
Oops, something went wrong.
66,644 changes: 66,644 additions & 0 deletions
66,644
personal-work/yulin-zhuang/EVCS_Usage_With_Google_Coordinates.csv
Large diffs are not rendered by default.
Oops, something went wrong.
45 changes: 45 additions & 0 deletions
45
personal-work/yulin-zhuang/Generate site location using Google Map API.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import os
import time

import pandas as pd

# Rows with this site name are a test fixture, not a real charging station.
TEST_SITE = "***TEST SITE*** Charge Your Car HQ"


def get_coordinates_google(site, client):
    """Geocode *site* (scoped to Perth and Kinross, Scotland) via *client*.

    *client* is a googlemaps.Client (or anything with a compatible
    ``geocode`` method). Returns a (lat, lng) tuple, or (None, None) when
    geocoding fails or yields no result.
    """
    try:
        geocode_result = client.geocode(f"{site}, Perth and Kinross, Scotland")
        if geocode_result:
            location = geocode_result[0]['geometry']['location']
            return location['lat'], location['lng']
        return None, None
    except Exception as e:
        # Best-effort lookup: report the failure and continue with
        # missing coordinates rather than aborting the whole run.
        print(f"Error fetching {site}: {e}")
        return None, None


def main():
    """Attach Google-geocoded coordinates to every site and save the result."""
    # Third-party dependency only needed when actually geocoding.
    import googlemaps

    # Load the CSV.
    df = pd.read_csv('EVCS_Usage_Three_Years.csv')

    # SECURITY: the API key must come from the environment, never be
    # committed to source control. (The previously committed key should be
    # revoked.)
    api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
    if not api_key:
        raise RuntimeError("Set the GOOGLE_MAPS_API_KEY environment variable")
    gmaps = googlemaps.Client(key=api_key)

    # Geocode each unique real site once (skip the test fixture).
    site_coordinates = {}
    for site in df['Site'].unique():
        if site != TEST_SITE:
            site_coordinates[site] = get_coordinates_google(site, gmaps)
            time.sleep(1)  # pause to avoid exceeding the API rate limit

    # Map the looked-up coordinates back onto every usage row.
    df['Latitude'] = df['Site'].map(lambda s: site_coordinates.get(s, (None, None))[0])
    df['Longitude'] = df['Site'].map(lambda s: site_coordinates.get(s, (None, None))[1])

    # Remove the test-site rows entirely.
    df = df[df['Site'] != TEST_SITE]

    # Save the updated dataframe to a new CSV.
    df.to_csv('EVCS_Usage_With_Google_Coordinates.csv', index=False)

    print("Data cleaned and Google Maps coordinates added successfully!")


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
# EV charging dashboard: loads the geocoded usage CSV, builds five Plotly
# figures (map, pie, heatmap, daily line chart, histogram), and serves them
# in a single Dash page. Everything runs at import time, so the figure
# construction order below matters.
import dash
from dash import dcc, html
import plotly.express as px
import pandas as pd

# Load and preprocess data
df = pd.read_csv('EVCS_Usage_With_Google_Coordinates.csv')

# Remove rows with negative charging amounts (treated as bad readings)
df = df[df['Total kWh'] >= 0]

# Remove rows with specific site (a test fixture, not a real station)
df = df[df['Site'] != '***TEST SITE*** Charge Your Car HQ']

# Convert 'Start Time' to datetime and extract the hour — from here on,
# 'Start Time' holds an integer hour 0-23, not a time string.
df['Start Time'] = pd.to_datetime(df['Start Time'], format='%H:%M:%S').dt.hour

# Calculate daily energy usage (total kWh summed per calendar date)
df['Start Date'] = pd.to_datetime(df['Start Date'], format='%Y-%m-%d')
daily_energy = df.groupby(df['Start Date'].dt.date)['Total kWh'].sum().reset_index()
daily_energy.columns = ['Date', 'Total kWh']

# Calculate the frequency of each site (number of charging sessions)
site_freq = df.groupby('Site').size().reset_index(name='Frequency')

# Merge frequency data back to original dataframe so each row carries its
# site's session count (used for marker size/color on the map)
df = pd.merge(df, site_freq, on='Site')

# Map for charging station locations and frequency
map_fig = px.scatter_mapbox(df, lat="Latitude", lon="Longitude", hover_name="Site", hover_data=["Frequency"],
                            size="Frequency", color="Frequency",
                            title="EV Charging Station Usage Frequency",
                            color_continuous_scale=px.colors.sequential.Plasma, size_max=15, zoom=10)
map_fig.update_layout(mapbox_style="open-street-map")

# Histogram for charging times, with a dashed line at the mean start hour
avg_hour = df['Start Time'].mean()
hist_fig = px.histogram(df, x='Start Time', nbins=24, title='Charging Time Histogram (24-Hour Format)', labels={'Start Time': 'Hour of the Day'})
hist_fig.update_xaxes(title_text='Hour of the Day', tickvals=list(range(24)), ticktext=[f'{i}:00' for i in range(24)])
hist_fig.update_yaxes(title_text='Frequency')
hist_fig.add_vline(x=avg_hour, line_dash='dash', line_color='red', annotation_text='Average Hour', annotation_position='top left')

# Pie chart for charging amount distribution, bucketed into kWh ranges
# (right=False makes bins left-inclusive: [0,20), [20,40), ...)
bins = [0, 20, 40, 60, 80, 100, 120, 140]
labels = ['0-20', '20-40', '40-60', '60-80', '80-100', '100-120', '120+']
df['Charging Amount Bin'] = pd.cut(df['Total kWh'], bins=bins, labels=labels, right=False)
pie_fig = px.pie(df, names='Charging Amount Bin', title='Charging Amount Distribution', labels={'Charging Amount Bin': 'Charging Amount Bin'})

# Daily energy usage line chart
daily_energy_fig = px.line(daily_energy, x='Date', y='Total kWh', title='Daily Energy Usage (Total kWh)', labels={'Date': 'Date', 'Total kWh': 'Total kWh'})

# Heatmap for charging amount vs. time
# NOTE(review): this groups by the *exact* kWh value, so most (hour, kWh)
# pairs will have Count 1 — binning 'Total kWh' first may be the intent;
# verify before changing.
heatmap_data = df.groupby(['Start Time', 'Total kWh']).size().reset_index(name='Count')
heatmap_fig = px.density_heatmap(heatmap_data, x='Start Time', y='Total kWh', z='Count',
                                 color_continuous_scale='OrRd',  # low frequency light, high frequency dark
                                 title='Charging Amount vs. Charging Time Heatmap',
                                 labels={'Start Time': 'Hour of the Day', 'Total kWh': 'Charging Amount (kWh)', 'Count': 'Frequency'})
heatmap_fig.update_xaxes(title_text='Hour of the Day', tickvals=list(range(24)), ticktext=[f'{i}:00' for i in range(24)])
heatmap_fig.update_yaxes(title_text='Charging Amount (kWh)')

# Initialize Dash app
app = dash.Dash(__name__)

# Define layout with adjusted spacing
app.layout = html.Div([
    html.H1('EV Charging Dashboard'),
    dcc.Graph(figure=map_fig, style={'height': '50vh'}),  # Map at the top
    html.Div([
        dcc.Graph(figure=pie_fig, style={'width': '50%', 'height': '50vh'}),  # Pie chart on the left
        dcc.Graph(figure=heatmap_fig, style={'width': '50%', 'height': '50vh'})  # Heatmap on the right
    ], style={'display': 'flex', 'flex-direction': 'row'}),
    dcc.Graph(figure=daily_energy_fig, style={'height': '50vh'}),  # Daily energy usage chart
    dcc.Graph(figure=hist_fig, style={'height': '50vh'})  # Histogram at the bottom
], style={'padding': '10px'})

# Run the app (debug=True enables hot reload; disable in production)
if __name__ == '__main__':
    app.run_server(debug=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
File and folder description | ||
|
||
Dataset (raw data folder): | ||
Roughly 66,000 electric vehicle charging usage records from Perth and Kinross, Scotland
EVCS Usage_Sep16_Aug17_PerthandKinross.csv | ||
EVCS Usage_Sep17_Aug18_PerthandKinross.csv | ||
EVCS Usage_Sep18_Aug19_PerthandKinross.csv | ||
|
||
Single-function tests:
1. Charging time analysis: Usage Analyst.py
2. Charging kWh analysis: kWh analyze.py
|
||
Dataset merging: | ||
Combine the datasets.py | ||
# Merges the three yearly datasets and reassigns sequential index values
|
||
Data integrity check: | ||
Check data integrity.py | ||
Check whether the dataset contains null values | ||
|
||
Merged dataset: | ||
EVCS_Usage_Three_Years.csv | ||
|
||
Dataset used by Dash: | ||
EVCS_Usage_Three_Years_Cleaned_Negative_Value.csv | ||
|
||
Get coordinates from Google Map API: | ||
Generate site location using Google Map API.py | ||
#Please replace API Key when using | ||
|
||
Dash dashboard: | ||
Main dash.py | ||
#I built a Dash dashboard to analyze electric vehicle charging data, including a map for station locations, a histogram for charging times, pie charts, and a heatmap to display energy usage patterns and station frequencies. |
Oops, something went wrong.