Skip to content

Commit

Permalink
Merge pull request #50 from Chameleon-company/YoriMaster-patch-2
Browse files Browse the repository at this point in the history
Add files via upload
  • Loading branch information
jamesdaviesdeveloper authored Sep 27, 2024
2 parents ba3e8ad + 0ee2f35 commit 25a9803
Show file tree
Hide file tree
Showing 15 changed files with 266,737 additions and 0 deletions.
24 changes: 24 additions & 0 deletions personal-work/yulin-zhuang/Check data integrity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import pandas as pd


def remove_negative_total_kwh(df):
    """Split *df* on the sign of 'Total kWh'.

    Returns a tuple ``(cleaned, negatives)`` where ``cleaned`` keeps rows
    with 'Total kWh' >= 0 and ``negatives`` holds the offending records
    (negative readings are meter/data errors, not real sessions).
    """
    negatives = df[df['Total kWh'] < 0]
    cleaned = df[df['Total kWh'] >= 0]
    return cleaned, negatives


def has_missing_values(df):
    """Return True if any cell in *df* is null/NaN."""
    return bool(df.isnull().any().any())


def main():
    # Load the merged three-year usage dataset.
    df = pd.read_csv('EVCS_Usage_Three_Years.csv')

    # Report and drop records with negative 'Total kWh'.
    df_cleaned, negative_total_kwh = remove_negative_total_kwh(df)
    print(f"Found {len(negative_total_kwh)} records with negative 'Total kWh':\n", negative_total_kwh)

    # Check for missing values in the cleaned data.
    if has_missing_values(df_cleaned):
        print("The dataset contains missing values.")
    else:
        print("The dataset has no missing values in any cell.")

    # Save the cleaned data to a new CSV file.
    df_cleaned.to_csv('EVCS_Usage_Three_Years_Cleaned.csv', index=False)

    print("Data wrangling successful")


if __name__ == '__main__':
    main()
32 changes: 32 additions & 0 deletions personal-work/yulin-zhuang/Combine the datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import pandas as pd

# Canonical column names for the merged dataset.
COLUMNS = ['_id', 'CP ID', 'Connector', 'Start Date', 'Start Time', 'End Date',
           'End Time', 'Total kWh', 'Site', 'Model']

# The three yearly source files shipped with this folder.
DEFAULT_INPUT_FILES = [
    'EVCS Usage_Sep16_Aug17_PerthandKinross.csv',
    'EVCS Usage_Sep17_Aug18_PerthandKinross.csv',
    'EVCS Usage_Sep18_Aug19_PerthandKinross.csv',
]


def combine_usage_files(paths, columns=COLUMNS):
    """Merge the yearly EVCS usage CSVs into one normalized DataFrame.

    Each file's own header row is skipped and replaced by *columns*.
    Dates are normalized to YYYY-MM-DD and times (input HH:MM) to
    HH:MM:SS; unparseable values become NaN via ``errors='coerce'``.
    The '_id' column is reassigned sequentially from 1 so the merged
    file has unique ids.
    """
    frames = [pd.read_csv(p, skiprows=1, header=None) for p in paths]
    combined = pd.concat(frames, ignore_index=True)
    combined.columns = columns

    # Normalize dates to YYYY-MM-DD.
    for col in ('Start Date', 'End Date'):
        combined[col] = pd.to_datetime(combined[col], errors='coerce').dt.strftime('%Y-%m-%d')

    # Normalize times to HH:MM:SS (source data uses HH:MM).
    for col in ('Start Time', 'End Time'):
        combined[col] = pd.to_datetime(combined[col], format='%H:%M', errors='coerce').dt.strftime('%H:%M:%S')

    # Reassign _id sequentially from 1.
    combined['_id'] = range(1, len(combined) + 1)
    return combined


def main():
    combined_df = combine_usage_files(DEFAULT_INPUT_FILES)

    # Save the combined DataFrame to a CSV file.
    combined_df.to_csv('EVCS_Usage_Three_Years.csv', index=False)

    print("Combined DF Shape:", combined_df.shape)
    print("Combined DF Head:", combined_df.head())


if __name__ == '__main__':
    main()
66,665 changes: 66,665 additions & 0 deletions personal-work/yulin-zhuang/EVCS_Usage_Three_Years.csv

Large diffs are not rendered by default.

66,473 changes: 66,473 additions & 0 deletions personal-work/yulin-zhuang/EVCS_Usage_Three_Years_Cleaned_Negative_Value.csv

Large diffs are not rendered by default.

66,644 changes: 66,644 additions & 0 deletions personal-work/yulin-zhuang/EVCS_Usage_With_Google_Coordinates.csv

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import os
import time

import pandas as pd

# Placeholder site used for internal testing; it has no real location.
TEST_SITE = "***TEST SITE*** Charge Your Car HQ"


def get_coordinates_google(gmaps, site):
    """Geocode *site* (within Perth and Kinross, Scotland) via *gmaps*.

    Returns a (latitude, longitude) tuple, or (None, None) when the site
    cannot be geocoded or the API call fails.
    """
    try:
        geocode_result = gmaps.geocode(f"{site}, Perth and Kinross, Scotland")
        if geocode_result:
            location = geocode_result[0]['geometry']['location']
            return location['lat'], location['lng']
        return None, None
    except Exception as e:
        # Best-effort: report the failure and continue with unknown coordinates.
        print(f"Error fetching {site}: {e}")
        return None, None


def main():
    # Imported lazily so the pure geocoding helper above can be reused and
    # tested without the third-party googlemaps package installed.
    import googlemaps

    # Load the merged usage data.
    df = pd.read_csv('EVCS_Usage_Three_Years.csv')

    # Step 1: Collect unique site names so each site is geocoded only once.
    unique_sites = df['Site'].unique()

    # Step 2: Set up the Google Maps Geocoding client.
    # SECURITY: the API key used to be hard-coded here; read it from the
    # environment instead so credentials are never committed to source control.
    api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
    if not api_key:
        raise SystemExit("Set the GOOGLE_MAPS_API_KEY environment variable before running.")
    gmaps = googlemaps.Client(key=api_key)

    # Step 3: Build a site -> (lat, lon) dictionary, skipping the test site.
    site_coordinates = {}
    for site in unique_sites:
        if site != TEST_SITE:
            site_coordinates[site] = get_coordinates_google(gmaps, site)
            time.sleep(1)  # Pause to avoid exceeding the API rate limit.

    # Step 4: Create latitude and longitude columns in the original dataframe.
    df['Latitude'] = df['Site'].map(lambda site: site_coordinates.get(site, (None, None))[0])
    df['Longitude'] = df['Site'].map(lambda site: site_coordinates.get(site, (None, None))[1])

    # Step 5: Remove rows belonging to the test site.
    df = df[df['Site'] != TEST_SITE]

    # Step 6: Save the updated dataframe to a new CSV.
    df.to_csv('EVCS_Usage_With_Google_Coordinates.csv', index=False)

    print("Data cleaned and Google Maps coordinates added successfully!")


if __name__ == '__main__':
    main()
78 changes: 78 additions & 0 deletions personal-work/yulin-zhuang/Main dash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""Dash dashboard for EV charging usage in Perth and Kinross: station map,
charging-time histogram, charging-amount pie chart, amount-vs-time heatmap,
and a daily energy-usage line chart."""
import dash
from dash import dcc, html
import plotly.express as px
import pandas as pd

# Load and preprocess data (output of the Google-geocoding script).
df = pd.read_csv('EVCS_Usage_With_Google_Coordinates.csv')

# Remove rows with negative charging amounts (bad meter readings).
df = df[df['Total kWh'] >= 0]

# Remove rows for the internal test station (not a real location).
df = df[df['Site'] != '***TEST SITE*** Charge Your Car HQ']

# Convert 'Start Time' to datetime and extract the hour.
# NOTE: this REPLACES the 'Start Time' column with an integer hour 0-23;
# everything below uses it as an hour-of-day value.
df['Start Time'] = pd.to_datetime(df['Start Time'], format='%H:%M:%S').dt.hour

# Calculate daily energy usage (total kWh per calendar day).
df['Start Date'] = pd.to_datetime(df['Start Date'], format='%Y-%m-%d')
daily_energy = df.groupby(df['Start Date'].dt.date)['Total kWh'].sum().reset_index()
daily_energy.columns = ['Date', 'Total kWh']

# Calculate the frequency (number of charging sessions) of each site.
site_freq = df.groupby('Site').size().reset_index(name='Frequency')

# Merge frequency data back to the original dataframe so every row carries
# its site's session count (drives marker size/colour on the map).
df = pd.merge(df, site_freq, on='Site')

# Map for charging station locations and usage frequency.
map_fig = px.scatter_mapbox(df, lat="Latitude", lon="Longitude", hover_name="Site", hover_data=["Frequency"],
                            size="Frequency", color="Frequency",
                            title="EV Charging Station Usage Frequency",
                            color_continuous_scale=px.colors.sequential.Plasma, size_max=15, zoom=10)
map_fig.update_layout(mapbox_style="open-street-map")

# Histogram of charging start hours, with the mean start hour marked.
avg_hour = df['Start Time'].mean()
hist_fig = px.histogram(df, x='Start Time', nbins=24, title='Charging Time Histogram (24-Hour Format)', labels={'Start Time': 'Hour of the Day'})
hist_fig.update_xaxes(title_text='Hour of the Day', tickvals=list(range(24)), ticktext=[f'{i}:00' for i in range(24)])
hist_fig.update_yaxes(title_text='Frequency')
hist_fig.add_vline(x=avg_hour, line_dash='dash', line_color='red', annotation_text='Average Hour', annotation_position='top left')

# Pie chart of charging-amount distribution, bucketed into 20 kWh bins.
# NOTE(review): the last interval is [120, 140) labelled '120+'; sessions
# of 140 kWh or more fall outside the bins and are dropped — confirm intended.
bins = [0, 20, 40, 60, 80, 100, 120, 140]
labels = ['0-20', '20-40', '40-60', '60-80', '80-100', '100-120', '120+']
df['Charging Amount Bin'] = pd.cut(df['Total kWh'], bins=bins, labels=labels, right=False)
pie_fig = px.pie(df, names='Charging Amount Bin', title='Charging Amount Distribution', labels={'Charging Amount Bin': 'Charging Amount Bin'})

# Daily energy usage line chart.
daily_energy_fig = px.line(daily_energy, x='Date', y='Total kWh', title='Daily Energy Usage (Total kWh)', labels={'Date': 'Date', 'Total kWh': 'Total kWh'})

# Heatmap of charging amount vs. start hour, weighted by session count.
heatmap_data = df.groupby(['Start Time', 'Total kWh']).size().reset_index(name='Count')
heatmap_fig = px.density_heatmap(heatmap_data, x='Start Time', y='Total kWh', z='Count',
                                 color_continuous_scale='OrRd',  # orange-to-red gradient: light = low frequency, dark = high
                                 title='Charging Amount vs. Charging Time Heatmap',
                                 labels={'Start Time': 'Hour of the Day', 'Total kWh': 'Charging Amount (kWh)', 'Count': 'Frequency'})
heatmap_fig.update_xaxes(title_text='Hour of the Day', tickvals=list(range(24)), ticktext=[f'{i}:00' for i in range(24)])
heatmap_fig.update_yaxes(title_text='Charging Amount (kWh)')

# Initialize Dash app
app = dash.Dash(__name__)

# Define layout: map on top, pie/heatmap side by side, then the two
# full-width charts; every graph takes half the viewport height.
app.layout = html.Div([
    html.H1('EV Charging Dashboard'),
    dcc.Graph(figure=map_fig, style={'height': '50vh'}),  # Map at the top
    html.Div([
        dcc.Graph(figure=pie_fig, style={'width': '50%','height': '50vh'}),  # Pie chart on the left
        dcc.Graph(figure=heatmap_fig, style={'width': '50%','height': '50vh'})  # Heatmap on the right
    ], style={'display': 'flex', 'flex-direction': 'row'}),
    dcc.Graph(figure=daily_energy_fig, style={'height': '50vh'}),  # Daily energy usage chart
    dcc.Graph(figure=hist_fig, style={'height': '50vh'})  # Histogram at the bottom
], style={'padding': '10px'})

# Run the development server (only when executed directly, not on import).
if __name__ == '__main__':
    app.run_server(debug=True)
33 changes: 33 additions & 0 deletions personal-work/yulin-zhuang/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
File and folder description

Dataset (raw data folder):
66,000 electric vehicle charging usage data in Perth and Kinross, Scotland
EVCS Usage_Sep16_Aug17_PerthandKinross.csv
EVCS Usage_Sep17_Aug18_PerthandKinross.csv
EVCS Usage_Sep18_Aug19_PerthandKinross.csv

Single-function tests:
1. Charging time analysis: Usage Analyst.py
2. Charging kWh analysis: kWh analyze.py

Dataset merging:
Combine the datasets.py
#Merge the three copies of data and redistribute the index values

Data integrity check:
Check data integrity.py
Checks for negative 'Total kWh' records (and removes them) and verifies the dataset contains no null values

Merged dataset:
EVCS_Usage_Three_Years.csv

Dataset used by Dash:
EVCS_Usage_Three_Years_Cleaned_Negative_Value.csv

Get coordinates from the Google Maps API:
Generate site location using Google Map API.py
#Please supply your own API key before running — keys must not be committed to source control

Dash dashboard:
Main dash.py
#I built a Dash dashboard to analyze electric vehicle charging data, including a map for station locations, a histogram for charging times, pie charts, and a heatmap to display energy usage patterns and station frequencies.
Loading

0 comments on commit 25a9803

Please sign in to comment.