Skip to content

Commit ca318bc

Browse files
authored
Create CaseStudy.md
1 parent b71cadb commit ca318bc

File tree

1 file changed

+61
-0
lines changed

1 file changed

+61
-0
lines changed

35 Day Pandas Basic/CaseStudy.md

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
# Pandas Case Study: Analyzing Sales Data
# This case study introduces beginners to basic Pandas operations using a
# sample sales dataset. It is a flat script: running it prints each step's
# result and writes 'processed_sales_data.csv' to the current directory.

import pandas as pd
import numpy as np

# Step 1: Create a sample sales dataset
data = {
    'order_id': [1, 2, 3, 4, 5, 6],
    'product': ['Laptop', 'Phone', 'Tablet', 'Laptop', 'Phone', 'Tablet'],
    'category': ['Electronics', 'Electronics', 'Electronics', 'Electronics', 'Electronics', 'Electronics'],
    'quantity': [1, 2, 1, 3, 1, 2],
    'price': [999.99, 499.99, 299.99, 999.99, 499.99, 299.99],
    'date': ['2025-01-01', '2025-01-02', '2025-01-02', '2025-01-03', '2025-01-03', '2025-01-04'],
}
df = pd.DataFrame(data)

# Step 2: Display basic information about the dataset
print("Step 2: Basic Dataset Information")
print("\nFirst 5 rows of the dataset:")
print(df.head())
print("\nDataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would emit a stray "None" line.
df.info()
print("\nBasic Statistics:")
print(df.describe())

# Step 3: Calculate total revenue (quantity * price)
df['total_revenue'] = df['quantity'] * df['price']
print("\nStep 3: Dataset with Total Revenue")
print(df[['order_id', 'product', 'quantity', 'price', 'total_revenue']])

# Step 4: Group by product and calculate total quantity sold and revenue
product_summary = df.groupby('product').agg({
    'quantity': 'sum',
    'total_revenue': 'sum',
}).reset_index()
print("\nStep 4: Product Summary")
print(product_summary)

# Step 5: Find the best-selling product by quantity
# idxmax() returns the label of the first maximum, so a tie on quantity
# resolves to whichever product appears first in product_summary.
best_seller = product_summary.loc[product_summary['quantity'].idxmax()]
print("\nStep 5: Best-Selling Product")
print(f"Best-selling product: {best_seller['product']} with {best_seller['quantity']} units sold")

# Step 6: Filter orders with total revenue > $1000
high_value_orders = df[df['total_revenue'] > 1000]
print("\nStep 6: High-Value Orders (> $1000)")
print(high_value_orders[['order_id', 'product', 'total_revenue']])

# Step 7: Convert date column to datetime, extract month, and total the
# revenue per calendar day
df['date'] = pd.to_datetime(df['date'])
df['month'] = df['date'].dt.month
daily_sales = df.groupby('date').agg({
    'total_revenue': 'sum',
}).reset_index()
print("\nStep 7: Daily Sales Summary")
print(daily_sales)

# Step 8: Save the processed data to a CSV file
df.to_csv('processed_sales_data.csv', index=False)
print("\nStep 8: Data saved to 'processed_sales_data.csv'")

0 commit comments

Comments
 (0)