|
"""Pandas Case Study: Analyzing Sales Data.

A beginner-oriented walkthrough of basic pandas operations on a small,
in-memory sales dataset: inspecting a DataFrame, deriving a column,
grouping/aggregating, filtering, datetime handling, and writing a CSV.

Running the script prints each step's result and writes the processed
table to ``processed_sales_data.csv`` in the current working directory.
"""

import pandas as pd
import numpy as np  # kept from the original script; not used by the steps below

# Orders whose revenue is strictly above this amount count as "high value".
HIGH_VALUE_THRESHOLD = 1000
# Destination of the processed dataset written in Step 8.
OUTPUT_CSV = 'processed_sales_data.csv'

# Step 1: Create a sample sales dataset
data = {
    'order_id': [1, 2, 3, 4, 5, 6],
    'product': ['Laptop', 'Phone', 'Tablet', 'Laptop', 'Phone', 'Tablet'],
    'category': ['Electronics', 'Electronics', 'Electronics',
                 'Electronics', 'Electronics', 'Electronics'],
    'quantity': [1, 2, 1, 3, 1, 2],
    'price': [999.99, 499.99, 299.99, 999.99, 499.99, 299.99],
    'date': ['2025-01-01', '2025-01-02', '2025-01-02',
             '2025-01-03', '2025-01-03', '2025-01-04'],
}
df = pd.DataFrame(data)

# Step 2: Display basic information about the dataset
print("Step 2: Basic Dataset Information")
print("\nFirst 5 rows of the dataset:")
print(df.head())
print("\nDataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would emit a stray "None" line.
df.info()
print("\nBasic Statistics:")
print(df.describe())

# Step 3: Calculate total revenue (quantity * price)
# Round to cents so binary floating-point artifacts from the multiplication
# do not leak into the comparisons below or into the saved CSV.
df['total_revenue'] = (df['quantity'] * df['price']).round(2)
print("\nStep 3: Dataset with Total Revenue")
print(df[['order_id', 'product', 'quantity', 'price', 'total_revenue']])

# Step 4: Group by product and calculate total quantity sold and revenue
product_summary = df.groupby('product').agg({
    'quantity': 'sum',
    'total_revenue': 'sum',
}).reset_index()
print("\nStep 4: Product Summary")
print(product_summary)

# Step 5: Find the best-selling product by quantity
# idxmax() returns the row label of the first maximum, so ties resolve to
# the first product in the (alphabetically sorted) summary.
best_seller = product_summary.loc[product_summary['quantity'].idxmax()]
print("\nStep 5: Best-Selling Product")
print(f"Best-selling product: {best_seller['product']} with {best_seller['quantity']} units sold")

# Step 6: Filter orders with total revenue > $1000
high_value_orders = df[df['total_revenue'] > HIGH_VALUE_THRESHOLD]
print("\nStep 6: High-Value Orders (> $1000)")
print(high_value_orders[['order_id', 'product', 'total_revenue']])

# Step 7: Convert date column to datetime, extract month, and total the
# revenue per calendar day
df['date'] = pd.to_datetime(df['date'])
df['month'] = df['date'].dt.month
daily_sales = df.groupby('date').agg({
    'total_revenue': 'sum',
}).reset_index()
print("\nStep 7: Daily Sales Summary")
print(daily_sales)

# Step 8: Save the processed data to a CSV file
# index=False keeps the positional row index out of the file.
df.to_csv(OUTPUT_CSV, index=False)
print(f"\nStep 8: Data saved to '{OUTPUT_CSV}'")
0 commit comments