-
Notifications
You must be signed in to change notification settings - Fork 0
/
LinearRegression_CarPrice.py
79 lines (63 loc) · 3.22 KB
/
LinearRegression_CarPrice.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#EDA and linear regression
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
# Move into the data directory (relative to the launch directory) and load
# the car-price dataset into a DataFrame.
os.chdir("../path")
mtcars = pd.read_csv('CarPrice_Assignment.csv')
# Quick look at the data: the first rows, then the summary statistics.
for preview in (mtcars.head(), mtcars.describe()):
    print(preview)
# Exploratory Data Analysis (EDA) with visualizations
#
# sns.pairplot is figure-level: it builds its own figure and grid of axes.
# Every other seaborn call below is axes-level and draws onto the *current*
# axes, so each one is preceded by plt.figure(). Without that, all of them
# would be painted on top of each other in the last subplot of the pair grid.

# Pairplot to visualize relationships between all numerical variables.
# Alternative that leaves the identifier column out of the grid:
#   sns.pairplot(mtcars.drop(['car_ID'], axis=1), height=3)
sns.pairplot(mtcars)

# Countplot for categorical variable 'enginetype'
plt.figure()
sns.countplot(x='enginetype', data=mtcars)

# Scatterplots of selected variables against 'price', one figure per variable
price_predictors = [
    'horsepower',
    'compressionratio',
    'enginesize',
    'cylindernumber',
    'enginetype',
    'carheight',
    'carwidth',
    'carlength',
    'wheelbase',
    'fueltype',
]
for column in price_predictors:
    plt.figure()
    sns.scatterplot(x=column, y='price', data=mtcars)

# Boxplot of 'price' to visualize distribution and outliers
plt.figure()
sns.boxplot(y='price', data=mtcars)

# Boxplot of 'price' grouped by 'enginetype'
plt.figure()
sns.boxplot(x='enginetype', y='price', data=mtcars)

# Boxplot of 'compressionratio' grouped by 'fueltype'
plt.figure()
sns.boxplot(x='fueltype', y='compressionratio', data=mtcars)

# The figures stay open until the next plt.show() call in the script.
# Uncomment the next line to display them right away instead.
# plt.show()
# Single variable regression (Highway MPG vs Price)
# Double brackets keep both selections as DataFrames; scikit-learn expects the
# feature matrix to be 2-D (n_samples, n_features).
x = mtcars[['highwaympg']]  # independent variable
y = mtcars[['price']]  # dependent variable
reg_single = LinearRegression()  # same class as linear_model.LinearRegression
reg_single.fit(x, y)
print("\nCoefficients for single variable regression (Highway MPG vs Price): ")
print(reg_single.coef_)

# Visualizing the relationship between Highway MPG and Price.
# regplot draws onto the current axes, so open a new figure first; otherwise
# the regression line would land on top of whichever plot was drawn last.
plt.figure()
sns.regplot(x=x['highwaympg'], y=y['price'])
plt.xlabel('Highway MPG')
plt.ylabel('Price')
plt.title('Regression plot: Highway MPG vs Price')
plt.show()

# Coefficient observed on this dataset: -809.27352829 (inverse relation).
# Interpretation: each additional 1 mpg of highway fuel efficiency is
# associated with a price that is lower by about 809.27 price units.
# This is an association in the data, not a causal effect. It looks
# counterintuitive at first, but fuel-efficient cars tend to be economical
# models targeting budget-conscious buyers, whereas luxury cars tend to be
# less fuel-efficient and are priced higher.
# Multiple regression: model price from horsepower and curb weight together.
X = mtcars.loc[:, ['horsepower', 'curbweight']]  # independent variables
Y = mtcars.loc[:, ['price']]  # dependent variable
# fit() returns the fitted estimator, so construction and fitting can be chained.
reg_multiple = LinearRegression().fit(X, Y)
print(
    "\nCoefficients for multiple regression (Price vs Horsepower and Curb Weight):",
    reg_multiple.coef_,
    sep="\n",
)
# Coefficients observed for ['horsepower', 'curbweight']: [83.81225563, 8.03749183]
# Interpretation: holding the other predictor fixed, one extra unit of
# horsepower is associated with a price about 83.81 units higher, and one extra
# unit of curb weight with a price about 8.04 units higher.