-
Notifications
You must be signed in to change notification settings - Fork 0
/
LinearRegression_ComputerData.r
52 lines (38 loc) · 1.49 KB
/
LinearRegression_ComputerData.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Load necessary libraries
library(dplyr)
library(caTools)
# Locate the input data.
# The folder is held in a variable and joined to the file name with
# file.path() instead of calling setwd(), so running this script does not
# change the working directory of the R session that runs it.
# Edit `data_dir` to point at your own copy of the dataset.
data_dir <- "C:/Users/Khushi/OneDrive/Desktop/data+codes/datasets"
csv_path <- file.path(data_dir, "Computer_data.csv")
# Load data; empty strings in the file are treated as missing values (NA)
Computer_data <- read.csv(csv_path, na.strings = "")
# Remove unnecessary columns, identified by position (1, 8, 9, 10, 11)
# NOTE(review): positional removal depends on the CSV column order; confirm
# these indices still match the file if its layout ever changes.
Computer_data <- Computer_data[-c(1, 8, 9, 10, 11)]
# Correlation of price with processor speed and with hard-drive size
# (cor() defaults are used, so any NA in either column yields NA)
cor_speed <- cor(Computer_data[["price"]], Computer_data[["speed"]])
cor_hd <- cor(Computer_data[["price"]], Computer_data[["hd"]])
# Fit two separate single-predictor ANOVA models of price: one on RAM, one on CD
# NOTE(review): if `ram` is stored as a number, aov() treats it as a continuous
# predictor rather than a grouping factor; confirm that is what is intended.
anov_ram <- aov(price ~ ram, data = Computer_data)
anov_cd <- aov(price ~ cd, data = Computer_data)
# Show the ANOVA tables (RAM first, then CD)
summary(anov_ram)
summary(anov_cd)
# Draw a boxplot of the price column to inspect its spread and outliers
boxplot(x = Computer_data[["price"]])
# Split data into training (70%) and testing (30%) sets
# Fix the RNG seed first: sample.split() draws random numbers, so without a
# seed every run produces a different split and different downstream results.
set.seed(123)
sample_indices <- sample.split(Computer_data$price, SplitRatio = 0.70)
# sample_indices is a logical vector with one entry per row:
# TRUE rows form the training set, FALSE rows form the test set
trainingset <- Computer_data[sample_indices, ]
testset <- Computer_data[!sample_indices, ]
# Fit a linear model of price on every other column of the training set
model <- lm(price ~ ., data = trainingset)
# Score the test rows and store the predictions next to the actual prices
testset$predicted_price <- predict(model, newdata = testset)
# Correlation between actual and predicted prices on the test set
cor_predicted <- with(testset, cor(price, predicted_price))
# Copy of the data with the `screen` column renamed to `screen_size` (dplyr)
a <- rename(Computer_data, screen_size = screen)
# Central tendency of price and hard-drive size, taken from the renamed copy
# Means
mean_price <- mean(a[["price"]])
mean_hd <- mean(a[["hd"]])
# Medians
median_price <- median(a[["price"]])
median_hd <- median(a[["hd"]])
# Keep only the columns whose names do not start with "m"
b <- a %>% select(-starts_with("m"))