Skip to content

Commit 3675b5f

Browse files
author
Ramon
committed
Source code and tidy dataset
This first commit includes the source code for this assignment and the resulting tidy dataset.
0 parents  commit 3675b5f

File tree

2 files changed

+286
-0
lines changed

2 files changed

+286
-0
lines changed

run_analysis.R

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
## run_analysis.R
2+
## Getting and Cleaning Data Course Project
3+
## Ramon Perez Hernandez
4+
5+
6+
# **********
7+
# * TASK 1 *
8+
# **********
9+
# "Merge the training and the test sets to create one data set"
10+
11+
12+
# Fetch and unpack the raw data.
# The two checks are independent on purpose: the archive is downloaded only
# when "data.zip" is missing, and it is extracted whenever the
# "UCI HAR Dataset" folder is missing. (Previously the extraction was nested
# inside the download check, so an already-downloaded archive was never
# unpacked if the folder did not exist.)
url <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
name <- "data.zip"

if (!file.exists(name)) {
  download.file(url, destfile = name, method = "curl")
}

if (!file.exists("UCI HAR Dataset")) {
  unzip(name)
}
22+
23+
# The final data frame will be composed of:
24+
# - Subject who performed the activity (from subject_train/test.txt).
25+
# - Activity (from y_train/test.txt).
26+
# - Measures (from X_train/test.txt).
27+
28+
# Training split: read the three pieces separately, then bind them
# column-wise in the order subject | activity | measures.
train_subject <- read.table("UCI HAR Dataset/train/subject_train.txt")
train_activity <- read.table("UCI HAR Dataset/train/y_train.txt")
train_measures <- read.table("UCI HAR Dataset/train/X_train.txt")
train_df <- cbind(train_subject, train_activity, train_measures)

# Test split: same three pieces, same column order.
test_subject <- read.table("UCI HAR Dataset/test/subject_test.txt")
test_activity <- read.table("UCI HAR Dataset/test/y_test.txt")
test_measures <- read.table("UCI HAR Dataset/test/X_test.txt")
test_df <- cbind(test_subject, test_activity, test_measures)

# Stack the training rows on top of the test rows to get the merged data set.
df <- rbind(train_df, test_df)
40+
41+
42+
# **********
43+
# * TASK 2 *
44+
# **********
45+
# "Extract only the measurements on the mean and standard deviation for each measurement"
46+
47+
48+
# features.txt holds the names of the measures stored in X_train/X_test.txt;
# keep its second column (V2) as a plain character vector.
feat_names <- as.character(read.table("UCI HAR Dataset/features.txt")$V2)

# Indices of the features whose name contains "mean()" or "std()".
# They are shifted by 2 because the first two columns of df are the subject
# and the activity, so feature i lives in column i + 2 of df.
positions <- which(grepl("mean\\(\\)|std\\(\\)", feat_names)) + 2

# Keep the subject and activity columns plus the selected measure columns.
df <- df[, c(1:2, positions)]
60+
61+
62+
# **********
63+
# * TASK 3 *
64+
# **********
65+
# "Use descriptive activity names to name the activities in the data set"
66+
67+
68+
# activity_labels.txt pairs each activity code (V1) with its name (V2).
act_labels <- read.table("UCI HAR Dataset/activity_labels.txt")
act_names <- as.character(act_labels$V2)

# Turn the activity column (second column of df) into a factor, mapping each
# code to its name explicitly. Pairing levels with labels here avoids relying
# on the sorted codes in df happening to line up with the row order of the
# label file, and still works if some activity code is absent from df.
df[, 2] <- factor(df[, 2], levels = act_labels$V1, labels = act_names)
76+
77+
78+
# **********
79+
# * TASK 4 *
80+
# **********
81+
# "Appropriately label the data set with descriptive variable names"
82+
83+
84+
# Name the columns: "subject" and "activity" first, followed by the original
# feature names of the selected measures. "positions" carries a +2 column
# offset, so it is undone before indexing into feat_names.
measure_names <- feat_names[positions - 2]
names(df) <- c("subject", "activity", measure_names)
87+
88+
89+
# **********
90+
# * TASK 5 *
91+
# **********
92+
# "From the data set in step 4, creates a second, independent tidy data set
93+
# with the average of each variable for each activity and each subject"
94+
95+
96+
# dplyr provides group_by/summarise/across, used to average every measure
# per (subject, activity) pair.
library(dplyr)

# across(everything(), mean) replaces the deprecated summarise_each(funs(mean)).
# Grouping columns are never selected by across(), so only the measures are
# averaged. .groups = "drop_last" keeps the result grouped by subject, as the
# previous call did, and silences the regrouping message.
tidy_df <- df %>%
  group_by(subject, activity) %>%
  summarise(across(everything(), mean), .groups = "drop_last")

# Prefix every averaged measure with "MEAN-" to make clear it is a summary.
colnames(tidy_df) <- c("subject", "activity",
                       paste0("MEAN-", feat_names[positions - 2]))

# Save tidy_df into the "tidy_df.txt" file.
write.table(tidy_df, "tidy_df.txt", row.names = FALSE)

0 commit comments

Comments
 (0)