diff --git a/.Rhistory b/.Rhistory
index 85836757..cbe17324 100644
--- a/.Rhistory
+++ b/.Rhistory
@@ -1,512 +1,69 @@
-plot(set[, -3],
-main = 'SVM (Conjunto de Testing)',
-xlab = 'CP1', ylab = 'CP2',
-xlim = range(X1), ylim = range(X2))
-contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
-points(grid_set, pch = '.', col = ifelse(y_grid==2, 'deepskyblue',
-ifelse(y_grid == 1, 'springgreen3', 'tomato')))
-points(set, pch = 21, bg = ifelse(set[, 3]==2, 'blue3',
-ifelse(set[, 3] == 1, 'green4', 'red3')))
-set = testing_set
-X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.02)
-X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.02)
-grid_set = expand.grid(X1, X2)
-colnames(grid_set) = c('PC1', 'PC2')
-y_grid = predict(classifier, newdata = grid_set)
-plot(set[, -3],
-main = 'SVM (Conjunto de Testing)',
-xlab = 'CP1', ylab = 'CP2',
-xlim = range(X1), ylim = range(X2))
-contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
-points(grid_set, pch = '.', col = ifelse(y_grid==2, 'deepskyblue',
-ifelse(y_grid == 1, 'springgreen3', 'tomato')))
-points(set, pch = 21, bg = ifelse(set[, 3]==2, 'blue3',
-ifelse(set[, 3] == 1, 'green4', 'red3')))
-setwd("~/Developer/Projects/Matematicas/machinelearning-az/datasets/Part 9 - Dimensionality Reduction/Section 44 - Linear Discriminant Analysis (LDA)")
-# Aplicar LDA
-library(MASS)
+install.packages("caTools")
+# Plantilla para el Pre Procesado de Datos - Datos faltantes
 # Importar el dataset
-dataset = read.csv('Wine.csv')
-# Dividir los datos en conjunto de entrenamiento y conjunto de test
-# install.packages("caTools")
-library(caTools)
-set.seed(123)
-split = sample.split(dataset$Customer_Segment, SplitRatio = 0.8)
-training_set = subset(dataset, split == TRUE)
-testing_set = subset(dataset, split == FALSE)
-# Escalado de valores
-training_set[,-14] = scale(training_set[,-14])
-testing_set[,-14] = scale(testing_set[,-14])
-lda = lda(formula = Customer_Segment ~ ., data = training_set)
-training_set = predict(lda, training_set)
-# Ajustar el modelo de SVM con el conjunto de entrenamiento.
-classifier = svm(formula = Customer_Segment ~ .,
-data = training_set,
-type = "C-classification",
-kernel = "linear")
-library(caTools)
-set.seed(123)
-split = sample.split(dataset$Customer_Segment, SplitRatio = 0.8)
-training_set = subset(dataset, split == TRUE)
-testing_set = subset(dataset, split == FALSE)
-training_set[,-14] = scale(training_set[,-14])
-testing_set[,-14] = scale(testing_set[,-14])
-training_set = as.data.frame(predict(lda, training_set))
-View(training_set)
-training_set = training_set[, c(5, 6, 1)]
-View(testing_set)
-View(testing_set)
-testing_set = as.data.frame(predict(lda, testing_set))
-testing_set = testing_set[, c(5, 6, 1)]
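The removed history above reworks the same LDA step several times before settling. Consolidated into one runnable pass, it reads roughly as follows; this is a sketch assuming the course's Wine.csv layout (13 predictors plus Customer_Segment in column 14), where columns c(5, 6, 1) of the data frame returned by predict() are x.LD1, x.LD2 and the class:

library(MASS)
library(caTools)
library(e1071)

dataset = read.csv('Wine.csv')
set.seed(123)
split = sample.split(dataset$Customer_Segment, SplitRatio = 0.8)
training_set = subset(dataset, split == TRUE)
testing_set = subset(dataset, split == FALSE)

# Scale the 13 predictors; column 14 is the class label
training_set[, -14] = scale(training_set[, -14])
testing_set[, -14] = scale(testing_set[, -14])

# Fit LDA and project both splits onto the two linear discriminants
lda = lda(formula = Customer_Segment ~ ., data = training_set)
training_set = as.data.frame(predict(lda, training_set))[, c(5, 6, 1)]
testing_set = as.data.frame(predict(lda, testing_set))[, c(5, 6, 1)]

# Linear SVM on the two discriminants; the projected class column is "class"
classifier = svm(formula = class ~ ., data = training_set,
                 type = "C-classification", kernel = "linear")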
-# Ajustar el modelo de SVM con el conjunto de entrenamiento.
-library(e1071)
-classifier = svm(formula = class ~ .,
-data = training_set,
-type = "C-classification",
-kernel = "linear")
-# Predicción de los resultados con el conjunto de testing
-y_pred = predict(classifier, newdata = testing_set[,-3])
-# Crear la matriz de confusión
-cm = table(testing_set[, 3], y_pred)
-cm
-library(ElemStatLearn)
-set = training_set
-X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.025)
-X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.025)
-grid_set = expand.grid(X1, X2)
-colnames(grid_set) = c('x.LD1', 'x.LD2')
-y_grid = predict(classifier, newdata = grid_set)
-plot(set[, -3],
-main = 'SVM (Conjunto de Entrenamiento)',
-xlab = 'DL1', ylab = 'DL2',
-xlim = range(X1), ylim = range(X2))
-contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
-points(grid_set, pch = '.', col = ifelse(y_grid==2, 'deepskyblue',
-ifelse(y_grid == 1, 'springgreen3', 'tomato')))
-points(set, pch = 21, bg = ifelse(set[, 3]==2, 'blue3',
-ifelse(set[, 3] == 1, 'green4', 'red3')))
-set = testing_set
-X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.02)
-X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.02)
-grid_set = expand.grid(X1, X2)
-colnames(grid_set) = c('x.LD1', 'x.LD2')
-y_grid = predict(classifier, newdata = grid_set)
-plot(set[, -3],
-main = 'SVM (Conjunto de Testing)',
-xlab = 'DL1', ylab = 'DL2',
-xlim = range(X1), ylim = range(X2))
-contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
-points(grid_set, pch = '.', col = ifelse(y_grid==2, 'deepskyblue',
-ifelse(y_grid == 1, 'springgreen3', 'tomato')))
-points(set, pch = 21, bg = ifelse(set[, 3]==2, 'blue3',
-ifelse(set[, 3] == 1, 'green4', 'red3')))
-setwd("~/Developer/Projects/Matematicas/machinelearning-az/datasets/Part 9 - Dimensionality Reduction/Section 45 - Kernel PCA")
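The same expand.grid/contour visualization block is retyped throughout this history for every model and split, which is how the axis labels and step size drift out of sync. One way to factor it out is a small helper; plot_decision_boundary is a hypothetical name, and the sketch assumes a set whose first two columns are the features and whose third is the class, with the three-class color scheme used above:

plot_decision_boundary = function(classifier, set, title, xlab, ylab, step = 0.02) {
  # Build a dense grid over the two projected features
  X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = step)
  X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = step)
  grid_set = expand.grid(X1, X2)
  colnames(grid_set) = colnames(set)[1:2]
  y_grid = predict(classifier, newdata = grid_set)
  plot(set[, -3], main = title, xlab = xlab, ylab = ylab,
       xlim = range(X1), ylim = range(X2))
  contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
  points(grid_set, pch = '.', col = ifelse(y_grid == 2, 'deepskyblue',
                                           ifelse(y_grid == 1, 'springgreen3', 'tomato')))
  points(set, pch = 21, bg = ifelse(set[, 3] == 2, 'blue3',
                                    ifelse(set[, 3] == 1, 'green4', 'red3')))
}

plot_decision_boundary(classifier, training_set, 'SVM (Conjunto de Entrenamiento)', 'DL1', 'DL2')
plot_decision_boundary(classifier, testing_set, 'SVM (Conjunto de Testing)', 'DL1', 'DL2')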
+dataset = read.csv('Data.csv')
+getwd()
+setwd("~/Documents/Cursos Udemy/Matematicas/machinelearning-az/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------")
+# Plantilla para el Pre Procesado de Datos - Datos faltantes
 # Importar el dataset
-dataset = read.csv('Social_Network_Ads.csv')
-dataset = dataset[, 3:5]
-# Dividir los datos en conjunto de entrenamiento y conjunto de test
-# install.packages("caTools")
-library(caTools)
-set.seed(123)
-split = sample.split(dataset$Purchased, SplitRatio = 0.75)
-training_set = subset(dataset, split == TRUE)
-testing_set = subset(dataset, split == FALSE)
-# Escalado de valores
-training_set[,1:2] = scale(training_set[,1:2])
-testing_set[,1:2] = scale(testing_set[,1:2])
-# Ajustar el modelo de regresión logística con el conjunto de entrenamiento.
-classifier = glm(formula = Purchased ~ .,
-data = training_set,
-family = binomial)
-# Predicción de los resultados con el conjunto de testing
-prob_pred = predict(classifier, type = "response",
-newdata = testing_set[,-3])
-y_pred = ifelse(prob_pred> 0.5, 1, 0)
-# Crear la matriz de confusión
-cm = table(testing_set[, 3], y_pred)
-# Visualización del conjunto de entrenamiento
-#install.packages("ElemStatLearn")
-library(ElemStatLearn)
-set = training_set
-X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
-X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
-grid_set = expand.grid(X1, X2)
-colnames(grid_set) = c('Age', 'EstimatedSalary')
-prob_set = predict(classifier, type = 'response', newdata = grid_set)
-y_grid = ifelse(prob_set > 0.5, 1, 0)
-plot(set[, -3],
-main = 'Clasificación (Conjunto de Entrenamiento)',
-xlab = 'Edad', ylab = 'Sueldo Estimado',
-xlim = range(X1), ylim = range(X2))
-contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
-points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
-points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
-# Aplicar Kernel ACP
-install.packages("kernlab")
-# Aplicar Kernel ACP
-#install.packages("kernlab")
-library(kernlab)
-View(training_set)
-kpca = kpca(~., data = training_set[, -3],
-kernel = "rbfdot", features = 2)
-training_set = as.data.frame(predict(kpca, training_set))
-View(testing_set)
-View(training_set)
-dataset = read.csv('Social_Network_Ads.csv')
-dataset = dataset[, 3:5]
-# Dividir los datos en conjunto de entrenamiento y conjunto de test
-# install.packages("caTools")
-library(caTools)
-set.seed(123)
-split = sample.split(dataset$Purchased, SplitRatio = 0.75)
-training_set = subset(dataset, split == TRUE)
-testing_set = subset(dataset, split == FALSE)
-# Escalado de valores
-training_set[,1:2] = scale(training_set[,1:2])
-testing_set[,1:2] = scale(testing_set[,1:2])
-library(kernlab)
-kpca = kpca(~., data = training_set[, -3],
-kernel = "rbfdot", features = 2)
-training_set_pca = as.data.frame(predict(kpca, training_set))
-View(training_set)
-View(training_set)
-View(training_set_pca)
-View(training_set_pca)
-training_set_pca$Purchased = training_set$Purchased
-testing_set_pca = as.data.frame(predict(kpca, testing_set))
-testing_set_pca$Purchased = testing_set$Purchased
-View(training_set_pca)
-View(training_set)
-View(testing_set_pca)
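The first kernel PCA attempt above overwrites training_set with the projection and loses the Purchased column; the *_pca copies are the fix the history lands on. The settled step, as a sketch over the scaled Social_Network_Ads split built above:

library(kernlab)

# Fit the RBF-kernel PCA on the two features only (class column excluded)
kpca = kpca(~., data = training_set[, -3], kernel = "rbfdot", features = 2)

# Project both splits, then reattach the target column
training_set_pca = as.data.frame(predict(kpca, training_set))
training_set_pca$Purchased = training_set$Purchased
testing_set_pca = as.data.frame(predict(kpca, testing_set))
testing_set_pca$Purchased = testing_set$Purchased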
-# Ajustar el modelo de regresión logística con el conjunto de entrenamiento.
-classifier = glm(formula = Purchased ~ .,
-data = training_set_pca,
-family = binomial)
-# Predicción de los resultados con el conjunto de testing
-prob_pred = predict(classifier, type = "response",
-newdata = testing_set_pca[,-3])
-y_pred = ifelse(prob_pred> 0.5, 1, 0)
-# Crear la matriz de confusión
-cm = table(testing_set_pca[, 3], y_pred)
-cm
-library(ElemStatLearn)
-set = training_set_pca
-X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
-X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
-grid_set = expand.grid(X1, X2)
-colnames(grid_set) = c('V1', 'V2')
-prob_set = predict(classifier, type = 'response', newdata = grid_set)
-y_grid = ifelse(prob_set > 0.5, 1, 0)
-plot(set[, -3],
-main = 'Clasificación (Conjunto de Entrenamiento)',
-xlab = 'CP1', ylab = 'CP2',
-xlim = range(X1), ylim = range(X2))
-contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
-points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
-points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
-setwd("~/Developer/Projects/Matematicas/machinelearning-az/datasets/Part 10 - Model Selection & Boosting/Section 48 - Model Selection")
-# Importar el dataset
-dataset = read.csv('Social_Network_Ads.csv')
+dataset = read.csv('Data.csv')
+View(dataset)
+# Tratamiento de los valores NA
+dataset$Age = ifelse(is.na(dataset$Age),
+ave(dataset$Age, FUN = function(x) mean(x, na.rm = TRUE)),
+dataset$Age)
+dataset$Salary = ifelse(is.na(dataset$Salary),
+ave(dataset$Salary, FUN = function(x) mean(x, na.rm = TRUE)),
+dataset$Salary)
 View(dataset)
+str(dataset)
+# Plantilla para el Pre Procesado de Datos - Datos Categóricos
 # Importar el dataset
-dataset = read.csv('Social_Network_Ads.csv')
-dataset = dataset[, 3:5]
-# Dividir los datos en conjunto de entrenamiento y conjunto de test
-# install.packages("caTools")
+dataset = read.csv('Data.csv', stringsAsFactors = F)
+str(dataset)
+# Codificar las variables categóricas
+dataset$Country = factor(dataset$Country,
+levels = c("France", "Spain", "Germany"),
+labels = c(1, 2, 3))
+str(dataset)
+dataset$Purchased = factor(dataset$Purchased,
+levels = c("No", "Yes"),
+labels = c(0,1))
+str(dataset)
 library(caTools)
 set.seed(123)
-split = sample.split(dataset$Purchased, SplitRatio = 0.75)
+split = sample.split(dataset$Purchased, SplitRatio = 0.8)
 training_set = subset(dataset, split == TRUE)
 testing_set = subset(dataset, split == FALSE)
+View(testing_set)
+View(training_set)
 # Escalado de valores
-training_set[,1:2] = scale(training_set[,1:2])
-testing_set[,1:2] = scale(testing_set[,1:2])
-# Ajustar el clasificador con el conjunto de entrenamiento.
-#install.packages("e1071") -library(e1071) -classifier = svm(formula = Purchased ~ ., -data = training_set, -type = "C-classification", -kernel = "radial") -# Predicción de los resultados con el conjunto de testing -y_pred = predict(classifier, newdata = testing_set[,-3]) -# Crear la matriz de confusión -cm = table(testing_set[, 3], y_pred) -# Aplicar algoritmo de k-fold cross validation -# install.packages("caret") -library(caret) -folds = createFolds(training_set$Purchased, k = 10) -cv = lapply(folds, function(x) { -training_fold = training_set[-x, ] -test_fold = training_set[x, ] -classifier = svm(formula = Purchased ~ ., -data = training_fold, -type = "C-classification", -kernel = "radial") -y_pred = predict(classifier, newdata = test_fold[,-3]) -cm = table(test_fold[, 3], y_pred) -accuracy = (cm[1,1]+cm[2,2])/(cm[1,1]+cm[1,2]+cm[2,1]+cm[2,2]) -return(accuracy) -}) -cv -mean(as.numeric(cv)) -accuracy = mean(as.numeric(cv)) -accuracy_sd = sd(as.numeric(cv)) -knitr::opts_chunk$set(echo = TRUE) -anime = read.csv("anime.csv") +training_set[,2:3] = scale(training_set[,2:3]) +testing_set[,2:3] = scale(testing_set[,2:3]) +install.packages("arules") library(arules) -rating = read.transactions( -file = "watchlists.csv", -format = "single", -sep = ",", -cols=c("user_id","anime_id"), -rm.duplicates = T -) -rating = read.transactions( -file = "watchlists.csv", -format = "single", -sep = ",", -cols=c("user_id","anime_id"), -rm.duplicates = T -) -summary(rating) -itemFrequencyPlot(rating, topN = 10) -filter(anime, anime_id %in%c(1535,10757)) -View(anime) -library(tidyverse) -filter(anime, anime_id %in%c(1535,10757)) -itemFrequencyPlot(rating, topN = 10) -filter(anime, anime_id %in%c(1535,1575)) -itemFrequencyPlot(rating, topN = 10) -itemFrequencyPlot(rating, topN = 20) -filter(anime, anime_id ==1535) -also_see(rules, 1) -also_see <- function(rules, id_rule){ -lhs <- strsplit(gsub("\\{|\\}","", as.character(inspect(sort(rules, by = 'lift')[id_rule])$lhs)), ",") -rhs <- strsplit(gsub("\\{|\\}","", as.character(inspect(sort(rules, by = 'lift')[id_rule])$rhs)), ",") -print("Si has visto...") -anime[anime$anime_id %in% lhs[[1]],] -print("Te recomiendo..") -anime[anime$anime_id %in% lhs[[1]],] -} -also_see(rules, 1) -swords = read.csv("stormofswords.csv") -swords -library(igraph) -g <- graph.data.frame(swords, directed=FALSE) -print(g) -plot(g) -head(swords) -plot(g,layout=layout.fruchterman.reingold,edge.width=E(g)$weight/2) -head(swords) -colnames(swords) = c("Source", "Target", "weight") -head(swords) -g <- graph.data.frame(swords, directed=FALSE) -plot(g,layout=layout.fruchterman.reingold,edge.width=E(g)$weight/2) -adj=get.adjacency(g,attr='weight') -View(adj) -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold, edge.color="black",edge.width=E(g)$weight) -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold, edge.color="black",edge.width=E(g)$weight, rescale = FALSE, ylim=c(1,4),xlim=c(-17,24), asp = 0) -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold, edge.color="black",edge.width=E(g)$weight, rescale = FALSE, ylim=c(-20,20),xlim=c(-17,24), asp = 0) -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold, edge.color="black",edge.width=E(g)$weight, vertex.size=E(g)$weight*0.25) -V(g) -V(g)[1] -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold, edge.color="black",edge.width=E(g)$weight, vertex.size=0.25) -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold, 
edge.color="black",edge.width=E(g)$weight, vertex.size=0.01) -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold, edge.color="black",edge.width=E(g)$weight, vertex.size=0.01, vertex.label=NA) -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold, edge.color="black",edge.width=E(g)$weight/20, vertex.size=0.01, vertex.label=NA) -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold, edge.color="black",edge.width=E(g)$weight/20, vertex.size=0.01) -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold, edge.color="black",edge.width=E(g)$weight/20, vertex.size=0.01, edge.color="orange", -vertex.color="orange", vertex.frame.color="#ffffff", vertex.label=V(net)$media, vertex.label.color="black") -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold, edge.color="black",edge.width=E(g)$weight/20, vertex.size=0.01, edge.color="orange", -vertex.color="orange", vertex.frame.color="#ffffff", vertex.label.color="black") -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold, edge.color="black",edge.width=E(g)$weight/20, vertex.size=0.1, edge.color="orange", -vertex.color="orange", vertex.frame.color="#ffffff", vertex.label.color="black") -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold, edge.color="black",edge.width=E(g)$weight/20, vertex.size=1, edge.color="orange", -vertex.color="orange", vertex.frame.color="#ffffff", vertex.label.color="black") -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold,edge.width=E(g)$weight/20, vertex.size=1, edge.color="orange", -vertex.color="orange", vertex.frame.color="#ffffff", vertex.label.color="black") -degree(g, mode="all") -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold,edge.width=E(g)$weight/20, vertex.size=degree(g, mode="all"), edge.color="orange", -vertex.color="orange", vertex.frame.color="#ffffff", vertex.label.color="black") -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold,edge.width=E(g)$weight/20, vertex.size=degree(g, mode="all"), edge.color="orange", -vertex.color="orange", vertex.frame.color="#ffffff", vertex.label.color="black", -vertex.label.cex=.7, vertex.label.font=2) -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold,edge.width=E(g)$weight/10, vertex.size=degree(g, mode="all"), edge.color="orange", -vertex.color="orange", vertex.frame.color="#ffffff", vertex.label.color="black", -vertex.label.cex=.7, vertex.label.font=2) -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold,edge.width=E(g)$weight/10, vertex.size=degree(g, mode="all"), edge.color="orange", -vertex.color="orange", vertex.frame.color="#ffffff", vertex.label.color="black", -vertex.label.cex=.5, vertex.label.font=2) -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold,edge.width=E(g)$weight/10, vertex.size=degree(g, mode="all"), edge.color="orange", -vertex.color="orange", vertex.frame.color="#ffffff", vertex.label.color="black", -vertex.label.cex=.5, vertex.label.font=2) -plot.igraph(g,vertex.label=V(g)$name,layout=layout.fruchterman.reingold,edge.width=E(g)$weight/10, vertex.size=sqrt(degree(g, mode="all")), edge.color="orange", -vertex.color="orange", vertex.frame.color="#ffffff", vertex.label.color="black", -vertex.label.cex=.5, vertex.label.font=2) -plot.igraph(g,vertex.label=V(g)$name,layout=layout_with_fr(g.bg),edge.width=E(g)$weight/10, vertex.size=sqrt(degree(g, mode="all"))+1, edge.color="orange", 
-vertex.color="orange", vertex.frame.color="#ffffff", vertex.label.color="black", -vertex.label.cex=.5, vertex.label.font=2) -plot.igraph(g,vertex.label=V(g)$name,layout=layout_with_fr(g),edge.width=E(g)$weight/10, vertex.size=sqrt(degree(g, mode="all"))+1, edge.color="orange", -vertex.color="orange", vertex.frame.color="#ffffff", vertex.label.color="black", -vertex.label.cex=.5, vertex.label.font=2) -plot.igraph(g,vertex.label=V(g)$name,layout=layout_with_fr(g),edge.width=E(g)$weight/10, vertex.size=sqrt(degree(g, mode="all"))+1, edge.color="orange", -vertex.color="orange", vertex.frame.color="#ffffff", vertex.label.color="black", -vertex.label.cex=.5, vertex.label.font=2) -plot.igraph(g,vertex.label=V(g)$name,layout=layout_with_lgl,edge.width=E(g)$weight/10, vertex.size=sqrt(degree(g, mode="all"))+1, edge.color="orange", -vertex.color="orange", vertex.frame.color="#ffffff", vertex.label.color="black", -vertex.label.cex=.5, vertex.label.font=2) -plot.igraph(g,vertex.label=V(g)$name,layout=layout_with_lgl(g),edge.width=E(g)$weight/10, vertex.size=sqrt(degree(g, mode="all"))+1, edge.color="orange", -vertex.color="orange", vertex.frame.color="#ffffff", vertex.label.color="black", -vertex.label.cex=.5, vertex.label.font=2) -plot.igraph(g,vertex.label=V(g)$name,layout=layout_with_lgl(g),edge.width=E(g)$weight/10, vertex.size=sqrt(degree(g, mode="all"))+1, edge.color="orange", -vertex.color="orange", vertex.frame.color="#ffffff", vertex.label.color="black", -vertex.label.cex=.5, vertex.label.font=2) -plot.igraph(g,vertex.label=V(g)$name,layout=layout_with_lgl(g),edge.width=E(g)$weight/10, vertex.size=sqrt(degree(g, mode="all"))+1, edge.color="orange", -vertex.color="orange", vertex.frame.color="#ffffff", vertex.label.color="black", -vertex.label.cex=.7, vertex.label.font=2) -V(g) -V(net)[media=="Wall Street Journal"] -V(g)[media=="Wall Street Journal"] -V(g)[m"Wall Street Journal"] -V(g)["Wall Street Journal"] -V(g)[1] -View(g) -g[[1]] -g[[1]][1] -g[[1]][[1]] -g[[1]][[1]][1] -neighbors(g, #V(net)[media=="Wall Street Journal"], -mode="out") -neighbors(g, V(net)#[media=="Wall Street Journal"], -,mode="out") -neighbors(g, V(g)#[media=="Wall Street Journal"], -,mode="out") -neighbors(g, V(g)[3]#[media=="Wall Street Journal"], -,mode="out") -netm <- get.adjacency(g, attr="weight", sparse=F) -colnames(netm) <- V(g) -rownames(netm) <- V(g) -palf <- colorRampPalette(c("gold", "dark orange")) heatmap(netm[,17:1], Rowv = NA, Colv = NA, col = palf(100), -palf <- colorRampPalette(c("gold", "dark orange")) -heatmap(netm[,17:1], Rowv = NA, Colv = NA, col = palf(100), -scale="none", margins=c(10,10) ) -vertex_attr(g, name, index = V(g)) -network.vertex.names(g) -vertex_attr_names(g) -colnames(netm) <- V(g)$name -rownames(netm) <- V(g)$name -heatmap(netm[,17:1], Rowv = NA, Colv = NA, col = palf(100), -scale="none", margins=c(10,10) ) -vertex_attr_names(g) heatmap(netm[,17:1], Rowv = NA, Colv = NA, col = palf(100), -scale="none", margins=c(10,10) ) -heatmap(netm[,17:1], Rowv = NA, Colv = NA, col = palf(100), -scale="none", margins=c(10,10) ) -heatmap(netm[,], Rowv = NA, Colv = NA, col = palf(100), -scale="none", margins=c(10,10) ) -heatmap(netm[,], Rowv = NA, Colv = NA, col = palf(100), -scale="none", margins=c(10,10) ) -setwd("~/Developer/Projects/Matematicas/machinelearning-az/datasets/Part 10 - Model Selection & Boosting/Section 48 - Model Selection") -# Importar el dataset -dataset = read.csv('Social_Network_Ads.csv') -dataset = dataset[, 3:5] -# Dividir los datos en conjunto de entrenamiento y conjunto de 
-setwd("~/Developer/Projects/Matematicas/machinelearning-az/datasets/Part 10 - Model Selection & Boosting/Section 48 - Model Selection")
-# Importar el dataset
-dataset = read.csv('Social_Network_Ads.csv')
-dataset = dataset[, 3:5]
-# Dividir los datos en conjunto de entrenamiento y conjunto de test
-# install.packages("caTools")
-library(caTools)
-set.seed(123)
-split = sample.split(dataset$Purchased, SplitRatio = 0.75)
-training_set = subset(dataset, split == TRUE)
-testing_set = subset(dataset, split == FALSE)
-# Escalado de valores
-training_set[,1:2] = scale(training_set[,1:2])
-testing_set[,1:2] = scale(testing_set[,1:2])
-# Ajustar el clasificador con el conjunto de entrenamiento.
-#install.packages("e1071")
-library(e1071)
-classifier = svm(formula = Purchased ~ .,
-data = training_set,
-type = "C-classification",
-kernel = "radial")
-# Predicción de los resultados con el conjunto de testing
-y_pred = predict(classifier, newdata = testing_set[,-3])
-# Crear la matriz de confusión
-cm = table(testing_set[, 3], y_pred)
-# Aplicar algoritmo de k-fold cross validation
-# install.packages("caret")
-library(caret)
-folds = createFolds(training_set$Purchased, k = 10)
-cv = lapply(folds, function(x) {
-training_fold = training_set[-x, ]
-test_fold = training_set[x, ]
-classifier = svm(formula = Purchased ~ .,
-data = training_fold,
-type = "C-classification",
-kernel = "radial")
-y_pred = predict(classifier, newdata = test_fold[,-3])
-cm = table(test_fold[, 3], y_pred)
-accuracy = (cm[1,1]+cm[2,2])/(cm[1,1]+cm[1,2]+cm[2,1]+cm[2,2])
-return(accuracy)
-})
-accuracy = mean(as.numeric(cv))
-accuracy_sd = sd(as.numeric(cv))
-classifier = train(form = Purchased ~ .,
-data = training_set, method = 'svmRadial ')
-classifier = train(form = Purchased ~ .,
-data = training_set, method = 'svmRadial')
-# Importar el dataset
-dataset = read.csv('Social_Network_Ads.csv')
-dataset = dataset[, 3:5]
+setwd("~/Documents/Cursos Udemy/Matematicas/machinelearning-az/datasets/Part 5 - Association Rule Learning/Section 28 - Apriori")
+dataset = read.csv("Market_Basket_Optimisation.csv", header = FALSE)
 View(dataset)
-dataset$Purchased = factor(dataset$Purchased)
-library(caTools)
-set.seed(123)
-split = sample.split(dataset$Purchased, SplitRatio = 0.75)
-training_set = subset(dataset, split == TRUE)
-testing_set = subset(dataset, split == FALSE)
-training_set[,1:2] = scale(training_set[,1:2])
-testing_set[,1:2] = scale(testing_set[,1:2])
-classifier = train(form = Purchased ~ .,
-data = training_set, method = 'svmRadial')
-classifier
-classifier$bestTune
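train() with method = 'svmRadial' already resamples a small default grid of sigma and C, and classifier$bestTune reports the winning pair. To search a wider grid, the standard tuneLength argument can be raised; a sketch:

library(caret)

classifier = train(form = Purchased ~ ., data = training_set,
                   method = 'svmRadial', tuneLength = 10)
classifier$bestTune   # the sigma and C that maximized resampled accuracy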
-setwd("~/Developer/Projects/Matematicas/machinelearning-az/datasets/Part 10 - Model Selection & Boosting/Section 49 - XGBoost")
-# Ajustar XGBoost al Conjunto de Entrenamiento
-install.packages("xgboost")
-# Ajustar XGBoost al Conjunto de Entrenamiento
-#install.packages("xgboost")
-library(xgboost)
-dataset = read.csv('Churn_Modelling.csv')
-dataset = dataset[, 4:14]
+dataset = read.transactions("Market_Basket_Optimisation.csv",
+sep = ",", rm.duplicates = TRUE)
 View(dataset)
-dataset$Geography = as.numeric(factor(dataset$Geography,
-levels = c("France", "Spain", "Germany"),
-labels = c(1, 2, 3)))
-dataset$Gender = as.numeric(factor(dataset$Gender,
-levels = c("Female", "Male"),
-labels = c(1,2)))
-library(caTools)
-set.seed(123)
-split = sample.split(dataset$Exited, SplitRatio = 0.8)
-training_set = subset(dataset, split == TRUE)
-testing_set = subset(dataset, split == FALSE)
-classifier = xgboost(data = as.matrix(training_set[, -11]),
-label = training_set$Exited,
-nrounds = 10)
-# Aplicar algoritmo de k-fold cross validation
-# install.packages("caret")
-library(caret)
-folds = createFolds(training_set$Exited, k = 10)
-cv = lapply(folds, function(x) {
-training_fold = training_set[-x, ]
-test_fold = training_set[x, ]
-classifier = xgboost(data = as.matrix(training_set[, -11]),
-label = training_set$Exited,
-nrounds = 10)
-y_pred = predict(classifier, newdata = as.matrix(test_fold[,-11]))
-y_pred = (y_pred >= 0.5)
-cm = table(test_fold[, 11], y_pred)
-accuracy = (cm[1,1]+cm[2,2])/(cm[1,1]+cm[1,2]+cm[2,1]+cm[2,2])
-return(accuracy)
-})
-accuracy = mean(as.numeric(cv))
-accuracy_sd = sd(as.numeric(cv))
+summary(dataset)
+itemFrequencyPlot(dataset, topN = 10)
+# Entrenar algoritmo Apriori con el dataset
+rules = apriori(data = dataset,
+parameter = list(support = 0.004, confidence = 0.2))
+# Visualización de los resultados
+inspect(sort(rules, by = 'lift')[1:10])
+# Visualización de los resultados
+inspect(sort(rules, by = 'lift')[1:10])
+# Entrenar algoritmo Apriori con el dataset
+rules = apriori(data = dataset,
+parameter = list(support = 0.004, confidence = 0.1))
+# Visualización de los resultados
+inspect(sort(rules, by = 'lift')[1:10])
+# Entrenar algoritmo Apriori con el dataset
+rules = apriori(data = dataset,
+parameter = list(support = 0.002, confidence = 0.1))
+# Visualización de los resultados
+inspect(sort(rules, by = 'lift')[1:10])
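The three apriori() runs above lower support from 0.004 to 0.002 and confidence from 0.2 to 0.1 by re-running the same two lines. The sweep can be written once; a sketch, assuming dataset is the transactions object built above:

library(arules)

# Try each (support, confidence) pair and inspect the top rules by lift
for (params in list(c(0.004, 0.2), c(0.004, 0.1), c(0.002, 0.1))) {
  rules = apriori(data = dataset,
                  parameter = list(support = params[1], confidence = params[2]))
  inspect(sort(rules, by = 'lift')[1:10])
}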
diff --git a/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/categorical_data.R b/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/categorical_data.R
index 587869b7..232cab99 100644
--- a/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/categorical_data.R
+++ b/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/categorical_data.R
@@ -1,12 +1,14 @@
 # Plantilla para el Pre Procesado de Datos - Datos Categóricos
 
 # Importar el dataset
-dataset = read.csv('Data.csv')
-
+dataset = read.csv('Data.csv', stringsAsFactors = F)
+str(dataset)
 # Codificar las variables categóricas
 dataset$Country = factor(dataset$Country,
                          levels = c("France", "Spain", "Germany"),
                          labels = c(1, 2, 3))
+
 dataset$Purchased = factor(dataset$Purchased,
                            levels = c("No", "Yes"),
-                           labels = c(0,1))
\ No newline at end of file
+                           labels = c(0,1))
+str(dataset)
diff --git a/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/categorical_data.py b/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/categorical_data.py
index 345babc8..401ed333 100644
--- a/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/categorical_data.py
+++ b/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/categorical_data.py
@@ -20,9 +20,18 @@
 
 # Codificar datos categóricos
 from sklearn.preprocessing import LabelEncoder, OneHotEncoder
+from sklearn.compose import ColumnTransformer
+
 labelencoder_X = LabelEncoder()
 X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
-onehotencoder = OneHotEncoder(categorical_features=[0])
-X = onehotencoder.fit_transform(X).toarray()
+
+ct = ColumnTransformer(
+    [('one_hot_encoder', OneHotEncoder(categories='auto'), [0])],    # The column numbers to be transformed (here is [0] but can be [0, 1, 3])
+    remainder='passthrough'                                          # Leave the rest of the columns untouched
+)
+
+#onehotencoder = OneHotEncoder(categorical_features=[0])
+#X = onehotencoder.fit_transform(X).toarray()
+X = np.array(ct.fit_transform(X), dtype=np.float)
 labelencoder_y = LabelEncoder()
 y = labelencoder_y.fit_transform(y)
\ No newline at end of file
diff --git a/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/data_preprocessing_template.py b/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/data_preprocessing_template.py
index 44d781b0..c71e2aae 100644
--- a/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/data_preprocessing_template.py
+++ b/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/data_preprocessing_template.py
@@ -25,7 +25,7 @@
 
 
 # Escalado de variables
-"""from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import StandardScaler
 sc_X = StandardScaler()
 X_train = sc_X.fit_transform(X_train)
-X_test = sc_X.transform(X_test)"""
\ No newline at end of file
+X_test = sc_X.transform(X_test)
\ No newline at end of file
diff --git a/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/missing_data.py b/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/missing_data.py
index 56344361..dafb5a79 100644
--- a/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/missing_data.py
+++ b/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/missing_data.py
@@ -19,7 +19,7 @@
 y = dataset.iloc[:, 3].values
 
 # Tratamiento de los NAs
-from sklearn.preprocessing import Imputer
-imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0)
+from sklearn.impute import SimpleImputer
+imputer = SimpleImputer(missing_values = np.nan, strategy = 'mean', verbose=0)
 imputer = imputer.fit(X[:, 1:3])
 X[:, 1:3] = imputer.transform(X[:,1:3])
\ No newline at end of file