Skip to content

Commit e0de1fc

Browse files
committed
Update B05Sa_cluster
1 parent 01dc296 commit e0de1fc

File tree

1 file changed

+65
-45
lines changed
  • devel/shiny/B05Sa_cluster

1 file changed

+65
-45
lines changed

devel/shiny/B05Sa_cluster/app.R

Lines changed: 65 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,26 @@ conf <- BioDataScience::config()
44
library(shiny)
55
library(learndown)
66
library(BioDataScience2)
7+
library(dplyr)
8+
library(tidyr)
9+
library(flow)
10+
library(chart)
711

812
# add news functions ----
913
## This function move to a package
1014

15+
# CAH for SciViews, version 1.1.1
16+
# Copyright (c) 2021, Philippe Grosjean (phgrsojean@sciviews.org)
17+
18+
# dist is really a dissimilarity matrix => we use dissimilarity() as in the
19+
# {cluster} package, i.e., class is c("dissimilarity", "dist")
20+
# TODO: also make a similarity object and convert between the two
21+
# fun can be stats::dist, vegan::vegdist, vegan::designdist, cluster::daisy
22+
# factoextra::get_dist and probably other dist-compatible functions
23+
# Depending on method =, use either vegan::vegdist or stats::dist as default fun
1124
dissimilarity <- function(data, formula = ~ ., subset = NULL,
12-
method = "euclidean", scale = FALSE, rownames.col = "rowname",
13-
transpose = FALSE, fun = NULL, ...) {
25+
method = "euclidean", scale = FALSE, rownames.col = "rowname",
26+
transpose = FALSE, fun = NULL, ...) {
1427
# TODO: get more meaningful warnings and errors by replacing fun by actual
1528
# name of the function
1629
if (is.null(fun)) {# Default function depends on the chosen method
@@ -104,7 +117,7 @@ as.dissimilarity.matrix <- function(x, ...) {
104117

105118
# We want to print only the first few rows and columns
106119
print.dissimilarity <- function(x, digits.d = 3L, rownames.lab = "labels",
107-
...) {
120+
...) {
108121
mat <- as.matrix(x)
109122
mat <- format(round(mat, digits.d))
110123
diag(mat) <- ""
@@ -128,7 +141,7 @@ print.dissimilarity <- function(x, digits.d = 3L, rownames.lab = "labels",
128141
more_info <- " (transposed data)"
129142
}
130143
cat("Dissimilarity matrix with metric: ", attr(x, "metric"),
131-
more_info, "\n", sep = "")
144+
more_info, "\n", sep = "")
132145
print(tbl)
133146
invisible(x)
134147
}
@@ -144,14 +157,14 @@ nobs.dissimilarity <- function(object, ...)
144157
# TODO: `[` by first transforming into a matrix with as.matrix()
145158

146159
autoplot.dissimilarity <- function(object, order = TRUE, show.labels = TRUE,
147-
lab.size = NULL, gradient = list(low = "red", mid = "white", high = "blue"),
148-
...) {
160+
lab.size = NULL, gradient = list(low = "red", mid = "white", high = "blue"),
161+
...) {
149162
factoextra::fviz_dist(object, order = order, show_labels = show.labels,
150-
lab_size = lab.size, gradient = gradient)
163+
lab_size = lab.size, gradient = gradient)
151164
}
152165

153166
chart.dissimilarity <- function(data, ...,
154-
type = NULL, env = parent.frame())
167+
type = NULL, env = parent.frame())
155168
autoplot(data, type = type, ...)
156169

157170
# cluster object (inheriting from hclust)
@@ -234,7 +247,7 @@ augment.cluster <- function(x, data, k = NULL, h = NULL, ...) {
234247
clst <- predict(x, k = k, h = h, ...)
235248
if (nrow(data) != length(clst)) {
236249
stop("Different number of items in ", msg, " (",nrow(data) ,
237-
") and in the clusters (", length(clst), ")")
250+
") and in the clusters (", length(clst), ")")
238251
}
239252
tibble::add_column(data, .fitted = clst)
240253
}
@@ -245,7 +258,7 @@ augment.cluster <- function(x, data, k = NULL, h = NULL, ...) {
245258
# /beautiful-dendrogram-visualizations-in-r-5-must-known-methods
246259
# -unsupervised-machine-learning
247260
plot.cluster <- function(x, y, hang = -1, check = TRUE, type = "vertical",
248-
lab = "Height", ...) {
261+
lab = "Height", ...) {
249262
type <- match.arg(type[1], c("vertical", "horizontal", "circular"))
250263
# type == "circular" is special because we need to transform as ape::phylo
251264
if (type == "circular") {
@@ -269,11 +282,11 @@ plot.cluster <- function(x, y, hang = -1, check = TRUE, type = "vertical",
269282
# TODO: should be nice to do similar function for other symbols too in SciViews
270283
circle <- function(x = 0, y = 0, d = 1, col = 0, lwd = 1, lty = 1, ...)
271284
symbols(x = x, y = y, circles = d / 2, fg = col, lwd = lwd, lty = lty,
272-
inches = FALSE, add = TRUE, ...)
285+
inches = FALSE, add = TRUE, ...)
273286

274287
# TODO: make sure the dendrogram is correct with different ggplot themes
275288
autoplot.cluster <- function(object, type = "vertical", circ.text.size = 3,
276-
theme = theme_sciviews(), xlab = "", ylab = "Height", ...) {
289+
theme = theme_sciviews(), xlab = "", ylab = "Height", ...) {
277290
if (is.null(type))
278291
type <- "vertical"
279292
type <- match.arg(type[1], c("vertical", "horizontal", "circular"))
@@ -301,49 +314,55 @@ autoplot.cluster <- function(object, type = "vertical", circ.text.size = 3,
301314
scale_y_reverse() +
302315
coord_polar(start = pi/2) +
303316
geom_text(data = label_df,
304-
aes(x = id, y = -0.02, label = labels, hjust = hjust),
305-
size = circ.text.size, angle = label_df$angle, inherit.aes = FALSE) +
317+
aes(x = id, y = -0.02, label = labels, hjust = hjust),
318+
size = circ.text.size, angle = label_df$angle, inherit.aes = FALSE) +
306319
theme(panel.border = element_blank(),
307-
axis.text = element_blank(),
308-
axis.line = element_blank(),
309-
axis.ticks.y = element_blank()) +
320+
axis.text = element_blank(),
321+
axis.line = element_blank(),
322+
axis.ticks.y = element_blank()) +
310323
ylab("")
311324

312325
} else if (type == "vertical") {# Vertical dendrogram
313326
dendro <- dendro +
314327
scale_x_continuous(breaks = seq_along(ddata$labels$label),
315-
labels = ddata$labels$label) +
328+
labels = ddata$labels$label) +
316329
scale_y_continuous(expand = expansion(mult = c(0, 0.02))) +
317330
theme(panel.border = element_blank(),
318-
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
319-
axis.line.x = element_blank(),
320-
axis.ticks.x = element_blank(),
321-
axis.text.y = element_text(angle = 90, hjust = 0.5))
331+
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
332+
axis.line.x = element_blank(),
333+
axis.ticks.x = element_blank(),
334+
axis.text.y = element_text(angle = 90, hjust = 0.5))
322335

323336
} else {# Horizontal dendrogram
324337
dendro <- dendro +
325338
scale_x_continuous(breaks = seq_along(ddata$labels$label),
326-
labels = ddata$labels$label, position = "top") +
339+
labels = ddata$labels$label, position = "top") +
327340
scale_y_reverse(expand = expansion(mult = c(0.05, 0))) +
328341
coord_flip() +
329342
theme(panel.border = element_blank(),
330-
axis.line.y = element_blank(),
331-
axis.ticks.y = element_blank())
343+
axis.line.y = element_blank(),
344+
axis.ticks.y = element_blank())
332345
}
333346
dendro
334347
}
335348

336349
chart.cluster <- function(data, ...,
337-
type = NULL, env = parent.frame())
350+
type = NULL, env = parent.frame())
338351
autoplot(data, type = type, ...)
339352

353+
# To indicate where to cut in the dendrogram, one could use `geom_hline()`,
354+
# but when the dendrogram is horizontal or circular, this is suprizing. So,
355+
# I define geom_dendroline(h = ....)
356+
geom_dendroline <- function(h, ...)
357+
geom_hline(yintercept = h, ...)
340358

341359
# data ----
342-
penguins <- read("penguins", package = "palmerpenguins")
360+
penguins <- data.io::read("penguins", package = "palmerpenguins")
343361

344362
penguins %>.%
345363
# filter(., sex == "male") %>.%
346-
select(., species, bill_length_mm, bill_depth_mm, flipper_length_mm, body_mass_g) %>.%
364+
select(., species, bill_length_mm, bill_depth_mm, flipper_length_mm,
365+
body_mass_g) %>.%
347366
drop_na(.) -> peng
348367

349368
peng %>.%
@@ -356,10 +375,10 @@ score_cah <- function(x, reference = peng$species, digits = 5) {
356375
max_gr <- apply(tab, 1, which.max)
357376
tab[ , ]
358377

359-
if(length(unique(max_gr)) < 3)
378+
if (length(unique(max_gr)) < 3)
360379
res <- "Votre CAH ne permet pas de retrouver les 3 groupes. Un ou plusieurs groupes sont confondus."
361380

362-
if(length(unique(max_gr)) == 3) {
381+
if (length(unique(max_gr)) == 3) {
363382
tot <- apply(tab, 1, max) / rowSums(tab)
364383
res <- paste0("Votre CAH permet de discerner 3 groupes avec une précision de ", round((100*sum(tot)/nlevels(reference)),digits = digits ), "%.")
365384
}
@@ -369,15 +388,16 @@ score_cah <- function(x, reference = peng$species, digits = 5) {
369388
# UI -----
370389

371390
ui <- fluidPage(
372-
learndownShiny("Regroupement d'espèces de manchôts avec la classification hiérarchique ascendante."),
391+
learndownShiny("Classification hiérarchique ascendante sur des mesures de manchots d'antarctique."),
373392

374393
sidebarLayout(
375394
sidebarPanel(
376-
p("Vous avez à disposition 342 manchôts de 3 espèces différentes. Trouvez les meilleurs paramètres afin d'obtenir la plus haute similitude entre votre CAH et les observations de terrain."),
377-
p("Les variables monitorées sont les suivante : la longueur du bec (mm), la profondeur du bec (mm), la longueur de la nageoire (mm), la masse (g)."),
378-
selectInput("method_dist", "Indice de distance", choices = c("euclidian", "bray", "canberra", "manhattan")),
395+
p("Vous avez à disposition des mesures sur 342 manchots de 3 espèces différentes. Trouvez les meilleurs paramètres pour votre CAH afin d'optimiser votre regroupement."),
396+
p("Les variables mesurées sont les suivantes : la longueur du bec (mm), la largeur du bec (mm), la longueur de la nageoire (mm) et la masse (g)."),
397+
selectInput("method_dist", "Métrique de distance", choices = c("euclidian", "bray", "canberra", "manhattan")),
379398
selectInput("scale", "Standardisation", choices = c(FALSE, TRUE)),
380-
selectInput("method_clust", "Méthode de CAH", choices = c("complete", "single","average", "ward.D2")),
399+
selectInput("method_clust", "Méthode de CAH",
400+
choices = c("complete", "single", "average", "ward.D2")),
381401
hr(),
382402
submitQuitButtons()
383403
),
@@ -399,11 +419,11 @@ ui <- fluidPage(
399419
)
400420
)
401421

402-
403422
server <- function(input, output, session) {
404423

405424
cah <- reactive({
406-
peng_dist <- dissimilarity(data = peng_red, scale = as.logical(input$scale), method = input$method_dist)
425+
peng_dist <- dissimilarity(data = peng_red, scale = as.logical(input$scale),
426+
method = input$method_dist)
407427
peng_clust <- cluster(peng_dist, method = input$method_clust)
408428
peng_clust
409429
})
@@ -429,14 +449,14 @@ server <- function(input, output, session) {
429449
})
430450

431451

432-
trackEvents(session, input, output,
433-
sign_in.fun = BioDataScience::sign_in, config = conf)
434-
trackSubmit(session, input, output, max_score = 3, solution =
435-
list(method_dist = "euclidian", scale = "TRUE", method_clust = "ward.D2"),
436-
comment = "",
437-
message.success = "Correct, c'est la meilleur solution. La CAH obtient un score très bon de plus de 94 % de correspondace",
438-
message.error = "Incorrect, un meilleur choix des paramètres est possible.")
439-
trackQuit(session, input, output, delay = 20)
452+
trackEvents(session, input, output,
453+
sign_in.fun = BioDataScience::sign_in, config = conf)
454+
trackSubmit(session, input, output, max_score = 3, solution =
455+
list(method_dist = "euclidian", scale = "TRUE", method_clust = "ward.D2"),
456+
comment = "",
457+
message.success = "Correct, c'est la meilleur solution. La CAH obtient un score très bon de plus de 94 % de correspondance",
458+
message.error = "Incorrect, un meilleur choix des paramètres est possible.")
459+
trackQuit(session, input, output, delay = 20)
440460
}
441461

442462
shinyApp(ui, server)

0 commit comments

Comments
 (0)