Skip to content

Commit

Permalink
I updated the code to avoid warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
monopteryx committed Aug 9, 2024
1 parent 93487cb commit 607b898
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 30 deletions.
7 changes: 3 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: FeatureSelection
Type: Package
Title: Feature extraction and selection based on 'glmnet', 'xgboost' and 'ranger'
Version: 1.0.0
Date: 2021-05-19
Date: 2024-08-09
Authors@R: c( person(given = "Lampros", family = "Mouselimis", email = "mouselimislampros@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "https://orcid.org/0000-0002-8024-1546")))
Maintainer: Lampros Mouselimis <mouselimislampros@gmail.com>
BugReports: https://github.com/mlampros/FeatureSelection/issues
Expand All @@ -12,7 +12,7 @@ Depends:
R(>= 4.1.0)
Imports:
doParallel,
dplyr,
data.table,
glmnet,
ranger,
xgboost,
Expand All @@ -28,6 +28,5 @@ Suggests:
covr
SystemRequirements: update: apt-get -y update (deb)
License: GPL-3
LazyData: TRUE
Encoding: UTF-8
RoxygenNote: 7.1.1
RoxygenNote: 7.3.0
5 changes: 1 addition & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
FROM rocker/rstudio:devel


LABEL maintainer='Lampros Mouselimis'


RUN export DEBIAN_FRONTEND=noninteractive; apt-get -y update && \
apt-get install -y make zlib1g-dev libssl-dev libcurl4-openssl-dev && \
apt-get install -y sudo && \
apt-get -y update && \
R -e "install.packages(c( 'doParallel', 'dplyr', 'glmnet', 'ranger', 'xgboost', 'Matrix', 'magrittr', 'utils', 'stats', 'graphics', 'grDevices', 'rlang', 'testthat', 'covr', 'remotes' ), repos = 'https://cloud.r-project.org/' )" && \
R -e "install.packages(c( 'doParallel', 'data.table', 'glmnet', 'ranger', 'xgboost', 'Matrix', 'magrittr', 'utils', 'stats', 'graphics', 'grDevices', 'rlang', 'testthat', 'covr', 'remotes' ), repos = 'https://cloud.r-project.org/' )" && \
R -e "remotes::install_github('mlampros/FeatureSelection', upgrade = 'always', dependencies = TRUE, repos = 'https://cloud.r-project.org/')" && \
apt-get autoremove -y && \
apt-get clean


ENV USER rstudio


6 changes: 1 addition & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,8 @@ export(regr_folds)
export(wrapper_feat_select)
importFrom(Matrix,Matrix)
importFrom(Matrix,colSums)
importFrom(data.table,as.data.table)
importFrom(doParallel,registerDoParallel)
importFrom(dplyr,funs)
importFrom(dplyr,group_by)
importFrom(dplyr,n)
importFrom(dplyr,summarize)
importFrom(dplyr,summarize_each)
importFrom(glmnet,cv.glmnet)
importFrom(grDevices,dev.cur)
importFrom(grDevices,dev.off)
Expand Down
5 changes: 3 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@

## FeatureSelection

* **18-05-2016**: I added tests and code-coverage
* **09-08-2024**: I replaced **dplyr** with **data.table** because the **dplyr** functions 'summarise_each_' and 'funs' gave deprecation warnings
* **19-05-2021**: I replaced **doMC** with **doParallel** because **doMC** works only on **Unix** and not on **Windows** OS, whereas **doParallel** works on both (applies only to the **'glmnet-lasso'** method if the number of threads > 1)
* **03-02-2020**:
+ Updated the R files so that *Feature Selection* works with the newest versions of the imported R packages
+ Adjusted the tests
+ Added Dockerfile and docker image
+ Updated the README.md and .travis.yml files
* **19-05-2021**: I replaced **doMC** with **doParallel** because **doMC** does not work on both **Unix** and **Windows** OS (applies only to **'glmnet-lasso'** method if number of threads > 1)
* **18-05-2016**: I added tests and code-coverage
45 changes: 33 additions & 12 deletions R/feature_selection.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@

# Declare the symbols that are used through non-standard evaluation in this
# package, so that 'R CMD check' does not report them as undefined global
# variables ("no visible binding for global variable").
#
# Note on 'predict': keep it as a global variable. It appears both in 'stats'
# and in 'glmnet'; however 'predict.cv.glmnet' can not be specified explicitly
# because the function does not appear in the >= 3.0.0 version of the package
# (an error is raised otherwise).
utils::globalVariables(c("%>%", # magrittr pipe operator
".", # placeholder symbol; also used as the '.()' alias inside data.table calls
".N", # data.table special symbol (used for the 'Frequency' column)
".SD", # data.table special symbol (used in 'lapply(.SD, mean, ...)')
"coefficients", # column name referenced unquoted inside a data.table call
"predict")) # see the note on 'predict' above


Expand Down Expand Up @@ -32,7 +35,7 @@ utils::globalVariables(c("%>%",
#'
#' @export
#' @importFrom glmnet cv.glmnet
#' @importFrom dplyr group_by summarize summarize_each funs n
#' @importFrom data.table as.data.table
#' @importFrom doParallel registerDoParallel
#' @importFrom xgboost xgb.DMatrix xgb.train xgb.importance
#' @importFrom ranger ranger
Expand Down Expand Up @@ -120,9 +123,18 @@ utils::globalVariables(c("%>%",
#' }


feature_selection = function(X, y, method = NULL, params_glmnet = NULL, params_xgboost = NULL, params_ranger = NULL, xgb_sort = NULL, CV_folds = 5, stratified_regr = FALSE,

scale_coefs_glmnet = FALSE, cores_glmnet = NULL, verbose = FALSE) {
feature_selection = function(X,
y,
method = NULL,
params_glmnet = NULL,
params_xgboost = NULL,
params_ranger = NULL,
xgb_sort = NULL,
CV_folds = 5,
stratified_regr = FALSE,
scale_coefs_glmnet = FALSE,
cores_glmnet = NULL,
verbose = FALSE) {

if (is.null(method)) stop("use method = .. to select one of the available methods : xgboost, glmnet-lasso, ranger")
if (CV_folds < 1) stop("CV_folds should be >= 1")
Expand Down Expand Up @@ -376,9 +388,14 @@ feature_selection = function(X, y, method = NULL, params_glmnet = NULL, params_x

else {

all_feat = data.frame(do.call('rbind', get_all_feat))

tbl_x = data.frame(all_feat %>% dplyr::group_by(.data$Feature) %>% dplyr::summarize(coefficients = mean(.data$coefficients, na.rm = TRUE), Frequency = dplyr::n())) # for ".data" see: https://community.rstudio.com/t/how-to-solve-no-visible-binding-for-global-variable-note/28887/3
all_feat = data.frame(do.call('rbind', get_all_feat)) |>
data.table::as.data.table()

tbl_x = all_feat[, .(coefficients = mean(coefficients, na.rm = TRUE),
Frequency = .N),
by = 'Feature'] |>
as.data.frame()

if (scale_coefs_glmnet) tbl_x[, 2] = abs(tbl_x[, 2])
tbl_x = tbl_x[order(tbl_x$Frequency, tbl_x$coefficients, decreasing = TRUE),] # the data.frame in 'glmnet-lasso' is sorted by Frequency (default)
}
Expand Down Expand Up @@ -498,9 +515,11 @@ feature_selection = function(X, y, method = NULL, params_glmnet = NULL, params_x
gc()
}

tbl_x = data.frame(do.call('rbind', get_all_feat))
tbl_x = data.frame(do.call('rbind', get_all_feat)) |>
data.table::as.data.table()

tbl1 = data.frame(tbl_x %>% dplyr::group_by(.data$Feature) %>% dplyr::summarize_each(dplyr::funs(mean(., na.rm = TRUE)))) # for ".data" see: https://community.rstudio.com/t/how-to-solve-no-visible-binding-for-global-variable-note/28887/3
tbl1 = tbl_x[, lapply(.SD, mean, na.rm = TRUE), by = 'Feature'] |>
as.data.frame()

if (is.null(xgb_sort) || (xgb_sort == 'Frequency')) {

Expand Down Expand Up @@ -636,9 +655,11 @@ feature_selection = function(X, y, method = NULL, params_glmnet = NULL, params_x
gc()
}

tbl_x = data.frame(do.call('rbind', get_all_feat))

tbl1 = data.frame(tbl_x %>% dplyr::group_by(.data$Feature) %>% dplyr::summarize_each(dplyr::funs(mean(., na.rm = TRUE)))) # for ".data" see: https://community.rstudio.com/t/how-to-solve-no-visible-binding-for-global-variable-note/28887/3
tbl_x = data.frame(do.call('rbind', get_all_feat)) |>
data.table::as.data.table()

tbl1 = tbl_x[, lapply(.SD, mean, na.rm = TRUE), by = 'Feature'] |>
as.data.frame()

tbl1 = tbl1[order(tbl1[, 2], decreasing = TRUE), ]

Expand Down
10 changes: 7 additions & 3 deletions R/wrapper_feature_selection.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#' Furthermore the user can limit the number of features using the keep_number_feat parameter of the params_feature list.
#'
#' @export
#' @importFrom dplyr group_by summarize n
#' @importFrom data.table as.data.table
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#'
Expand Down Expand Up @@ -183,9 +183,13 @@ wrapper_feat_select = function(X, y, params_glmnet = NULL, params_xgboost = NULL

modify_lst = lapply(out_union, function(x) data.frame(feature = x$features, rank = normalized(length(x$features):1)))

modify_lst1 = data.frame(do.call(rbind, modify_lst))
modify_lst1 = data.frame(do.call(rbind, modify_lst)) |>
data.table::as.data.table()

tbl_x = data.frame(modify_lst1 %>% dplyr::group_by(.data$feature) %>% dplyr::summarize(importance = sum(rank, na.rm = TRUE), Frequency = dplyr::n()))
tbl_x = modify_lst1[, .(importance = sum(rank, na.rm = TRUE),
Frequency = .N),
by = 'feature'] |>
as.data.frame()

tbl1 = tbl_x[order(tbl_x$importance, decreasing = TRUE), ]

Expand Down

0 comments on commit 607b898

Please sign in to comment.