Skip to content

Commit

Permalink
[R-package] Add support for R 4.0 (fixes #3064, fixes #3024) (#3065)
Browse files Browse the repository at this point in the history
* [R-package] replaced gendef.exe with R code to create R.def (fixes #3064)

* fix broken things

* trying to add R4.0 docs

* changed make

* fixing make paths

* update notes on environment variables

* fix linting

* fixes to CI

* fixing build_cmd and other stuff

* fix bad substitution

* fix Azure Linux builds

* I am bad at bash

* simplifying

* only testing R

* getting better logs

* mingw32

* docs

* toolchain

* using msys

* fix visual studio condition

* toolchain test

* full CI

* fix if-elses

* bump allowed NOTEs

* search for Rscript

* updates to docs

* use processx

* fix mismatched arguments

* move CI to GitHub Actions

* minor changes

* fix workflow file

* fix templating

* fix Azure DevOps

* debugging windows builds

* dont shQuote file name

* all GitHub Actions jobs

* Apply suggestions from code review

Co-authored-by: Nikita Titov <nekit94-08@mail.ru>

* minor cleanup

* remove objdump printing

* make file.remove() invisible

* Apply suggestions from code review

Co-authored-by: Nikita Titov <nekit94-08@mail.ru>

* reduce duplicated paths in docs

Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
  • Loading branch information
jameslamb and StrikerRUS authored Jun 14, 2020
1 parent e706bec commit e83042f
Show file tree
Hide file tree
Showing 12 changed files with 324 additions and 33 deletions.
6 changes: 5 additions & 1 deletion .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@ environment:
matrix:
- COMPILER: MINGW
TASK: r-package
- COMPILER: MSVC
R_VERSION: 3.6
TOOLCHAIN: MINGW
- COMPILER: MSVC
TASK: r-package
R_VERSION: 4.0
TOOLCHAIN: MSVC
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019
- COMPILER: MSVC
TASK: python
Expand Down
22 changes: 21 additions & 1 deletion .ci/test_r_package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,26 @@ mkdir -p $R_LIB_PATH
echo "R_LIBS=$R_LIB_PATH" > ${HOME}/.Renviron
export PATH="$R_LIB_PATH/R/bin:$PATH"

# Get details needed for installing R components
#
# NOTES:
# * Linux builds on Azure use a container and don't need these details
if ! { [[ $AZURE == "true" ]] && [[ $OS_NAME == "linux" ]]; }; then
R_MAJOR_VERSION=( ${R_VERSION//./ } )
if [[ "${R_MAJOR_VERSION}" == "3" ]]; then
export R_MAC_VERSION=3.6.3
export R_LINUX_VERSION="3.6.3-1bionic"
export R_APT_REPO="bionic-cran35/"
elif [[ "${R_MAJOR_VERSION}" == "4" ]]; then
export R_MAC_VERSION=4.0.0
export R_LINUX_VERSION="4.0.0-1.1804.0"
export R_APT_REPO="bionic-cran40/"
else
echo "Unrecognized R version: ${R_VERSION}"
exit -1
fi
fi

# installing precompiled R for Ubuntu
# https://cran.r-project.org/bin/linux/ubuntu/#installation
# adding steps from https://stackoverflow.com/a/56378217/3986677 to get latest version
Expand All @@ -18,7 +38,7 @@ if [[ $AZURE != "true" ]] && [[ $OS_NAME == "linux" ]]; then
--keyserver keyserver.ubuntu.com \
--recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9
sudo add-apt-repository \
"deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/"
"deb https://cloud.r-project.org/bin/linux/ubuntu ${R_APT_REPO}"
sudo apt-get update
sudo apt-get install \
--no-install-recommends \
Expand Down
56 changes: 46 additions & 10 deletions .ci/test_r_package_windows.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -29,34 +29,60 @@ function Download-Miktex-Setup {
Download-File-With-Retries $FileToDownload $destfile
}

$env:R_WINDOWS_VERSION = "3.6.3"
$env:R_LIB_PATH = "$env:BUILD_SOURCESDIRECTORY/RLibrary" -replace '[\\]', '/'
$env:R_LIBS = "$env:R_LIB_PATH"
$env:PATH = "$env:R_LIB_PATH/Rtools/bin;" + "$env:R_LIB_PATH/R/bin/x64;" + "$env:R_LIB_PATH/miktex/texmfs/install/miktex/bin/x64;" + $env:PATH
$env:PATH = "$env:R_LIB_PATH/Rtools/bin;" + "$env:R_LIB_PATH/Rtools/usr/bin;" + "$env:R_LIB_PATH/R/bin/x64;" + "$env:R_LIB_PATH/miktex/texmfs/install/miktex/bin/x64;" + $env:PATH
$env:CRAN_MIRROR = "https://cloud.r-project.org/"
$env:CTAN_MIRROR = "https://ctan.math.illinois.edu/systems/win32/miktex"
$env:CTAN_MIKTEX_ARCHIVE = "$env:CTAN_MIRROR/setup/windows-x64/"
$env:CTAN_PACKAGE_ARCHIVE = "$env:CTAN_MIRROR/tm/packages/"

# Get details needed for installing R components
#
# NOTES:
# * some paths and file names are different on R4.0
$env:R_MAJOR_VERSION = $env:R_VERSION.split('.')[0]
if ($env:R_MAJOR_VERSION -eq "3") {
$env:RTOOLS_MINGW_BIN = "$env:R_LIB_PATH/Rtools/mingw_64/bin"
$env:RTOOLS_EXE_FILE = "Rtools35.exe"
$env:R_WINDOWS_VERSION = "3.6.3"
} elseif ($env:R_MAJOR_VERSION -eq "4") {
$env:RTOOLS_MINGW_BIN = "$env:R_LIB_PATH/Rtools/mingw64/bin"
$env:RTOOLS_EXE_FILE = "rtools40-x86_64.exe"
$env:R_WINDOWS_VERSION = "4.0.0"
} else {
Write-Output "[ERROR] Unrecognized R version: $env:R_VERSION"
Check-Output $false
}

if ($env:COMPILER -eq "MINGW") {
$env:CXX = "$env:R_LIB_PATH/Rtools/mingw_64/bin/g++.exe"
$env:CC = "$env:R_LIB_PATH/Rtools/mingw_64/bin/gcc.exe"
$env:CXX = "$env:RTOOLS_MINGW_BIN/g++.exe"
$env:CC = "$env:RTOOLS_MINGW_BIN/gcc.exe"
}

cd $env:BUILD_SOURCESDIRECTORY
tzutil /s "GMT Standard Time"
[Void][System.IO.Directory]::CreateDirectory($env:R_LIB_PATH)

if ($env:COMPILER -eq "MINGW") {
if ($env:TOOLCHAIN -eq "MINGW") {
Write-Output "Telling R to use MinGW"
$install_libs = "$env:BUILD_SOURCESDIRECTORY/R-package/src/install.libs.R"
((Get-Content -path $install_libs -Raw) -replace 'use_mingw <- FALSE','use_mingw <- TRUE') | Set-Content -Path $install_libs
((Get-Content -Path $install_libs -Raw) -Replace 'use_mingw <- FALSE','use_mingw <- TRUE') | Set-Content -Path $install_libs
} elseif ($env:TOOLCHAIN -eq "MSYS") {
Write-Output "Telling R to use MSYS"
$install_libs = "$env:BUILD_SOURCESDIRECTORY/R-package/src/install.libs.R"
((Get-Content -Path $install_libs -Raw) -Replace 'use_msys2 <- FALSE','use_msys2 <- TRUE') | Set-Content -Path $install_libs
} elseif ($env:TOOLCHAIN -eq "MSVC") {
# no customization for MSVC
} else {
Write-Output "[ERROR] Unrecognized compiler: $env:TOOLCHAIN"
Check-Output $false
}

# download R and RTools
Write-Output "Downloading R and Rtools"
Download-File-With-Retries -url "https://cloud.r-project.org/bin/windows/base/old/$env:R_WINDOWS_VERSION/R-$env:R_WINDOWS_VERSION-win.exe" -destfile "R-win.exe"
Download-File-With-Retries -url "https://cloud.r-project.org/bin/windows/Rtools/Rtools35.exe" -destfile "Rtools.exe"
Download-File-With-Retries -url "https://cloud.r-project.org/bin/windows/Rtools/$env:RTOOLS_EXE_FILE" -destfile "Rtools.exe"

# Install R
Write-Output "Installing R"
Expand All @@ -67,9 +93,9 @@ Write-Output "Installing Rtools"
Start-Process -FilePath Rtools.exe -NoNewWindow -Wait -ArgumentList "/VERYSILENT /DIR=$env:R_LIB_PATH/Rtools" ; Check-Output $?
Write-Output "Done installing Rtools"

# MiKTeX and pandoc can be skipped on non-MINGW builds, since we don't
# MiKTeX and pandoc can be skipped on MSVC builds, since we don't
# build the package documentation for those
if ($env:COMPILER -eq "MINGW") {
if ($env:COMPILER -ne "MSVC") {
Download-Miktex-Setup "$env:CTAN_MIKTEX_ARCHIVE" "miktexsetup-x64.zip"
Add-Type -AssemblyName System.IO.Compression.FileSystem
[System.IO.Compression.ZipFile]::ExtractToDirectory("miktexsetup-x64.zip", "miktex")
Expand Down Expand Up @@ -115,7 +141,7 @@ if ($env:COMPILER -ne "MSVC") {
$note_str = Get-Content "${LOG_FILE_NAME}" | Select-String -Pattern ' NOTE' | Out-String ; Check-Output $?
$relevant_line = $note_str -match '.*Status: (\d+) NOTE.*'
$NUM_CHECK_NOTES = $matches[1]
$ALLOWED_CHECK_NOTES = 3
$ALLOWED_CHECK_NOTES = 4
if ([int]$NUM_CHECK_NOTES -gt $ALLOWED_CHECK_NOTES) {
Write-Output "Found ${NUM_CHECK_NOTES} NOTEs from R CMD check. Only ${ALLOWED_CHECK_NOTES} are allowed"
Check-Output $False
Expand All @@ -139,6 +165,16 @@ if ($checks.Matches.length -eq 0) {
Check-Output $False
}

# Checking that we got the right toolchain for MinGW. If using MinGW, both
# MinGW and MSYS toolchains are supported
if ($env:COMPILER -eq "MINGW") {
$checks = Select-String -Path "${INSTALL_LOG_FILE_NAME}" -Pattern "Trying to build with.*$env:TOOLCHAIN"
if ($checks.Matches.length -eq 0) {
Write-Output "The wrong toolchain was used. Check the build logs."
Check-Output $False
}
}

if ($env:COMPILER -eq "MSVC") {
Write-Output "Running tests with testthat.R"
cd R-package/tests
Expand Down
23 changes: 22 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ on:

jobs:
test:
name: ${{ matrix.task }} (${{ matrix.os }}, ${{ matrix.compiler }})
name: ${{ matrix.task }} (${{ matrix.os }}, ${{ matrix.compiler }}, R ${{ matrix.r_version }})
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
Expand All @@ -19,15 +19,35 @@ jobs:
- os: ubuntu-latest
task: r-package
compiler: gcc
r_version: 3.6
- os: ubuntu-latest
task: r-package
compiler: gcc
r_version: 4.0
- os: ubuntu-latest
task: r-package
compiler: clang
r_version: 3.6
- os: ubuntu-latest
task: r-package
compiler: clang
r_version: 4.0
- os: macOS-latest
task: r-package
compiler: gcc
r_version: 3.6
- os: macOS-latest
task: r-package
compiler: gcc
r_version: 4.0
- os: macOS-latest
task: r-package
compiler: clang
r_version: 3.6
- os: macOS-latest
task: r-package
compiler: clang
r_version: 4.0
steps:
- name: Checkout repository
uses: actions/checkout@v1
Expand All @@ -53,5 +73,6 @@ jobs:
export CONDA="$HOME/miniconda"
export PATH="$CONDA/bin:${HOME}/.local/bin:$PATH"
export LGB_VER=$(head -n 1 VERSION.txt)
export R_VERSION="${{ matrix.r_version }}"
$GITHUB_WORKSPACE/.ci/setup.sh
$GITHUB_WORKSPACE/.ci/test.sh
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ R-package/src-i386
lightgbm_r/*
lightgbm*.tar.gz
lightgbm.Rcheck/
*.def

# Files created by R examples and tests
**/lgb-Dataset.data
Expand Down
11 changes: 9 additions & 2 deletions .vsts-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,18 @@ jobs:
pool:
vmImage: 'vs2017-win2016'
strategy:
maxParallel: 4
maxParallel: 5
matrix:
r_package:
r_package_msvc:
TASK: r-package
COMPILER: MSVC
R_VERSION: 3.6
TOOLCHAIN: MSVC
r_package_msys:
TASK: r-package
COMPILER: MINGW
R_VERSION: 4.0
TOOLCHAIN: MSYS
regular:
TASK: regular
PYTHON_VERSION: 3.6
Expand Down
45 changes: 42 additions & 3 deletions R-package/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,53 @@ Note: 32-bit (i386) R/Rtools is currently not supported.

#### Windows Preparation

Installing [Rtools](https://cran.r-project.org/bin/windows/Rtools/) is mandatory, and only support the 64-bit version. It requires to add to PATH the Rtools MinGW64 folder, if it was not done automatically during installation.
Installing a 64-bit version of [Rtools](https://cran.r-project.org/bin/windows/Rtools/) is mandatory.

The default compiler is Visual Studio (or [VS Build Tools](https://visualstudio.microsoft.com/downloads/)) in Windows, with an automatic fallback to Rtools or any [MinGW64](https://sourceforge.net/projects/mingw-w64/files/Toolchains%20targetting%20Win64/Personal%20Builds/mingw-builds/) (x86_64-posix-seh) available (this means if you have only Rtools and CMake, it will compile fine).
After installing `Rtools` and `CMake`, be sure the following paths are added to the environment variable `PATH`. These may have been automatically added when installing other software.

To force the usage of Rtools / MinGW, you can set `use_mingw` to `TRUE` in `R-package/src/install.libs.R`.
* `Rtools`
- If you have `Rtools` 3.x, example:
- `C:\Rtools\mingw_64\bin`
- If you have `Rtools` 4.0, example:
- `C:\rtools40\mingw64\bin`
- `C:\rtools40\usr\bin`
* `CMake`
- example: `C:\Program Files\CMake\bin`
* `R`
- example: `C:\Program Files\R\R-3.6.1\bin`

NOTE: Two `Rtools` paths are required from `Rtools` 4.0 onwards because paths and the list of included software was changed in `Rtools` 4.0.

#### Windows Toolchain Options

A "toolchain" refers to the collection of software used to build the library. The R package can be built with three different toolchains.

**Warning for Windows users**: it is recommended to use *Visual Studio* for its better multi-threading efficiency in Windows for many core systems. For very simple systems (dual core computers or worse), MinGW64 is recommended for maximum performance. If you do not know what to choose, it is recommended to use [Visual Studio](https://visualstudio.microsoft.com/downloads/), the default compiler. **Do not try using MinGW in Windows on many core systems. It may result in 10x slower results than Visual Studio.**

**Visual Studio (default)**

By default, the package will be built with [Visual Studio Build Tools](https://visualstudio.microsoft.com/downloads/).

**MinGW (R 3.x)**

If you are using R 3.x and installation fails with Visual Studio, `LightGBM` will fall back to using [MinGW](http://mingw-w64.org/doku.php) bundled with `Rtools`.

If you want to force `LightGBM` to use MinGW (for any R version), open `R-package/src/install.libs.R` and change `use_mingw`:

```r
use_mingw <- TRUE
```

**MSYS2 (R 4.x)**

If you are using R 4.x and installation fails with Visual Studio, `LightGBM` will fall back to using [MSYS2](https://www.msys2.org/). This should work with the tools already bundled in `Rtools` 4.0.

If you want to force `LightGBM` to use MSYS2 (for any R version), open `R-package/src/install.libs.R` and change `use_msys2`:

```r
use_msys2 <- TRUE
```

#### Mac OS Preparation

You can perform installation either with **Apple Clang** or **gcc**. In case you prefer **Apple Clang**, you should install **OpenMP** (details for installation can be found in [Installation Guide](https://github.com/microsoft/LightGBM/blob/master/docs/Installation-Guide.rst#apple-clang)) first and **CMake** version 3.16 or higher is required. In case you prefer **gcc**, you need to install it (details for installation can be found in [Installation Guide](https://github.com/microsoft/LightGBM/blob/master/docs/Installation-Guide.rst#gcc)) and set some environment variables to tell R to use `gcc` and `g++`. If you install these from Homebrew, your versions of `g++` and `gcc` are most likely in `/usr/local/bin`, as shown below.
Expand Down
98 changes: 98 additions & 0 deletions R-package/inst/make-r-def.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# [description]
# Create a definition file (.def) from a .dll file, using objdump.
#
# [usage]
#
# Rscript make-r-def.R something.dll something.def
#
# [references]
# * https://www.cs.colorado.edu/~main/cs1300/doc/mingwfaq.html

args <- commandArgs(trailingOnly = TRUE)

IN_DLL_FILE <- args[[1L]]
OUT_DEF_FILE <- args[[2L]]
DLL_BASE_NAME <- basename(IN_DLL_FILE)

message(sprintf("Creating '%s' from '%s'", OUT_DEF_FILE, IN_DLL_FILE))

# system() will not raise an R exception if the process called
# fails. Wrapping it here to get that behavior.
#
# system() introduces a lot of overhead, at least on Windows,
# so trying processx if it is available
.pipe_shell_command_to_stdout <- function(command, args, out_file) {
has_processx <- suppressMessages({
suppressWarnings({
require("processx") # nolint
})
})
if (has_processx) {
p <- processx::process$new(
command = command
, args = args
, stdout = out_file
, windows_verbatim_args = FALSE
)
invisible(p$wait())
} else {
message(paste0(
"Using system2() to run shell commands. Installing "
, "'processx' with install.packages('processx') might "
, "make this faster."
))
# shQuote() is necessary here since one of the arguments
# is a file-path to R.dll, which may have spaces. processx
# does such quoting but system2() does not
exit_code <- system2(
command = command
, args = shoQuote(args)
, stdout = out_file
)
if (exit_code != 0L) {
stop(paste0("Command failed with exit code: ", exit_code))
}
}
return(invisible(NULL))
}

# use objdump to dump all the symbols
OBJDUMP_FILE <- "objdump-out.txt"
.pipe_shell_command_to_stdout(
command = "objdump"
, args = c("-p", IN_DLL_FILE)
, out_file = OBJDUMP_FILE
)

objdump_results <- readLines(OBJDUMP_FILE)
invisible(file.remove(OBJDUMP_FILE))

# Only one table in the objdump results matters for our purposes,
# see https://www.cs.colorado.edu/~main/cs1300/doc/mingwfaq.html
start_index <- which(
grepl(
pattern = "[Ordinal/Name Pointer] Table"
, x = objdump_results
, fixed = TRUE
)
)
empty_lines <- which(objdump_results == "")
end_of_table <- empty_lines[empty_lines > start_index][1L]

# Read the contents of the table
exported_symbols <- objdump_results[(start_index + 1L):end_of_table]
exported_symbols <- gsub("\t", "", exported_symbols)
exported_symbols <- gsub(".*\\] ", "", exported_symbols)
exported_symbols <- gsub(" ", "", exported_symbols)

# Write R.def file
writeLines(
text = c(
paste0("LIBRARY \"", DLL_BASE_NAME, "\"")
, "EXPORTS"
, exported_symbols
)
, con = OUT_DEF_FILE
, sep = "\n"
)
message(sprintf("Successfully created '%s'", OUT_DEF_FILE))
Loading

0 comments on commit e83042f

Please sign in to comment.