Skip to content

Commit

Permalink
Adding f16 as Dtype (#696)
Browse files Browse the repository at this point in the history
* Adding f16 as Dtype
* Moving from impl Into<E> to impl Into<f64>
* Adding f32 rhs for scalar cmp operators
* [Breaking] Optimizer config values are now all f64 (#744)
---------

Co-authored-by: Viliam Vadocz <viliam.vadocz@gmail.com>
  • Loading branch information
coreylowman and ViliamVadocz authored Apr 27, 2023
1 parent 53561c4 commit 7626de4
Show file tree
Hide file tree
Showing 177 changed files with 2,287 additions and 1,104 deletions.
8 changes: 6 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,11 @@ matrixmultiply = { version = "0.3.2", default-features = false, optional = true
zip = { version = "0.6.2", default-features = false, optional = true }
cblas-sys = { version = "0.1.4", default-features = false, optional = true }
libc = { version = "0.2", default-features = false, optional = true }
cudarc = { version = "0.9.7", default-features = false, optional = true, features = ["driver", "cublas", "nvrtc"] }
cudarc = { git = "https://github.com/coreylowman/cudarc", branch = "dfdx-half", default-features = false, optional = true, features = ["driver", "cublas", "nvrtc", "f16"] }
num-traits = { version = "0.2.15", default-features = false }
safetensors = { version = "0.3", default-features = false, optional = true }
memmap2 = { version = "0.5", default-features = false, optional = true }
half = { git = "https://github.com/starkat99/half-rs.git", branch = "main", optional = true, features = ["num-traits", "rand_distr"] }

[dev-dependencies]
tempfile = "3.3.0"
Expand All @@ -48,7 +49,7 @@ glob = { version = "0.3.1", optional = true }

[features]
default = ["std", "fast-alloc", "cpu-par-matmul"]
nightly = []
nightly = ["half?/use-intrinsics"]

std = ["cudarc?/std", "matrixmultiply?/std", "rand_distr/std_math"]
fast-alloc = ["std"]
Expand All @@ -61,9 +62,12 @@ cpu-mkl-matmul = ["dep:cblas-sys", "dep:libc"]
cuda = ["dep:cudarc", "dep:glob"]
cudnn = ["cuda", "cudarc?/cudnn"]

f16 = ["dep:half"]

numpy = ["dep:zip", "std"]
safetensors = ["dep:safetensors", "std", "dep:memmap2"]

test-f16 = ["f16"]
test-f64 = []
test-integrations = []
ci-check = ["cudarc?/ci-check"]
Expand Down
59 changes: 58 additions & 1 deletion build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ fn main() {
// If on nightly, enable "nightly" feature
maybe_enable_nightly();

#[cfg(feature = "cuda")]
cuda::set_include_dir();

#[cfg(feature = "cuda")]
cuda::build_ptx();

Expand All @@ -25,6 +28,52 @@ fn maybe_enable_nightly() {

#[cfg(feature = "cuda")]
mod cuda {
/// Locates the CUDA toolkit root and exposes its `include` directory to the
/// crate being compiled through the `CUDA_INCLUDE_DIR` compile-time
/// environment variable (emitted as a `cargo:rustc-env` directive).
///
/// Candidate roots are tried in order: first the values of the `CUDA_PATH`,
/// `CUDA_ROOT`, `CUDA_TOOLKIT_ROOT_DIR` and `CUDNN_LIB` environment variables
/// (unset or non-unicode values are skipped), then a fixed list of common
/// install locations. The first candidate containing `include/cuda.h` wins.
///
/// When the `ci-check` feature is enabled no lookup happens and the root is
/// hard-coded to `ci`.
///
/// # Panics
///
/// Without `ci-check`, panics if none of the candidates contains
/// `include/cuda.h`.
pub fn set_include_dir() {
    // NOTE: copied from cudarc build.rs.
    // We can't actually set an env!() value from another crate,
    // so we have to do that here.

    use std::path::PathBuf;

    const ENV_VARS: [&str; 4] = [
        "CUDA_PATH",
        "CUDA_ROOT",
        "CUDA_TOOLKIT_ROOT_DIR",
        "CUDNN_LIB",
    ];
    const ROOTS: [&str; 6] = [
        "/usr",
        "/usr/local/cuda",
        "/opt/cuda",
        "/usr/lib/cuda",
        "C:/Program Files/NVIDIA GPU Computing Toolkit",
        "C:/CUDA",
    ];

    // The lookup result depends on these variables, so ask cargo to re-run
    // the build script whenever one of them changes.
    for name in ENV_VARS.iter() {
        println!("cargo:rerun-if-env-changed={}", name);
    }

    // `env_vars` and `roots` are unused when `ci-check` is enabled, because
    // the root is hard-coded in that configuration.
    #[allow(unused)]
    let env_vars = ENV_VARS
        .iter()
        .filter_map(|name| std::env::var(name).ok())
        .map(PathBuf::from);

    #[allow(unused)]
    let roots = ROOTS.iter().copied().map(PathBuf::from);

    #[cfg(feature = "ci-check")]
    let root: PathBuf = "ci".into();

    #[cfg(not(feature = "ci-check"))]
    let root = env_vars
        .chain(roots)
        .find(|path| path.join("include").join("cuda.h").is_file())
        .expect(
            "could not locate the CUDA toolkit: no `include/cuda.h` found under \
             CUDA_PATH, CUDA_ROOT, CUDA_TOOLKIT_ROOT_DIR, CUDNN_LIB or the default \
             install locations; set CUDA_PATH to the toolkit root",
        );

    println!(
        "cargo:rustc-env=CUDA_INCLUDE_DIR={}",
        root.join("include").display()
    );
}

pub fn build_ptx() {
let out_dir = std::env::var("OUT_DIR").unwrap();
let kernel_paths: Vec<std::path::PathBuf> = glob::glob("src/**/*.cu")
Expand All @@ -38,6 +87,10 @@ mod cuda {

for path in &mut include_directories {
println!("cargo:rerun-if-changed={}", path.display());
let destination =
std::format!("{out_dir}/{}", path.file_name().unwrap().to_str().unwrap());
println!("cargo:rerun-if-changed={}", destination);
std::fs::copy(path.clone(), destination).unwrap();
// remove the filename from the path so it's just the directory
path.pop();
}
Expand Down Expand Up @@ -130,6 +183,8 @@ mod cuda {
.args(["--output-directory", &out_dir])
.args(&include_options)
.arg(p)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.unwrap()
})
Expand All @@ -139,7 +194,9 @@ mod cuda {
let output = child.wait_with_output().unwrap();
assert!(
output.status.success(),
"nvcc error while compiling {kernel_path:?}: {output:?}",
"nvcc error while compiling {kernel_path:?}:\n\n# stdout\n{:#}\n\n# stderr\n{:#}",
String::from_utf8_lossy(&output.stdout),
String::from_utf8_lossy(&output.stderr)
);
}

Expand Down
30 changes: 24 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,16 +241,23 @@ pub fn keep_denormals() {

#[cfg(test)]
pub(crate) mod tests {
pub use num_traits::{Float, FromPrimitive, NumCast, Zero};

#[cfg(not(feature = "cuda"))]
pub type TestDevice = crate::tensor::Cpu;

#[cfg(feature = "cuda")]
pub type TestDevice = crate::tensor::Cuda;

#[cfg(not(feature = "test-f64"))]
#[cfg(all(feature = "test-f64", feature = "test-f16"))]
compile_error!("f64 and f16 cannot be tested at the same time");

#[cfg(all(not(feature = "test-f16"), not(feature = "test-f64")))]
pub type TestDtype = f32;

#[cfg(feature = "test-f16")]
pub type TestDtype = half::f16;

#[cfg(feature = "test-f64")]
pub type TestDtype = f64;

Expand All @@ -275,6 +282,19 @@ pub(crate) mod tests {
}
}

/// Closeness checks for half-precision floats, available only with the
/// `f16` feature.
#[cfg(feature = "f16")]
impl AssertClose for half::f16 {
    type Elem = Self;

    /// Default tolerance used when comparing `f16` values: `1e-2`.
    const DEFAULT_TOLERANCE: Self::Elem = half::f16::from_f32_const(1e-2);

    /// Returns `Some((*self, *rhs))` when the absolute difference between the
    /// two values is strictly greater than `tolerance`, otherwise `None`.
    fn get_far_pair(&self, rhs: &Self, tolerance: Self) -> Option<(Self, Self)> {
        let distance = num_traits::Float::abs(self - rhs);
        (distance > tolerance).then(|| (*self, *rhs))
    }
}

impl AssertClose for f32 {
type Elem = f32;
const DEFAULT_TOLERANCE: Self::Elem = 1e-6;
Expand Down Expand Up @@ -349,12 +369,9 @@ pub(crate) mod tests {
macro_rules! assert_close_to_literal {
($Lhs:expr, $Rhs:expr) => {{
let lhs = $Lhs.array();
let rhs = $Rhs.ndmap(|x| num_traits::FromPrimitive::from_f64(x).unwrap());
let tol = AssertClose::get_default_tol(&lhs);
let far_pair = AssertClose::get_far_pair(
&lhs,
&$Rhs.ndmap(|x| num_traits::FromPrimitive::from_f64(x).unwrap()),
tol,
);
let far_pair = AssertClose::get_far_pair(&lhs, &rhs, tol);
if let Some((l, r)) = far_pair {
panic!("lhs != rhs | {l} != {r}");
}
Expand Down Expand Up @@ -411,5 +428,6 @@ pub(crate) mod tests {
}
}};
}

pub(crate) use assert_close;
}
Loading

0 comments on commit 7626de4

Please sign in to comment.