neo-ai
diff --git a/‎src/codegen/codegen_cuda.cc‎
Lines changed: 14 additions & 0 deletions b/‎src/codegen/codegen_cuda.cc‎
Lines changed: 14 additions & 0 deletions
@@ -50,6 +50,20 @@ void CodeGenCUDA::AddFunction(LoweredFunc f) {
 std::string CodeGenCUDA::Finish() {
   if (enable_fp16_) {
     decl_stream << "#include <cuda_fp16.h>\n";
+    decl_stream << "__device__ half max" \
+                    "(const half a, const half b)\n"
+                    "{\n  return __hgt(__half(a), __half(b)) ? a : b;\n}\n";
+    decl_stream << "__device__ half min(const half a, const half b)\n"
+                    "{\n  return __hlt(__half(a), __half(b)) ? a : b;\n}\n";
+    decl_stream << "__device__ half operator+" \
+                    "(const volatile __half &a,  const volatile __half &b)\n"
+                    "{\n  return __hadd(a, b);\n}\n";
+    decl_stream << "__device__ half operator<=" \
+                   "(const volatile __half &a,  const volatile __half &b)\n"
+                    "{\n  return __hlt(a, b);\n}\n";
+    decl_stream << "__device__ half operator*" \
+                    "(const volatile __half &a,  const volatile __half &b)\n"
+                    "{\n  return __hmul(a, b);\n}\n";
   }
 
   if (enable_int8_) {