Fix CUDA constexpr issues for numeric_limits.
Some CUDA/HIP constants fail on device with `constexpr` since they internally rely on non-constexpr functions, e.g. `#define CUDART_INF_F __int_as_float(0x7f800000)`. This fails for cuda-clang (though it passes with nvcc). These constants are currently used by `device::numeric_limits`. For portability, we need to remove `constexpr` from the affected functions. For C++11 or higher, we should be able to rely on the `std::numeric_limits` versions anyway, since the methods themselves are now `constexpr` and so should be supported on device (clang/hipcc natively, nvcc with `--expt-relaxed-constexpr`).
This commit is contained in:
		
							parent
							
								
									af1247fbc1
								
							
						
					
					
						commit
						78ee3d6261
					
				| @ -763,8 +763,6 @@ | ||||
|     #if EIGEN_MAX_CPP_VER>=14 && (EIGEN_COMP_CXXVER >= 11 && (EIGEN_COMP_CLANG || EIGEN_COMP_NVCC >= 70500)) | ||||
|       #define EIGEN_HAS_CONSTEXPR 1 | ||||
|     #endif | ||||
|   #elif defined(EIGEN_HIPCC) | ||||
|   // Skip const_expr on the HIP platform
 | ||||
|   #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (EIGEN_COMP_CXXVER >= 14) || \ | ||||
|     (EIGEN_GNUC_AT_LEAST(4,8) && (EIGEN_COMP_CXXVER >= 11)) || \ | ||||
|     (EIGEN_COMP_CLANG >= 306 && (EIGEN_COMP_CXXVER >= 11))) | ||||
|  | ||||
| @ -275,7 +275,7 @@ template<bool Condition, typename T=void> struct enable_if; | ||||
| template<typename T> struct enable_if<true,T> | ||||
| { typedef T type; }; | ||||
| 
 | ||||
| #if defined(EIGEN_GPU_COMPILE_PHASE) | ||||
| #if defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11 | ||||
| #if !defined(__FLT_EPSILON__) | ||||
| #define __FLT_EPSILON__ FLT_EPSILON | ||||
| #define __DBL_EPSILON__ DBL_EPSILON | ||||
| @ -296,7 +296,7 @@ template<> struct numeric_limits<float> | ||||
| { | ||||
|   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR | ||||
|   static float epsilon() { return __FLT_EPSILON__; } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   static float (max)() { | ||||
|   #if defined(EIGEN_CUDA_ARCH) | ||||
|     return CUDART_MAX_NORMAL_F; | ||||
| @ -306,7 +306,7 @@ template<> struct numeric_limits<float> | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR | ||||
|   static float (min)() { return FLT_MIN; } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   static float infinity() { | ||||
|   #if defined(EIGEN_CUDA_ARCH) | ||||
|     return CUDART_INF_F; | ||||
| @ -314,7 +314,7 @@ template<> struct numeric_limits<float> | ||||
|     return HIPRT_INF_F; | ||||
|   #endif | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   static float quiet_NaN() { | ||||
|   #if defined(EIGEN_CUDA_ARCH) | ||||
|     return CUDART_NAN_F; | ||||
| @ -331,7 +331,7 @@ template<> struct numeric_limits<double> | ||||
|   static double (max)() { return DBL_MAX; } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR | ||||
|   static double (min)() { return DBL_MIN; } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   static double infinity() { | ||||
|   #if defined(EIGEN_CUDA_ARCH) | ||||
|     return CUDART_INF; | ||||
| @ -339,7 +339,7 @@ template<> struct numeric_limits<double> | ||||
|     return HIPRT_INF; | ||||
|   #endif | ||||
|   } | ||||
|   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   static double quiet_NaN() { | ||||
|   #if defined(EIGEN_CUDA_ARCH) | ||||
|     return CUDART_NAN; | ||||
| @ -414,7 +414,7 @@ template<> struct numeric_limits<bool> | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
| #endif // defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11
 | ||||
| 
 | ||||
| /** \internal
 | ||||
|   * A base class to disable default copy ctor and copy assignment operator. | ||||
| @ -761,7 +761,7 @@ template<typename T> EIGEN_DEVICE_FUNC   void swap(T &a, T &b) { T tmp = b; b = | ||||
| template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); } | ||||
| #endif | ||||
| 
 | ||||
| #if defined(EIGEN_GPU_COMPILE_PHASE) | ||||
| #if defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11 | ||||
| using internal::device::numeric_limits; | ||||
| #else | ||||
| using std::numeric_limits; | ||||
|  | ||||
| @ -343,6 +343,21 @@ struct matrix_inverse { | ||||
|   } | ||||
| }; | ||||
| 
 | ||||
| template<typename T> | ||||
| struct numeric_limits_test { | ||||
|   EIGEN_DEVICE_FUNC | ||||
|   void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const | ||||
|   { | ||||
|     EIGEN_UNUSED_VARIABLE(in) | ||||
|     int out_idx = i * 5; | ||||
|     out[out_idx++] = numext::numeric_limits<float>::epsilon(); | ||||
|     out[out_idx++] = (numext::numeric_limits<float>::max)(); | ||||
|     out[out_idx++] = (numext::numeric_limits<float>::min)(); | ||||
|     out[out_idx++] = numext::numeric_limits<float>::infinity(); | ||||
|     out[out_idx++] = numext::numeric_limits<float>::quiet_NaN(); | ||||
|   } | ||||
| }; | ||||
| 
 | ||||
| template<typename Type1, typename Type2> | ||||
| bool verifyIsApproxWithInfsNans(const Type1& a, const Type2& b, typename Type1::Scalar* = 0) // Enabled for Eigen's type only | ||||
| { | ||||
| @ -434,6 +449,9 @@ EIGEN_DECLARE_TEST(gpu_basic) | ||||
|   CALL_SUBTEST( run_and_compare_to_gpu(complex_operators<Vector3cf>(), nthreads, cfin, cfout) ); | ||||
|   CALL_SUBTEST( test_with_infs_nans(complex_sqrt<Vector3cf>(), nthreads, cfin, cfout) ); | ||||
| 
 | ||||
|   // numeric_limits | ||||
|   CALL_SUBTEST( test_with_infs_nans(numeric_limits_test<Vector3f>(), 1, in, out) ); | ||||
| 
 | ||||
| #if defined(__NVCC__) | ||||
|   // FIXME | ||||
|   // These subtests compile only with nvcc and fail with HIPCC and clang-cuda | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Antonio Sanchez
						Antonio Sanchez