Add internal ctz/clz implementation.
This commit is contained in:
		
							parent
							
								
									454f89af9d
								
							
						
					
					
						commit
						75e273afcc
					
				| @ -628,6 +628,149 @@ struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus> { | ||||
|   // no value, error at compile time
 | ||||
| }; | ||||
| 
 | ||||
// Portable fallback for counting leading/trailing zero bits of an unsigned
// integer. Both routines narrow the search window by halving it each step
// and return the full bit width of BitsType when the input is zero.
template <typename BitsType, typename EnableIf = void>
struct count_bits_impl {
  static_assert(std::is_integral<BitsType>::value && std::is_unsigned<BitsType>::value,
                "BitsType must be an unsigned integer");

  // Number of zero bits above the most-significant set bit of `bits`.
  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
    int remaining = static_cast<int>(CHAR_BIT * sizeof(BitsType));
    if (bits == 0) {
      return remaining;
    }
    for (int width = remaining / 2; width > 0; width /= 2) {
      // If anything survives dropping the low `width` bits, the highest set
      // bit lives in the upper part: keep only that part.
      const BitsType upper = static_cast<BitsType>(bits >> width);
      if (upper != 0) {
        remaining -= width;
        bits = upper;
      }
    }
    // The window is now a single bit, and that bit is set.
    return remaining - 1;
  }

  // Number of zero bits below the least-significant set bit of `bits`.
  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
    int remaining = static_cast<int>(CHAR_BIT * sizeof(BitsType));
    if (bits == 0) {
      return remaining;
    }
    for (int width = remaining / 2; width > 0; width /= 2) {
      // If anything survives pushing `width` bits off the top, the lowest set
      // bit lives in the lower part: keep the shifted value.
      const BitsType lower = static_cast<BitsType>(bits << width);
      if (lower != 0) {
        remaining -= width;
        bits = lower;
      }
    }
    // The lowest set bit has been moved to the top position.
    return remaining - 1;
  }
};
| 
 | ||||
| // Count leading zeros.
 | ||||
| template <typename BitsType> | ||||
| EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { | ||||
|   return count_bits_impl<BitsType>::clz(bits); | ||||
| } | ||||
| 
 | ||||
| // Count trailing zeros.
 | ||||
| template <typename BitsType> | ||||
| EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { | ||||
|   return count_bits_impl<BitsType>::ctz(bits); | ||||
| } | ||||
| 
 | ||||
| #if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG | ||||
| 
 | ||||
| template <typename BitsType> | ||||
| struct count_bits_impl<BitsType, std::enable_if_t<sizeof(BitsType) <= sizeof(unsigned int)>> { | ||||
|   static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT); | ||||
|   static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer"); | ||||
|   static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { | ||||
|     static constexpr int kLeadingBitsOffset = (sizeof(unsigned int) - sizeof(BitsType)) * CHAR_BIT; | ||||
|     return bits == 0 ? kNumBits : __builtin_clz(static_cast<unsigned int>(bits)) - kLeadingBitsOffset; | ||||
|   } | ||||
| 
 | ||||
|   static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { | ||||
|     return bits == 0 ? kNumBits : __builtin_ctz(static_cast<unsigned int>(bits)); | ||||
|   } | ||||
| }; | ||||
| 
 | ||||
| template <typename BitsType> | ||||
| struct count_bits_impl< | ||||
|     BitsType, std::enable_if_t<sizeof(unsigned int) < sizeof(BitsType) && sizeof(BitsType) <= sizeof(unsigned long)>> { | ||||
|   static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT); | ||||
|   static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer"); | ||||
|   static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { | ||||
|     static constexpr int kLeadingBitsOffset = (sizeof(unsigned long) - sizeof(BitsType)) * CHAR_BIT; | ||||
|     return bits == 0 ? kNumBits : __builtin_clzl(static_cast<unsigned long>(bits)) - kLeadingBitsOffset; | ||||
|   } | ||||
| 
 | ||||
|   static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { | ||||
|     return bits == 0 ? kNumBits : __builtin_ctzl(static_cast<unsigned long>(bits)); | ||||
|   } | ||||
| }; | ||||
| 
 | ||||
| template <typename BitsType> | ||||
| struct count_bits_impl<BitsType, std::enable_if_t<sizeof(unsigned long) < sizeof(BitsType) && | ||||
|                                                   sizeof(BitsType) <= sizeof(unsigned long long)>> { | ||||
|   static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT); | ||||
|   static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer"); | ||||
|   static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { | ||||
|     static constexpr int kLeadingBitsOffset = (sizeof(unsigned long long) - sizeof(BitsType)) * CHAR_BIT; | ||||
|     return bits == 0 ? kNumBits : __builtin_clzll(static_cast<unsigned long long>(bits)) - kLeadingBitsOffset; | ||||
|   } | ||||
| 
 | ||||
|   static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { | ||||
|     return bits == 0 ? kNumBits : __builtin_ctzll(static_cast<unsigned long long>(bits)); | ||||
|   } | ||||
| }; | ||||
| 
 | ||||
| #elif EIGEN_COMP_MSVC | ||||
| 
 | ||||
| template <typename BitsType> | ||||
| struct count_bits_impl<BitsType, std::enable_if_t<sizeof(BitsType) <= sizeof(unsigned long)>> { | ||||
|   static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT); | ||||
|   static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer"); | ||||
|   static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { | ||||
|     static constexpr int kLeadingBitsOffset = static_cast<int>((sizeof(unsigned long) - sizeof(BitsType)) * CHAR_BIT); | ||||
|     unsigned long out; | ||||
|     _BitScanReverse(&out, static_cast<unsigned long>(bits)); | ||||
|     return bits == 0 ? kNumBits : static_cast<int>(out - kLeadingBitsOffset); | ||||
|   } | ||||
| 
 | ||||
|   static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { | ||||
|     unsigned long out; | ||||
|     _BitScanForward(&out, static_cast<unsigned long>(bits)); | ||||
|     return bits == 0 ? kNumBits : static_cast<int>(out); | ||||
|   } | ||||
| }; | ||||
| 
 | ||||
| #ifdef _WIN64 | ||||
| 
 | ||||
| template <typename BitsType> | ||||
| struct count_bits_impl< | ||||
|     BitsType, std::enable_if_t<sizeof(unsigned long) < sizeof(BitsType) && sizeof(BitsType) <= sizeof(__int64)>> { | ||||
|   static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT); | ||||
|   static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer"); | ||||
|   static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { | ||||
|     static constexpr int kLeadingBitsOffset = static_cast<int>((sizeof(__int64) - sizeof(BitsType)) * CHAR_BIT); | ||||
|     unsigned long out; | ||||
|     _BitScanReverse64(&out, static_cast<unsigned __int64>(bits)); | ||||
|     return bits == 0 ? kNumBits : static_cast<int>(out - kLeadingBitsOffset); | ||||
|   } | ||||
| 
 | ||||
|   static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { | ||||
|     unsigned long out; | ||||
|     _BitScanForward64(&out, static_cast<unsigned __int64>(bits)); | ||||
|     return bits == 0 ? kNumBits : static_cast<int>(out); | ||||
|   } | ||||
| }; | ||||
| 
 | ||||
| #endif  // _WIN64
 | ||||
| 
 | ||||
| #endif  // EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
 | ||||
| 
 | ||||
| template <typename Scalar> | ||||
| struct random_default_impl<Scalar, false, true> { | ||||
|   static inline Scalar run(const Scalar& x, const Scalar& y) { | ||||
|  | ||||
| @ -48,7 +48,7 @@ if(CHOLMOD_FOUND AND EIGEN_BUILD_BLAS AND EIGEN_BUILD_LAPACK) | ||||
|   set(SPARSE_LIBS ${SPARSE_LIBS} ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES}) | ||||
|   set(CHOLMOD_ALL_LIBS  ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES}) | ||||
|   ei_add_property(EIGEN_TESTED_BACKENDS "CHOLMOD, ") | ||||
|    | ||||
| 
 | ||||
|   ei_add_test(cholmod_support "" "${CHOLMOD_ALL_LIBS}") | ||||
| else() | ||||
|   ei_add_property(EIGEN_MISSING_BACKENDS "CHOLMOD, ") | ||||
| @ -61,7 +61,7 @@ if(UMFPACK_FOUND AND EIGEN_BUILD_BLAS) | ||||
|   set(SPARSE_LIBS ${SPARSE_LIBS} ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) | ||||
|   set(UMFPACK_ALL_LIBS ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) | ||||
|   ei_add_property(EIGEN_TESTED_BACKENDS "UMFPACK, ") | ||||
|    | ||||
| 
 | ||||
|   ei_add_test(umfpack_support "" "${UMFPACK_ALL_LIBS}") | ||||
| else() | ||||
|   ei_add_property(EIGEN_MISSING_BACKENDS "UMFPACK, ") | ||||
| @ -74,7 +74,7 @@ if(KLU_FOUND AND EIGEN_BUILD_BLAS) | ||||
|   set(SPARSE_LIBS ${SPARSE_LIBS} ${KLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) | ||||
|   set(KLU_ALL_LIBS ${KLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) | ||||
|   ei_add_property(EIGEN_TESTED_BACKENDS "KLU, ") | ||||
|    | ||||
| 
 | ||||
|   ei_add_test(klu_support "" "${KLU_ALL_LIBS}") | ||||
| else() | ||||
|   ei_add_property(EIGEN_MISSING_BACKENDS "KLU, ") | ||||
| @ -87,7 +87,7 @@ if(SuperLU_FOUND AND EIGEN_BUILD_BLAS) | ||||
|   set(SPARSE_LIBS ${SPARSE_LIBS} ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) | ||||
|   set(SUPERLU_ALL_LIBS ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) | ||||
|   ei_add_property(EIGEN_TESTED_BACKENDS  "SuperLU, ") | ||||
|    | ||||
| 
 | ||||
|   ei_add_test(superlu_support "" "${SUPERLU_ALL_LIBS}") | ||||
| else() | ||||
|   ei_add_property(EIGEN_MISSING_BACKENDS  "SuperLU, ") | ||||
| @ -171,6 +171,7 @@ endif() | ||||
| set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Official") | ||||
| add_custom_target(BuildOfficial) | ||||
| 
 | ||||
| ei_add_test(clz) | ||||
| ei_add_test(rand) | ||||
| ei_add_test(meta) | ||||
| ei_add_test(maxsizevector) | ||||
| @ -406,7 +407,7 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) | ||||
|   string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") | ||||
|   string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") | ||||
|   string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") | ||||
|   string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")   | ||||
|   string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") | ||||
| 
 | ||||
|   if(EIGEN_TEST_CUDA_CLANG) | ||||
|     string(APPEND CMAKE_CXX_FLAGS " --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}") | ||||
| @ -433,12 +434,12 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) | ||||
|     set(CUDA_NVCC_FLAGS  "--expt-relaxed-constexpr -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS} ${EIGEN_CUDA_CXX_FLAGS}") | ||||
|     cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include") | ||||
|   endif() | ||||
|    | ||||
| 
 | ||||
|   set(EIGEN_ADD_TEST_FILENAME_EXTENSION  "cu") | ||||
|    | ||||
| 
 | ||||
|   ei_add_test(gpu_example) | ||||
|   ei_add_test(gpu_basic) | ||||
|    | ||||
| 
 | ||||
|   unset(EIGEN_ADD_TEST_FILENAME_EXTENSION) | ||||
| 
 | ||||
| endif() | ||||
| @ -477,7 +478,7 @@ if (EIGEN_TEST_HIP) | ||||
|       message(FATAL_ERROR "HIP_PLATFORM = nvcc is not supported within Eigen") | ||||
|     else () | ||||
|       message(FATAL_ERROR "Unknown HIP_PLATFORM = ${HIP_PLATFORM}") | ||||
|     endif()  | ||||
|     endif() | ||||
|   endif() | ||||
| endif() | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										74
									
								
								test/clz.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								test/clz.cpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,74 @@ | ||||
| // This file is part of Eigen, a lightweight C++ template library
 | ||||
| // for linear algebra.
 | ||||
| //
 | ||||
| // Copyright (C) 2023 The Eigen Authors
 | ||||
| //
 | ||||
| // This Source Code Form is subject to the terms of the Mozilla
 | ||||
| // Public License v. 2.0. If a copy of the MPL was not distributed
 | ||||
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 | ||||
| 
 | ||||
| #include "main.h" | ||||
| 
 | ||||
// Reference count-leading-zeros: inspects bit positions one at a time,
// starting from the most-significant bit, and reports how many were skipped
// before the first set bit. Returns the bit width of T when `val` is zero.
template <typename T>
int ref_clz(T val) {
  constexpr int kBitWidth = sizeof(T) * CHAR_BIT;
  for (int pos = kBitWidth - 1; pos >= 0; --pos) {
    if (((val >> pos) & T(1)) != 0) {
      return (kBitWidth - 1) - pos;
    }
  }
  return kBitWidth;
}
| 
 | ||||
// Reference count-trailing-zeros: inspects bit positions one at a time,
// starting from the least-significant bit, and reports the position of the
// first set bit. Returns the bit width of T when `val` is zero.
template <typename T>
int ref_ctz(T val) {
  constexpr int kBitWidth = sizeof(T) * CHAR_BIT;
  for (int pos = 0; pos < kBitWidth; ++pos) {
    if (((val >> pos) & T(1)) != 0) {
      return pos;
    }
  }
  return kBitWidth;
}
| 
 | ||||
| template <typename T> | ||||
| void test_clz_ctz() { | ||||
|   T step = sizeof(T) <= 2 ? 1 : (Eigen::NumTraits<T>::highest() / (T(1) << 16)); | ||||
|   T iters = Eigen::NumTraits<T>::highest() / step; | ||||
|   for (T i = 0; i < iters; ++i) { | ||||
|     T val = i * step; | ||||
|     int expected_clz = ref_clz(val); | ||||
|     int actual_clz = Eigen::internal::clz(val); | ||||
|     VERIFY(expected_clz == actual_clz); | ||||
| 
 | ||||
|     int expected_ctz = ref_ctz(val); | ||||
|     int actual_ctz = Eigen::internal::ctz(val); | ||||
|     VERIFY(expected_ctz == actual_ctz); | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| template <typename T> | ||||
| void test_clz_ctz_random() { | ||||
|   for (int i = 0; i < 1024 * 1024; ++i) { | ||||
|     T val = Eigen::internal::random<T>(); | ||||
|     int expected_clz = ref_clz(val); | ||||
|     int actual_clz = Eigen::internal::clz(val); | ||||
|     VERIFY(expected_clz == actual_clz); | ||||
| 
 | ||||
|     int expected_ctz = ref_ctz(val); | ||||
|     int actual_ctz = Eigen::internal::ctz(val); | ||||
|     VERIFY(expected_ctz == actual_ctz); | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| EIGEN_DECLARE_TEST(clz) { | ||||
|   CALL_SUBTEST_1(test_clz_ctz<uint8_t>()); | ||||
|   CALL_SUBTEST_2(test_clz_ctz<uint16_t>()); | ||||
|   CALL_SUBTEST_3(test_clz_ctz<uint32_t>()); | ||||
|   CALL_SUBTEST_4(test_clz_ctz<uint64_t>()); | ||||
| 
 | ||||
|   for (int i = 0; i < g_repeat; i++) { | ||||
|     test_clz_ctz_random<uint32_t>(); | ||||
|     test_clz_ctz_random<uint64_t>(); | ||||
|   } | ||||
| } | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Antonio Sánchez
						Antonio Sánchez