Fix int overflow causing cxx11_tensor_gpu_1 to fail.
commit a25f02d73e
parent 6f9ad7da61
Author: Antonio Sánchez
@@ -655,13 +655,11 @@ EIGEN_STRONG_INLINE void TensorExecutor<Expression, GpuDevice, Vectorizable, Til
 
     const int block_size = device.maxGpuThreadsPerBlock();
     const int max_blocks =
-        numext::mini<int64_t>(device.getNumGpuMultiProcessors() *
-                              device.maxGpuThreadsPerMultiProcessor(),
-                          NumTraits<StorageIndex>::highest()) /
-        block_size;
+        static_cast<int>(numext::mini<int64_t>(device.getNumGpuMultiProcessors() * device.maxGpuThreadsPerMultiProcessor(),
+                                               NumTraits<StorageIndex>::highest()) / block_size);
     const StorageIndex size = array_prod(evaluator.dimensions());
     // Create at least one block to ensure we won't crash when tensorflow calls with tensors of size 0.
-    const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, numext::div_ceil<int>(size, block_size)), 1);
+    const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, static_cast<int>(numext::div_ceil<StorageIndex>(size, block_size))), 1);
 
     LAUNCH_GPU_KERNEL(
         (EigenMetaKernel<TensorEvaluator<Expression, GpuDevice>, StorageIndex>),
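The change is twofold: the max_blocks computation now narrows with an explicit static_cast<int> only after the arithmetic is done in int64_t, and, more importantly, div_ceil is instantiated for the wide StorageIndex type instead of int, so the add-before-divide inside it can no longer overflow when the tensor's element count approaches INT_MAX. Below is a minimal standalone sketch of the failure mode, assuming the classic (a + b - 1) / b formulation of div_ceil; the freestanding helper and the concrete values are illustrative, not Eigen's actual implementation.

    // Sketch of the overflow fixed by this commit. The div_ceil here is a
    // stand-in for numext::div_ceil, assumed to compute (a + b - 1) / b.
    #include <cstdint>
    #include <iostream>

    template <typename T>
    T div_ceil(T a, T b) {
      return (a + b - 1) / b;  // the addition overflows T when a is near its max
    }

    int main() {
      const int block_size = 1024;          // illustrative GPU block size
      const int64_t size = 2147483000;      // illustrative size close to INT_MAX

      // Before the fix: div_ceil<int> adds block_size - 1 to a value near
      // INT_MAX. Signed overflow is undefined behavior; on typical targets
      // it wraps, producing a bogus (negative) block count.
      const int before = div_ceil<int>(static_cast<int>(size), block_size);

      // After the fix: the add-and-divide happens in the wide StorageIndex
      // type (here int64_t); only the small quotient is narrowed to int.
      const int after = static_cast<int>(div_ceil<int64_t>(size, block_size));

      std::cout << "before: " << before << "\n";  // wrapped, negative
      std::cout << "after:  " << after << "\n";   // 2097152, correct
      return 0;
    }

Narrowing only after the division is safe because the quotient, a block count, stays small even when the dividend does not, and it is then clamped against max_blocks and 1 anyway.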