From 307a417e1c1378b81b46b648fb0fed21e290ecd3 Mon Sep 17 00:00:00 2001 From: Charles Schlosser Date: Mon, 22 May 2023 16:39:24 +0000 Subject: [PATCH] Fix unrolled assignment evaluator --- Eigen/src/Core/AssignEvaluator.h | 40 +++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index d3be1923c..ec0382240 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -413,6 +413,30 @@ struct unaligned_dense_assignment_loop } }; +template +struct copy_using_evaluator_linearvec_CompleteUnrolling { + // FIXME: this is not very clean, perhaps this information should be provided by the kernel? + typedef typename Kernel::DstEvaluatorType DstEvaluatorType; + typedef typename DstEvaluatorType::XprType DstXprType; + typedef typename Kernel::PacketType PacketType; + + enum { + SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, + DstAlignment = Kernel::AssignmentTraits::DstAlignment + }; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { + kernel.template assignPacket(Index); + enum { NextIndex = Index + unpacket_traits::size }; + copy_using_evaluator_linearvec_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_linearvec_CompleteUnrolling { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel&) {} +}; + template struct dense_assignment_loop { @@ -453,8 +477,8 @@ struct dense_assignment_loop::size, alignedSize = (int(size)/packetSize)*packetSize }; - copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); - copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + copy_using_evaluator_linearvec_CompleteUnrolling::run(kernel); + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); } }; @@ -670,24 +694,24 @@ public: } - template + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) { - m_functor.template assignPacket(&m_dst.coeffRef(row,col), m_src.template packet(row,col)); + m_functor.template assignPacket(&m_dst.coeffRef(row,col), m_src.template packet(row,col)); } - template + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index) { - m_functor.template assignPacket(&m_dst.coeffRef(index), m_src.template packet(index)); + m_functor.template assignPacket(&m_dst.coeffRef(index), m_src.template packet(index)); } - template + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) { Index row = rowIndexByOuterInner(outer, inner); Index col = colIndexByOuterInner(outer, inner); - assignPacket(row, col); + assignPacket(row, col); } EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)