用SSE汇编求向量的点积
class
__declspec(align(
16
)) vector4
{public: union { struct { float x, y, z, w; }; __m128 mm; };}
;inline
void
dot3( vector4
&
r,
const
vector4
&
x,
const
vector4
&
y )
{ /**//*register vector4 s; s.mm = _mm_mul_ps( x.mm , y.mm ); r.mm = _mm_add_ss( s.mm , _mm_movehl_ps( s.mm , s.mm ) ); r.mm = _mm_add_ss( r.mm , _mm_shuffle_ps( r.mm , r.mm , 1 ) );*/ __asm { mov ecx, y mov edx, x mov eax, r movaps xmm0, xmmword ptr [ecx] movaps xmm1, xmmword ptr [edx] mulps xmm1, xmm0 // xmm1 = s0, s1, s2, s3 movhlps xmm0, xmm1 // xmm0 = s2, s3, *, * addss xmm0, xmm1 // xmm0 = s0+s2, s3, *, * shufps xmm1, xmm1, 1 // xmm1 = s1, s0, s0, s0 addss xmm0, xmm1 // xmm0 = s0+s2+s1, s3, *, * movaps xmmword ptr [eax], xmm0 }}
inline
void
dot4( vector4
*
r,
const
vector4
*
x,
const
vector4
*
y )
{ /**//*register vector4 s; s.mm = _mm_mul_ps( x->mm , y->mm ); r->mm = _mm_add_ss( s.mm , _mm_movehl_ps( s.mm , s.mm ) ); r->mm = _mm_add_ss( r->mm , _mm_shuffle_ps( r->mm , r->mm , 1 ) ); r->mm = _mm_add_ss( r->mm , _mm_shuffle_ps( r->mm , r->mm , 3 ) );*/ __asm { mov ecx, y mov edx, x mov eax, r movaps xmm0, xmmword ptr [ecx] movaps xmm1, xmmword ptr [edx] mulps xmm1, xmm0 // xmm1 = s0, s1, s2, s3 movhlps xmm0, xmm1 // xmm0 = s2, s3, *, * addss xmm0, xmm1 // xmm0 = s0+s2, s3, *, * shufps xmm1, xmm1, 253 // xmm1 = s1, s3, s3, s3 addss xmm0, xmm1 // xmm0 = s0+s2+s1, s3, *, * movhlps xmm1, xmm1 // xmm1 = s3, s3, s3, s3 addss xmm0, xmm1 // xmm0 = s0+s2+s1+s3, s3, *, * movaps xmmword ptr [eax], xmm0 }}
posted on
2007-07-14 17:15
Len3d 阅读(
...) 评论(
)
编辑
收藏
转载于:https://www.cnblogs.com/len3d/archive/2007/07/14/818165.html
转载请注明原文地址: https://win8.8miu.com/read-29588.html