SSE sqrt还是比C math库的sqrtf快了不少
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <xmmintrin.h>

#define NOMINMAX
#include <windows.h>
/*
 * Single-precision square root computed with the scalar SSE intrinsic.
 *
 * The value is loaded into the low lane of an SSE register (_mm_set_ss),
 * _mm_sqrt_ss takes the square root of that low lane, and _mm_cvtss_f32
 * extracts the low lane back into a float.
 *
 * x: input value.
 * Returns the result of the SSE scalar square root applied to x.
 *
 * __forceinline is an MSVC keyword; on other compilers fall back to the
 * portable `static inline` so the function still builds.
 */
#if defined(_MSC_VER)
__forceinline
#else
static inline
#endif
float fast_sqrt(float x)
{
    return _mm_cvtss_f32(_mm_sqrt_ss(_mm_set_ss(x)));
}
/*
 * Micro-benchmark: sums the square roots of N random floats twice, once
 * with the C library's sqrtf() and once with fast_sqrt(), and prints each
 * sum together with the elapsed clock() ticks.
 *
 * argc/argv are accepted but not used.
 * Returns 0.
 */
int main(int argc, char *argv[])
{
    const int N = 100000000;

    /* Fill the input buffer with values scaled into [0, 1000]. */
    float *buf = new float[N];
    for (int i = 0; i < N; ++i) {
        buf[i] = 1000.0f * (float)rand() / (float)RAND_MAX;
    }

    /*
     * NOTE(review): with IEEE-754 single precision this accumulator stops
     * growing once it reaches 2^29 (536870912): the spacing between adjacent
     * floats there is 64, and every addend is at most sqrt(1000) ~= 31.6,
     * i.e. below half that spacing, so further additions round away.  The
     * printed sums therefore keep the loops from being optimized out but
     * are not a meaningful check that the two methods agree.  Kept as float
     * so the timed loop is unchanged.
     */
    float sum;

    /* clock() returns clock_t; keep that type instead of truncating to int. */
    clock_t start_time;
    long elapsed;

    /* Pass 1: C math library sqrtf(). */
    sum = 0.0f;
    start_time = clock();
    for (int i = 0; i < N; ++i) {
        sum += sqrtf(buf[i]);
    }
    /* Cast to long and print with %ld so the specifier matches the argument. */
    elapsed = (long)(clock() - start_time);
    printf("sum = %f in clock %ld\n", sum, elapsed);

    /* Pass 2: SSE scalar square root. */
    sum = 0.0f;
    start_time = clock();
    for (int i = 0; i < N; ++i) {
        sum += fast_sqrt(buf[i]);
    }
    elapsed = (long)(clock() - start_time);
    printf("sum (fast) = %f in clock %ld\n", sum, elapsed);

    delete[] buf;
    return 0;
}
测试结果:
sum = 536870912.000000 in clock 391
sum (fast) = 536870912.000000 in clock 281
posted on
2017-11-28 21:07
Len3d 阅读(
...) 评论(
)
编辑
收藏
转载于:https://www.cnblogs.com/len3d/p/7912062.html
相关资源:DirectX修复工具V4.0增强版