#include <stdint.h>
#include <immintrin.h>
Go to the source code of this file.
◆ DotProduct()
double DotProduct(const double* u, const double* v, int n)
◆ DotProductAVX()
double DotProductAVX(const double* u, const double* v, int n)
Definition at line 4 of file dotproduct-test.cpp.
5 int max_offset = n - 3;
9 __m256d sum = _mm256_setzero_pd();
12 if ((reinterpret_cast<const uintptr_t>(
u) & 31) == 0 &&
13 (reinterpret_cast<const uintptr_t>(
v) & 31) == 0) {
16 __m256d floats1 = _mm256_load_pd(
u + offset);
17 __m256d floats2 = _mm256_load_pd(
v + offset);
28 __m256d product = _mm256_mul_pd(floats1, floats2);
29 sum = _mm256_add_pd(sum, product);
30 }
while (offset < max_offset);
34 __m256d floats1 = _mm256_loadu_pd(
u + offset);
35 __m256d floats2 = _mm256_loadu_pd(
v + offset);
37 __builtin_prefetch(
u + offset);
38 __builtin_prefetch(
u + offset + 1);
39 __builtin_prefetch(
u + offset + 2);
40 __builtin_prefetch(
u + offset + 3);
41 __builtin_prefetch(
v + offset);
42 __builtin_prefetch(
v + offset + 1);
43 __builtin_prefetch(
v + offset + 2);
44 __builtin_prefetch(
v + offset + 3);
46 __m256d product = _mm256_mul_pd(floats1, floats2);
47 sum = _mm256_add_pd(sum, product);
48 }
while (offset <= max_offset);
54 __m256d sum2 = _mm256_permute2f128_pd(sum, sum, 1);
55 sum = _mm256_hadd_pd(sum, sum2);
56 sum = _mm256_hadd_pd(sum, sum);
61 __m256i cast_sum = _mm256_castpd_si256(sum);
62 #if defined(_WIN32) || defined(__i386__) || 1
66 *(reinterpret_cast<int64_t*>(&result)) = ((uint64_t*)&cast_sum)[0];
68 *(reinterpret_cast<int64_t*>(&result)) = _mm256_extract_epi64(cast_sum, 0);
◆ DotProductSIMD()
double DotProductSIMD(const double* u, const double* v, int n)
Definition at line 79 of file dotproduct-test.cpp.
82 #pragma omp simd aligned(u, v: 32)
86 for (int k = 0; k < n; ++k) total += u[k] * v[k];