#include<ctime>
#include<iostream>
#include<memory>
using namespace std;
// Minimal stopwatch built on std::clock(), which reports processor time
// consumed by the program rather than wall-clock time.
class Clock {
 public:
  // Starts the stopwatch at construction.
  Clock() : start_time_{std::clock()} {}

  // Restarts the stopwatch from the current std::clock() reading.
  void Reset() { start_time_ = std::clock(); }

  // Returns the clock ticks elapsed since construction or the last Reset().
  std::clock_t Now() const {
    const std::clock_t current = std::clock();
    return current - start_time_;
  }

  // Returns the same elapsed interval as Now(), converted to seconds.
  double NowSeconds() const {
    const std::clock_t elapsed_ticks = Now();
    return static_cast<double>(elapsed_ticks) / CLOCKS_PER_SEC;
  }

 private:
  // std::clock() reading captured at construction or the last Reset().
  std::clock_t start_time_;
};
// Element-wise product: stores b[i] * c[i] into a[i] for every i in [0, size).
// Elements are processed in increasing index order. The pointers carry no
// aliasing qualifier, so the compiler has to assume the ranges may overlap.
// A non-positive size performs no work.
void Func(int* a, int *b, int* c, int size) {
  for (int idx = 0; idx < size; ++idx) {
    a[idx] = b[idx] * c[idx];
  }
}
// Element-wise product: stores b[i] * c[i] into a[i] for every i in [0, size).
// The __restrict__ qualifiers (a compiler extension) promise that the three
// ranges do not overlap, which frees the optimizer to vectorize the loop.
// Passing overlapping ranges breaks that promise; the caller must ensure the
// buffers are distinct. A non-positive size performs no work.
void FuncForSIMD(int* __restrict__ a, int* __restrict__ b, int* __restrict__ c,
                 int size) {
  for (int idx = 0; idx < size; ++idx) {
    a[idx] = b[idx] * c[idx];
  }
}
int main() {
constexpr int size = 1000000000;
unique_ptr<int> a(new int[size]);
unique_ptr<int> b(new int[size]);
unique_ptr<int> c(new int[size]);
Clock clock;
Func(a.get(), b.get(), c.get(), size);
cout << "time without simd: " << clock.Now() << endl;
clock.Reset();
FuncForSIMD(a.get(), b.get(), c.get(), size);
cout << "time with simd: " << clock.Now() << endl;
return 0;
}
To compile, run:
g++ -std=c++0x simd.cc -o simd -O3
The output on my machine looks as follows:
time without simd: 2160000
time with simd: 390000
So the restrict-qualified version runs about 5.5x faster.
A great explanation is given here:
Demystifying The Restrict Keyword - http://cellperformance.beyond3d.com/articles/2006/05/demystifying-the-restrict-keyword.html