#include <ctime>
#include <iostream>
#include <memory>

// Minimal stopwatch built on std::clock(), i.e. it reports processor time
// in clock ticks rather than wall-clock time.
class Clock {
 public:
  // Starts timing at construction.
  Clock() : start_time_(std::clock()) {}

  // Clock ticks elapsed since construction or the most recent Reset().
  std::clock_t Now() const { return std::clock() - start_time_; }

  // Same interval as Now(), converted to seconds with CLOCKS_PER_SEC.
  double NowSeconds() const {
    return static_cast<double>(std::clock() - start_time_) / CLOCKS_PER_SEC;
  }

  // Restarts the measurement from the current instant.
  void Reset() { start_time_ = std::clock(); }

 private:
  std::clock_t start_time_;
};

// Element-wise product: a[i] = b[i] * c[i] for every i in [0, size).
// The pointers carry no aliasing qualifier, so the compiler has to allow
// for the possibility that a store through `a` changes what `b` or `c` read.
void Func(int* a, int* b, int* c, int size) {
  for (int i = 0; i < size; ++i) {
    a[i] = b[i] * c[i];
  }
}

// Same computation as Func(), but every pointer is marked `__restrict__`
// (a GCC/Clang extension). The caller promises that the three ranges do not
// overlap; passing overlapping ranges is undefined behaviour.
void FuncForSIMD(int* __restrict__ a, int* __restrict__ b,
                 int* __restrict__ c, int size) {
  for (int i = 0; i < size; ++i) {
    a[i] = b[i] * c[i];
  }
}

// Times Func() against FuncForSIMD() on three large arrays and prints the
// elapsed clock ticks of each run.
int main() {
  // Three arrays of this many ints: roughly 12 GB in total when int is 4 bytes.
  constexpr int size = 1000000000;

  // unique_ptr<int[]> (not unique_ptr<int>) so the storage is released with
  // delete[], matching new[]. The trailing `()` value-initialises the arrays,
  // so the loops never read indeterminate values. It presumably also touches
  // every page before timing starts, keeping first-touch cost out of the
  // first measurement -- verify on your platform.
  std::unique_ptr<int[]> a(new int[size]());
  std::unique_ptr<int[]> b(new int[size]());
  std::unique_ptr<int[]> c(new int[size]());

  Clock clock;
  Func(a.get(), b.get(), c.get(), size);
  std::cout << "time without simd: " << clock.Now() << std::endl;

  clock.Reset();
  FuncForSIMD(a.get(), b.get(), c.get(), size);
  std::cout << "time with simd: " << clock.Now() << std::endl;

  return 0;
}

// To compile we need to run:
g++ -std=c++0x simd.cc -o simd -O3
The output on my machine looks as follows:
time without simd: 2160000
time with simd: 390000
So the version with restrict-qualified pointers runs about 5.5x faster.
A great explanation is given here:
Demystifying The Restrict Keyword - http://cellperformance.beyond3d.com/articles/2006/05/demystifying-the-restrict-keyword.html