Notes of Software Engineer: March 2014

Tuesday, March 4, 2014

GCC and vectorization

#include<ctime>
#include<iostream>
#include<memory>

using namespace std;

// Small stopwatch built on std::clock(). It remembers the std::clock() value
// taken at construction (or at the most recent Reset()) and reports how far
// std::clock() has advanced since then.
class Clock {
 public:
  // Starts the stopwatch immediately.
  Clock() { Reset(); }

  // Re-anchors the stopwatch at the current std::clock() value.
  void Reset() { start_time_ = std::clock(); }

  // Elapsed time since the anchor, in raw std::clock() ticks.
  std::clock_t Now() const {
    const std::clock_t current = std::clock();
    return current - start_time_;
  }

  // Elapsed time since the anchor, converted to seconds via CLOCKS_PER_SEC.
  double NowSeconds() const {
    const double elapsed_ticks = static_cast<double>(Now());
    return elapsed_ticks / CLOCKS_PER_SEC;
  }

 private:
  // std::clock() reading that elapsed times are measured against.
  std::clock_t start_time_;
};

// Element-wise product: for every i in [0, size) stores b[i] * c[i] into a[i].
// Elements are processed in increasing index order. The pointers carry no
// aliasing qualifier, so the three ranges are allowed to overlap.
// A non-positive size performs no work.
void Func(int* a, int *b, int* c, int size) {
  int idx = 0;
  while (idx < size) {
    a[idx] = b[idx] * c[idx];
    ++idx;
  }
}

// Same element-wise product as the plain version: a[i] = b[i] * c[i] for every
// i in [0, size). The difference is the __restrict__ qualifier (a GCC
// extension) on each pointer: the caller promises that the three ranges do not
// overlap, which frees the compiler from having to assume that a store through
// `a` may change what `b` or `c` point at. Passing overlapping ranges breaks
// that promise. A non-positive size performs no work.
void FuncForSIMD(int* __restrict__ a, int* __restrict__ b, int* __restrict__ c,
                 int size) {
  for (int idx = 0; idx < size; ++idx) {
    a[idx] = b[idx] * c[idx];
  }
}

// Benchmark driver: runs Func and then FuncForSIMD over the same three buffers
// of `size` ints and prints the elapsed std::clock() ticks for each call.
int main() {
  // Three buffers of this many ints are allocated below (about 4 GB each with
  // a 4-byte int), so the program needs a machine with plenty of memory.
  constexpr int size = 1000000000;

  // The array form unique_ptr<int[]> is required so that the buffers are
  // released with delete[]; unique_ptr<int> would call plain delete on memory
  // obtained from new[], which is undefined behaviour.
  //
  // The trailing () value-initializes every element to zero. Without it the
  // kernels would read indeterminate values, and the memory would be written
  // for the first time inside the first timed call instead of before it.
  std::unique_ptr<int[]> a(new int[size]());
  std::unique_ptr<int[]> b(new int[size]());
  std::unique_ptr<int[]> c(new int[size]());

  // The stopwatch starts only after allocation and initialization are done.
  Clock clock;
  Func(a.get(), b.get(), c.get(), size);
  std::cout << "time without simd: " << clock.Now() << std::endl;

  clock.Reset();
  FuncForSIMD(a.get(), b.get(), c.get(), size);
  std::cout << "time with simd: " << clock.Now() << std::endl;
  return 0;
}

To compile we need to run:

g++ -std=c++0x simd.cc -o simd -O3

The output on my machine looks as follows (the values are std::clock() ticks):

time without simd: 2160000
time with simd: 390000

So the __restrict__ version runs about 5.5x faster (2160000 / 390000 ≈ 5.5).
A great explanation is given here:
Demystifying The Restrict Keyword - http://cellperformance.beyond3d.com/articles/2006/05/demystifying-the-restrict-keyword.html

Notes of Software Engineer

Tuesday, March 4, 2014

GCC and vectorization

Pages

Popular Posts

Search This Blog

Labels

Blog Archive

About Me

Tuesday, March 4, 2014

GCC and vectorization

Pages

Popular Posts

Search This Blog

Labels

Subscribe To

Blog Archive

About Me