This section of the archives stores flipcode's complete Developer Toolbox collection, featuring a variety of mini-articles and source code contributions from our readers.

 

  Unrolling Loops With Meta-Programming
  Submitted by



Tim Sweeney recently posted a great piece of code as a "code of the day". He presented a generic, n-dimensional template for vertor spaces.

Tim's code objective was more clarity than performance, but some people complained about the fact that vector operations were implemented as for loops, MSVCPP is not clever enought to unroll.

So I decided to post this trick based on meta-programming that can be used to unroll statement execution.

This way, you can write a vector/matrix class template that will be exactly the same as your hand-written Vect3 class.

In the following code you will find:
  • the AssignOpLoopUnroller class which can be viewed as the "meta-program"
  • a toy matrix class using this stuff
  • a small foo-bar-main program using this class
  • and finally, some assembly code showing the unrolled loops. a.k.a. the proof! :)


  • Of course, this trick is not mine, I stole it to meta-programming gurus.

    If you want to know more about meta-programming, check:
  • The Blitz home-page: http://oonumerics.org/blitz/
  • Todd Veldhuizen's expression templates: http://extreme.indiana.edu/~tveldhui/papers/Expression-Templates/exprtmpl.html


  • Enjoy your unrolling!

    --
    Xavier Lemaire
    lemair_x@epita.fr

    //============================================================================]
    // The two following templated classes contain a static member function (doOperation)
    // performing assignation operation between two values(= or +=).
    // so T_lhs and T_rhs can be float, const float, double, a Complex class or whatever.
    //============================================================================]
    struct AssignOpAssign
    {
      template <typename T_lhs, typename T_rhs
      inline static void doOperation(T_lhs & lhs, T_rhs & rhs) { lhs = rhs; }
    };

    struct AssignOpAdd { template <typename T_lhs, typename T_rhs inline static void doOperation(T_lhs & lhs, T_rhs & rhs) { lhs += rhs; } };

    //============================================================================] // The AssignOpLoopUnroller class contains the code performing the loop unrolling // using the template recursion principle. // This principe is simple: a template with an integer parameter contains a static // function exec. Loop<N::exec calls Loop<N-1::exec // Explicit specialisation is used to stop the recursion: Loop<0::exec() does nothing. //============================================================================] template <typename T_lhs, typename T_rhs, typename T_operator struct AssignOpLoopUnroller { template <int N struct Loop { inline static void exec(T_lhs * aLArray, T_rhs * aRArray) { T_operator::doOperation(aLArray[N - 1], aRArray[N - 1]); Loop<N - 1::exec(aLArray, aRArray); } };

    struct Loop<0 { static inline void exec(T_lhs * aLArray, T_rhs * aRArray) { } }; };

    /** * A toy vector class illustrating the use of this "unrolling metaprogram" * Of course, other kinds of unrollers would have to be developed for other kind * of operations (like a ConstantAssignOpLoopUnroller). */ template <typename T, int N struct Vector { T & operator [] (int i) { return m_aValues[i]; }

    const T & operator [] (int i) const { return m_aValues[i]; }

    Vector & operator += (const Vector & rhs) { AssignOpLoopUnroller<T, const T, AssignOpAdd ::Loop<N::exec(m_aValues, rhs.m_aValues); return *this; }

    Vector & operator = (const Vector & rhs) { AssignOpLoopUnroller<T, const T, AssignOpAssign ::Loop<N::exec(m_aValues, rhs.m_aValues); return *this; }

    const Vector operator + (const Vector & rhs) const { return Vector(*this) += rhs; }

    protected: T m_aValues[N]; };

    //============================================================================] // Some code using this class. // Amazing, it works! :) //============================================================================] # include <iostream

    using namespace std;

    typedef Vector<float, 4 Vect4;

    void foo(Vect4 & lhs, Vect4 &rhs) { lhs += rhs; }

    void bar(Vect4 & lhs, Vect4 &rhs) { lhs = rhs; }

    void main() { Vect4 vect1; Vect4 vect2;

    vect1[0] = 1; vect1[1] = 7; vect1[2] = 3; vect1[3] = 11; vect2[0] = 41; vect2[1] = 35; vect2[2] = 39; vect2[3] = 31; foo(vect2, vect1); bar(vect1, vect2);

    cout << "vect1 ="; for (int i = 0; i < 4; ++i) cout << " " << vect1[i]; cout << endl; }

    //============================================================================] // Samples from the assembly listing generated by MSVC++ : //============================================================================] /*

    ?foo@@YIXAAU?$Vector@M$03@@0@Z PROC NEAR ; foo, COMDAT

    ; 92 : lhs += rhs;

    fld DWORD PTR [edx+12] fadd DWORD PTR [ecx+12] fstp DWORD PTR [ecx+12] fld DWORD PTR [edx+8] fadd DWORD PTR [ecx+8] fstp DWORD PTR [ecx+8] fld DWORD PTR [edx+4] fadd DWORD PTR [ecx+4] fstp DWORD PTR [ecx+4] fld DWORD PTR [ecx] fadd DWORD PTR [edx] fstp DWORD PTR [ecx]

    ; 93 : }

    ret 0 ?foo@@YIXAAU?$Vector@M$03@@0@Z ENDP ; foo



    PUBLIC ?bar@@YIXAAU?$Vector@M$03@@0@Z ; bar ; COMDAT ?bar@@YIXAAU?$Vector@M$03@@0@Z _TEXT SEGMENT ?bar@@YIXAAU?$Vector@M$03@@0@Z PROC NEAR ; bar, COMDAT

    ; 97 : lhs = rhs;

    mov eax, DWORD PTR [edx+12] mov DWORD PTR [ecx+12], eax mov eax, DWORD PTR [edx+8] mov DWORD PTR [ecx+8], eax mov eax, DWORD PTR [edx+4] mov DWORD PTR [ecx+4], eax mov edx, DWORD PTR [edx] mov DWORD PTR [ecx], edx

    ; 98 : }

    ret 0 ?bar@@YIXAAU?$Vector@M$03@@0@Z ENDP ; bar */


    /* The assembly code shows that as expected, the Vect4 operations are inlined and the loops are unrolled! The VCPP compiler inlining depth will limit this unrolling. By default, inline_depth == 8 You can increase this value up to 255. */



    The zip file viewer built into the Developer Toolbox made use of the zlib library, as well as the zlibdll source additions.

     

    Copyright 1999-2008 (C) FLIPCODE.COM and/or the original content author(s). All rights reserved.
    Please read our Terms, Conditions, and Privacy information.