SIMD: Array of Structs -> Struct of Arrays and vice versa

#include <xmmintrin.h>
#include <glm/glm.hpp>

// in  -> 4 128-bit registers in SoA (x, y, z and w planes)
// out -> 4 vec4 in AoS
static void soa_to_aos_vec4(
    __m128 * block,
    glm::vec4 * test_vec
    ){
    // block[0] holds x0..x3, block[1] y0..y3, block[2] z0..z3 and
    // block[3] w0..w3, so vector i is gathered from lane i of each plane.
    float *pvec = (float*) &block[0];
    test_vec[0] = glm::vec4(pvec[0], pvec[4], pvec[8],  pvec[12]);
    test_vec[1] = glm::vec4(pvec[1], pvec[5], pvec[9],  pvec[13]);
    test_vec[2] = glm::vec4(pvec[2], pvec[6], pvec[10], pvec[14]);
    test_vec[3] = glm::vec4(pvec[3], pvec[7], pvec[11], pvec[15]);
}
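The scalar gather above bounces through memory one float at a time. As a sketch of an alternative (my addition, not from the original post), the same conversion can stay in registers with the _MM_TRANSPOSE4_PS macro from xmmintrin.h, which transposes a 4x4 float block using shuffles:

// Hypothetical in-register variant: transpose the x/y/z/w planes so each
// register becomes one (x, y, z, w) vector, then store four floats at once.
static void soa_to_aos_vec4_transpose(
    __m128 * block,
    glm::vec4 * test_vec
    ){
    __m128 r0 = block[0], r1 = block[1], r2 = block[2], r3 = block[3];
    _MM_TRANSPOSE4_PS(r0, r1, r2, r3);  // rows (planes) become columns (vectors)
    _mm_storeu_ps(&test_vec[0].x, r0);  // (x0, y0, z0, w0)
    _mm_storeu_ps(&test_vec[1].x, r1);
    _mm_storeu_ps(&test_vec[2].x, r2);
    _mm_storeu_ps(&test_vec[3].x, r3);
}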

// in  -> 4 vec4 in AoS
// out -> 4 128-bit registers in SoA
static void aos_to_soa_vec4(
    __m128 * block,
    glm::vec4 * test_vec
    ){

    __m128 xy10, xy32, zw10, zw32;

    // Gather the xy and zw halves of the four vectors, two vectors per
    // register: xy10 = (x0,y0,x1,y1), xy32 = (x2,y2,x3,y3), same for zw.
    xy10 = zw10 = _mm_setzero_ps();
    xy32 = zw32 = _mm_setzero_ps();
    xy10 = _mm_loadl_pi(xy10, (__m64*)&(test_vec[0]).x);
    zw10 = _mm_loadl_pi(zw10, (__m64*)&(test_vec[0]).z);
    xy32 = _mm_loadl_pi(xy32, (__m64*)&(test_vec[2]).x);
    zw32 = _mm_loadl_pi(zw32, (__m64*)&(test_vec[2]).z);
    xy10 = _mm_loadh_pi(xy10, (__m64*)&(test_vec[1]).x);
    zw10 = _mm_loadh_pi(zw10, (__m64*)&(test_vec[1]).z);
    xy32 = _mm_loadh_pi(xy32, (__m64*)&(test_vec[3]).x);
    zw32 = _mm_loadh_pi(zw32, (__m64*)&(test_vec[3]).z);

    // De-interleave: even lanes carry x/z, odd lanes carry y/w.
    block[0] = _mm_shuffle_ps(xy10, xy32, _MM_SHUFFLE(2,0,2,0)); // x0 x1 x2 x3
    block[1] = _mm_shuffle_ps(xy10, xy32, _MM_SHUFFLE(3,1,3,1)); // y0 y1 y2 y3
    block[2] = _mm_shuffle_ps(zw10, zw32, _MM_SHUFFLE(2,0,2,0)); // z0 z1 z2 z3
    block[3] = _mm_shuffle_ps(zw10, zw32, _MM_SHUFFLE(3,1,3,1)); // w0 w1 w2 w3
}
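A quick round-trip sanity check (my own sketch): pack four vectors into SoA form and unpack them again; out should equal in.

static void roundtrip_demo() {
    glm::vec4 in[4] = {
        glm::vec4(0.f,  1.f,  2.f,  3.f),  glm::vec4(4.f,  5.f,  6.f,  7.f),
        glm::vec4(8.f,  9.f, 10.f, 11.f),  glm::vec4(12.f, 13.f, 14.f, 15.f)
    };
    __m128 block[4];
    glm::vec4 out[4];
    aos_to_soa_vec4(block, in);   // block[0] is now (0, 4, 8, 12), etc.
    soa_to_aos_vec4(block, out);  // out[i] == in[i]
}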

Morton Codes SIMD

Here’s a SIMD Morton code generator. It is not as fast as the LUT method (http://www.forceflow.be/2013/10/07/morton-encodingdecoding-through-bit-interleaving-implementations/), but it could be extended to produce four codes per call.
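For reference, this is the scalar shift-and-mask sequence that the SIMD version below replicates in each lane (a standard bit-interleave; the helper name part1by1 is mine). Each round ORs in a shifted copy of the value, then masks away everything that landed outside the target bit slots:

#include <cstdint>

// Spread the low 16 bits of v so they occupy the even bit positions.
static uint32_t part1by1(uint32_t v) {
    v &= 0x0000FFFF;
    v = (v | (v << 8)) & 0x00FF00FF;
    v = (v | (v << 4)) & 0x0F0F0F0F;
    v = (v | (v << 2)) & 0x33333333;
    v = (v | (v << 1)) & 0x55555555;
    return v;
}

static uint32_t encode_morton_scalar(uint32_t x, uint32_t y) {
    return part1by1(x) | (part1by1(y) << 1);  // x -> even bits, y -> odd bits
}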
#include <bitset>
#include <climits>
#include <iostream>
#include <emmintrin.h>

// Debug helper: print the raw bytes of a value as binary
// (least-significant byte first on x86).
template<typename T>
void show_binary(const T& a)
{
    const char* beg = reinterpret_cast<const char*>(&a);
    const char* end = beg + sizeof(a);
    while (beg != end)
        std::cout << std::bitset<CHAR_BIT>(*beg++) << ' ';
    std::cout << std::endl;
}

static uint32_t encode_morton(int x, int y) {
    // Masks for the four spread rounds; SB[i] pairs with shift S[i].
    static const __m128i SB[] = {
        _mm_set1_epi32(0x55555555),
        _mm_set1_epi32(0x33333333),
        _mm_set1_epi32(0x0F0F0F0F),
        _mm_set1_epi32(0x00FF00FF)
    };
    static const int S[] = { 1, 2, 4, 8 };

    // Only the low 16 bits of each coordinate fit into a 32-bit code.
    const __m128i x_bit1 = _mm_set1_epi32(x & 0xFFFF);
    const __m128i y_bit1 = _mm_set1_epi32(y & 0xFFFF);

    // Each round computes v = (v | (v << S[i])) & SB[i], spreading the bits
    // further apart; the mask is applied after the OR so that the stale
    // source bits are cleared.
    const __m128i shf_x1 = _mm_slli_epi32(x_bit1, S[3]);
    const __m128i shf_y1 = _mm_slli_epi32(y_bit1, S[3]);
    const __m128i x_bit2 = _mm_and_si128(_mm_or_si128(x_bit1, shf_x1), SB[3]);
    const __m128i y_bit2 = _mm_and_si128(_mm_or_si128(y_bit1, shf_y1), SB[3]);
    const __m128i shf_x2 = _mm_slli_epi32(x_bit2, S[2]);
    const __m128i shf_y2 = _mm_slli_epi32(y_bit2, S[2]);
    const __m128i x_bit3 = _mm_and_si128(_mm_or_si128(x_bit2, shf_x2), SB[2]);
    const __m128i y_bit3 = _mm_and_si128(_mm_or_si128(y_bit2, shf_y2), SB[2]);
    const __m128i shf_x3 = _mm_slli_epi32(x_bit3, S[1]);
    const __m128i shf_y3 = _mm_slli_epi32(y_bit3, S[1]);
    const __m128i x_bit4 = _mm_and_si128(_mm_or_si128(x_bit3, shf_x3), SB[1]);
    const __m128i y_bit4 = _mm_and_si128(_mm_or_si128(y_bit3, shf_y3), SB[1]);
    const __m128i shf_x4 = _mm_slli_epi32(x_bit4, S[0]);
    const __m128i shf_y4 = _mm_slli_epi32(y_bit4, S[0]);
    const __m128i x_bit5 = _mm_and_si128(_mm_or_si128(x_bit4, shf_x4), SB[0]);
    const __m128i y_bit5 = _mm_and_si128(_mm_or_si128(y_bit4, shf_y4), SB[0]);

    // Interleave: x bits take the even positions, y bits the odd ones.
    const __m128i morton = _mm_or_si128(x_bit5, _mm_slli_epi32(y_bit5, 1));

    show_binary(morton);
    return (uint32_t)_mm_cvtsi128_si32(morton);
}
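Since all four lanes of the registers above hold the same value, the "four codes per call" extension mentioned earlier falls out almost for free: load four different (x, y) pairs instead of broadcasting one. A minimal sketch of that idea (names and layout are mine, untested):

// Hypothetical 4-wide variant: encode four (x, y) pairs per call.
// Same rounds as encode_morton, with loads instead of broadcasts.
static void encode_morton_x4(const int32_t xs[4], const int32_t ys[4],
                             uint32_t out[4]) {
    static const uint32_t M[]  = { 0x00FF00FF, 0x0F0F0F0F, 0x33333333, 0x55555555 };
    static const int      Sh[] = { 8, 4, 2, 1 };

    const __m128i mask16 = _mm_set1_epi32(0x0000FFFF);
    __m128i vx = _mm_and_si128(_mm_loadu_si128((const __m128i*)xs), mask16);
    __m128i vy = _mm_and_si128(_mm_loadu_si128((const __m128i*)ys), mask16);

    for (int i = 0; i < 4; ++i) {
        const __m128i m = _mm_set1_epi32(M[i]);
        vx = _mm_and_si128(_mm_or_si128(vx, _mm_slli_epi32(vx, Sh[i])), m);
        vy = _mm_and_si128(_mm_or_si128(vy, _mm_slli_epi32(vy, Sh[i])), m);
    }
    _mm_storeu_si128((__m128i*)out, _mm_or_si128(vx, _mm_slli_epi32(vy, 1)));
}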

Copy on pastebin:

http://pastebin.com/JfbkFmrc