Morton Codes SIMD

Here’s a SIMD (SSE2) Morton code generator. It is not as fast as the LUT method (http://www.forceflow.be/2013/10/07/morton-encodingdecoding-through-bit-interleaving-implementations/), but it could be extended to produce 4 codes per call.
/// Prints the object representation of `a` to std::cout in binary.
///
/// Bytes are written in memory order (lowest address first). Each byte is
/// rendered as CHAR_BIT binary digits followed by a single space, and the
/// line is terminated (and flushed) with std::endl.
///
/// @param a  Object whose raw bytes are dumped; it is only read.
template<typename T>
void show_binary(const T& a)
{
    // Walk the bytes as unsigned char so the value handed to std::bitset is
    // never sign-extended.
    const unsigned char* beg = reinterpret_cast<const unsigned char*>(&a);
    const unsigned char* const end = beg + sizeof(a);
    while (beg != end)
        std::cout << std::bitset<CHAR_BIT>(*beg++) << ' ';
    std::cout << std::endl;
}

/// Spreads the low 16 bits of every 32-bit lane so that input bit i ends up
/// at bit 2*i and all odd bits are zero.
///
/// Each step is `v = (v | (v << s)) & mask`: the OR duplicates the bits `s`
/// positions higher, and the AND keeps only the copies that belong in the
/// widened layout. The mask must be applied AFTER the OR, otherwise the
/// un-shifted bits are never cleared.
static __m128i morton_spread_low16_epi32(__m128i v) {
    v = _mm_and_si128(v, _mm_set1_epi32(0x0000FFFF));  // only 16 bits fit per axis
    v = _mm_and_si128(_mm_or_si128(v, _mm_slli_epi32(v, 8)), _mm_set1_epi32(0x00FF00FF));
    v = _mm_and_si128(_mm_or_si128(v, _mm_slli_epi32(v, 4)), _mm_set1_epi32(0x0F0F0F0F));
    v = _mm_and_si128(_mm_or_si128(v, _mm_slli_epi32(v, 2)), _mm_set1_epi32(0x33333333));
    v = _mm_and_si128(_mm_or_si128(v, _mm_slli_epi32(v, 1)), _mm_set1_epi32(0x55555555));
    return v;
}

/// Computes the 32-bit 2D Morton (Z-order) code of (x, y) with SSE2.
///
/// Bits of `x` occupy the even bit positions of the result and bits of `y`
/// the odd positions. Only the low 16 bits of each coordinate are used;
/// higher bits are masked off.
///
/// @param x  First coordinate (low 16 bits used).
/// @param y  Second coordinate (low 16 bits used).
/// @return   The interleaved code, taken from lane 0 of the SIMD result.
static uint32_t encode_morton(int x, int y) {
    // The same value is broadcast to all four lanes; every lane therefore
    // holds the same code and lane 0 is returned.
    const __m128i x_spread = morton_spread_low16_epi32(_mm_set1_epi32(x));
    const __m128i y_spread = morton_spread_low16_epi32(_mm_set1_epi32(y));

    // y is shifted up by one so it lands in the odd bit positions.
    const __m128i morton = _mm_or_si128(x_spread, _mm_slli_epi32(y_spread, 1));

    // Extract lane 0 with the dedicated intrinsic instead of aliasing the
    // vector through a uint32_t pointer.
    return static_cast<uint32_t>(_mm_cvtsi128_si32(morton));
}

Copy on pastebin:

http://pastebin.com/JfbkFmrc

Leave a comment