// SSE utilities

// global functions

// Four-register overload of _mm_deinterleave_epi16 (declaration only; the
// definition is not visible here).
// All four __m128i arguments are taken by non-const reference and nothing is
// returned, so any result must be delivered by overwriting the arguments.
// NOTE(review): the name and the r/g parameter naming suggest this separates
// interleaved two-channel data with 16-bit elements into per-channel
// registers, in place — confirm against the definition.
void
_mm_deinterleave_epi16(
    __m128i& v_r0,
    __m128i& v_r1,
    __m128i& v_g0,
    __m128i& v_g1
    );

// Six-register overload of _mm_deinterleave_epi16 (declaration only; the
// definition is not visible here).
// All six __m128i arguments are taken by non-const reference and nothing is
// returned, so any result must be delivered by overwriting the arguments.
// NOTE(review): the name and the r/g/b parameter naming suggest this separates
// interleaved three-channel data with 16-bit elements into per-channel
// registers, in place — confirm against the definition.
void
_mm_deinterleave_epi16(
    __m128i& v_r0,
    __m128i& v_r1,
    __m128i& v_g0,
    __m128i& v_g1,
    __m128i& v_b0,
    __m128i& v_b1
    );

// Eight-register overload of _mm_deinterleave_epi16 (declaration only; the
// definition is not visible here).
// All eight __m128i arguments are taken by non-const reference and nothing is
// returned, so any result must be delivered by overwriting the arguments.
// NOTE(review): the name and the r/g/b/a parameter naming suggest this
// separates interleaved four-channel data with 16-bit elements into
// per-channel registers, in place — confirm against the definition.
void
_mm_deinterleave_epi16(
    __m128i& v_r0,
    __m128i& v_r1,
    __m128i& v_g0,
    __m128i& v_g1,
    __m128i& v_b0,
    __m128i& v_b1,
    __m128i& v_a0,
    __m128i& v_a1
    );

// Four-register overload of _mm_deinterleave_epi8 (declaration only; the
// definition is not visible here).
// All four __m128i arguments are taken by non-const reference and nothing is
// returned, so any result must be delivered by overwriting the arguments.
// NOTE(review): the name and the r/g parameter naming suggest this separates
// interleaved two-channel data with 8-bit elements into per-channel
// registers, in place — confirm against the definition.
void
_mm_deinterleave_epi8(
    __m128i& v_r0,
    __m128i& v_r1,
    __m128i& v_g0,
    __m128i& v_g1
    );

// Six-register overload of _mm_deinterleave_epi8 (declaration only; the
// definition is not visible here).
// All six __m128i arguments are taken by non-const reference and nothing is
// returned, so any result must be delivered by overwriting the arguments.
// NOTE(review): the name and the r/g/b parameter naming suggest this separates
// interleaved three-channel data with 8-bit elements into per-channel
// registers, in place — confirm against the definition.
void
_mm_deinterleave_epi8(
    __m128i& v_r0,
    __m128i& v_r1,
    __m128i& v_g0,
    __m128i& v_g1,
    __m128i& v_b0,
    __m128i& v_b1
    );

// Eight-register overload of _mm_deinterleave_epi8 (declaration only; the
// definition is not visible here).
// All eight __m128i arguments are taken by non-const reference and nothing is
// returned, so any result must be delivered by overwriting the arguments.
// NOTE(review): the name and the r/g/b/a parameter naming suggest this
// separates interleaved four-channel data with 8-bit elements into
// per-channel registers, in place — confirm against the definition.
void
_mm_deinterleave_epi8(
    __m128i& v_r0,
    __m128i& v_r1,
    __m128i& v_g0,
    __m128i& v_g1,
    __m128i& v_b0,
    __m128i& v_b1,
    __m128i& v_a0,
    __m128i& v_a1
    );

// Four-register overload of _mm_deinterleave_ps (declaration only; the
// definition is not visible here).
// All four __m128 arguments are taken by non-const reference and nothing is
// returned, so any result must be delivered by overwriting the arguments.
// NOTE(review): the name and the r/g parameter naming suggest this separates
// interleaved two-channel packed single-precision data into per-channel
// registers, in place — confirm against the definition.
void
_mm_deinterleave_ps(
    __m128& v_r0,
    __m128& v_r1,
    __m128& v_g0,
    __m128& v_g1
    );

// Six-register overload of _mm_deinterleave_ps (declaration only; the
// definition is not visible here).
// All six __m128 arguments are taken by non-const reference and nothing is
// returned, so any result must be delivered by overwriting the arguments.
// NOTE(review): the name and the r/g/b parameter naming suggest this separates
// interleaved three-channel packed single-precision data into per-channel
// registers, in place — confirm against the definition.
void
_mm_deinterleave_ps(
    __m128& v_r0,
    __m128& v_r1,
    __m128& v_g0,
    __m128& v_g1,
    __m128& v_b0,
    __m128& v_b1
    );

// Eight-register overload of _mm_deinterleave_ps (declaration only; the
// definition is not visible here).
// All eight __m128 arguments are taken by non-const reference and nothing is
// returned, so any result must be delivered by overwriting the arguments.
// NOTE(review): the name and the r/g/b/a parameter naming suggest this
// separates interleaved four-channel packed single-precision data into
// per-channel registers, in place — confirm against the definition.
void
_mm_deinterleave_ps(
    __m128& v_r0,
    __m128& v_r1,
    __m128& v_g0,
    __m128& v_g1,
    __m128& v_b0,
    __m128& v_b1,
    __m128& v_a0,
    __m128& v_a1
    );

// Four-register overload of _mm_interleave_epi8 (declaration only; the
// definition is not visible here).
// All four __m128i arguments are taken by non-const reference and nothing is
// returned, so any result must be delivered by overwriting the arguments.
// NOTE(review): the name and the r/g parameter naming suggest this merges
// per-channel registers of 8-bit elements into interleaved two-channel
// order, in place (presumably the inverse of _mm_deinterleave_epi8) —
// confirm against the definition.
void
_mm_interleave_epi8(
    __m128i& v_r0,
    __m128i& v_r1,
    __m128i& v_g0,
    __m128i& v_g1
    );

// Six-register overload of _mm_interleave_epi8 (declaration only; the
// definition is not visible here).
// All six __m128i arguments are taken by non-const reference and nothing is
// returned, so any result must be delivered by overwriting the arguments.
// NOTE(review): the name and the r/g/b parameter naming suggest this merges
// per-channel registers of 8-bit elements into interleaved three-channel
// order, in place (presumably the inverse of _mm_deinterleave_epi8) —
// confirm against the definition.
void
_mm_interleave_epi8(
    __m128i& v_r0,
    __m128i& v_r1,
    __m128i& v_g0,
    __m128i& v_g1,
    __m128i& v_b0,
    __m128i& v_b1
    );

// Eight-register overload of _mm_interleave_epi8 (declaration only; the
// definition is not visible here).
// All eight __m128i arguments are taken by non-const reference and nothing is
// returned, so any result must be delivered by overwriting the arguments.
// NOTE(review): the name and the r/g/b/a parameter naming suggest this merges
// per-channel registers of 8-bit elements into interleaved four-channel
// order, in place (presumably the inverse of _mm_deinterleave_epi8) —
// confirm against the definition.
void
_mm_interleave_epi8(
    __m128i& v_r0,
    __m128i& v_r1,
    __m128i& v_g0,
    __m128i& v_g1,
    __m128i& v_b0,
    __m128i& v_b1,
    __m128i& v_a0,
    __m128i& v_a1
    );

// Four-register overload of _mm_interleave_ps (declaration only; the
// definition is not visible here).
// All four __m128 arguments are taken by non-const reference and nothing is
// returned, so any result must be delivered by overwriting the arguments.
// NOTE(review): the name and the r/g parameter naming suggest this merges
// per-channel packed single-precision registers into interleaved
// two-channel order, in place (presumably the inverse of
// _mm_deinterleave_ps) — confirm against the definition.
void
_mm_interleave_ps(
    __m128& v_r0,
    __m128& v_r1,
    __m128& v_g0,
    __m128& v_g1
    );

// Six-register overload of _mm_interleave_ps (declaration only; the
// definition is not visible here).
// All six __m128 arguments are taken by non-const reference and nothing is
// returned, so any result must be delivered by overwriting the arguments.
// NOTE(review): the name and the r/g/b parameter naming suggest this merges
// per-channel packed single-precision registers into interleaved
// three-channel order, in place (presumably the inverse of
// _mm_deinterleave_ps) — confirm against the definition.
void
_mm_interleave_ps(
    __m128& v_r0,
    __m128& v_r1,
    __m128& v_g0,
    __m128& v_g1,
    __m128& v_b0,
    __m128& v_b1
    );

// Eight-register overload of _mm_interleave_ps (declaration only; the
// definition is not visible here).
// All eight __m128 arguments are taken by non-const reference and nothing is
// returned, so any result must be delivered by overwriting the arguments.
// NOTE(review): the name and the r/g/b/a parameter naming suggest this merges
// per-channel packed single-precision registers into interleaved
// four-channel order, in place (presumably the inverse of
// _mm_deinterleave_ps) — confirm against the definition.
void
_mm_interleave_ps(
    __m128& v_r0,
    __m128& v_r1,
    __m128& v_g0,
    __m128& v_g1,
    __m128& v_b0,
    __m128& v_b1,
    __m128& v_a0,
    __m128& v_a1
    );