|
@@ -36,6 +36,17 @@ do { \
|
|
*(uint32_t*)(lum_plane+lum_pos1) = get_m128_32_1(pack_val); \
|
|
*(uint32_t*)(lum_plane+lum_pos1) = get_m128_32_1(pack_val); \
|
|
} while (false)
|
|
} while (false)
|
|
|
|
|
|
|
|
/*
 * pack_val: mask two 128-bit registers, narrow the result to bytes and store
 * four bytes per register into a plane.
 *
 *   lum_plane  byte pointer to the destination plane
 *   lum_pos0   byte offset in the plane that receives the bytes from line1
 *   lum_pos1   byte offset in the plane that receives the bytes from line2
 *   line1      __m128i holding four 32-bit lanes of the first row
 *   line2      __m128i holding four 32-bit lanes of the second row
 *   mask       __m128i AND-mask applied to both rows before packing
 *
 * The masked 32-bit lanes are narrowed to 16 bits with signed saturation and
 * then to 8 bits with unsigned saturation, so the mask is expected to leave
 * only the low byte of each lane set (e.g. 0x000000FF); anything larger would
 * be clamped rather than truncated.
 *
 * NOTE(review): get_m128_32_0 / get_m128_32_1 are defined elsewhere; they
 * presumably return 32-bit lanes 0 and 1 of the register, which after the
 * two packs would hold the four bytes of line1 and line2 respectively --
 * confirm against their definitions.
 *
 * Every argument is parenthesized so that expression arguments cannot change
 * the address computation, and the temporary is not named after the macro
 * itself to keep the expansion easy to read.
 */
#define pack_val(lum_plane, lum_pos0, lum_pos1, line1, line2, mask) \
do { \
	__m128i packed_ = _mm_packs_epi32( \
			_mm_and_si128((line1), (mask)), \
			_mm_and_si128((line2), (mask))); \
	packed_ = _mm_packus_epi16(packed_, packed_); \
	\
	*(uint32_t*)((lum_plane) + (lum_pos0)) = get_m128_32_0(packed_); \
	*(uint32_t*)((lum_plane) + (lum_pos1)) = get_m128_32_1(packed_); \
} while (false)
|
|
|
|
+
|
|
#define pack_ch_1plane(uv_plane, chroma_pos, line1, line2, uv_mask) \
|
|
#define pack_ch_1plane(uv_plane, chroma_pos, line1, line2, uv_mask) \
|
|
do { \
|
|
do { \
|
|
__m128i add_val = _mm_add_epi64( \
|
|
__m128i add_val = _mm_add_epi64( \
|
|
@@ -152,6 +163,45 @@ void compress_uyvx_to_nv12(
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+void convert_uyvx_to_i444(
|
|
|
|
+ const uint8_t *input, uint32_t in_linesize,
|
|
|
|
+ uint32_t start_y, uint32_t end_y,
|
|
|
|
+ uint8_t *output[], const uint32_t out_linesize[])
|
|
|
|
+{
|
|
|
|
+ uint8_t *lum_plane = output[0];
|
|
|
|
+ uint8_t *u_plane = output[1];
|
|
|
|
+ uint8_t *v_plane = output[2];
|
|
|
|
+ uint32_t width = min_uint32(in_linesize, out_linesize[0]);
|
|
|
|
+ uint32_t y;
|
|
|
|
+
|
|
|
|
+ __m128i lum_mask = _mm_set1_epi32(0x0000FF00);
|
|
|
|
+ __m128i u_mask = _mm_set1_epi32(0x000000FF);
|
|
|
|
+ __m128i v_mask = _mm_set1_epi32(0x00FF0000);
|
|
|
|
+
|
|
|
|
+ for (y = start_y; y < end_y; y += 2) {
|
|
|
|
+ uint32_t y_pos = y * in_linesize;
|
|
|
|
+ uint32_t lum_y_pos = y * out_linesize[0];
|
|
|
|
+ uint32_t x;
|
|
|
|
+
|
|
|
|
+ for (x = 0; x < width; x += 4) {
|
|
|
|
+ const uint8_t *img = input + y_pos + x*4;
|
|
|
|
+ uint32_t lum_pos0 = lum_y_pos + x;
|
|
|
|
+ uint32_t lum_pos1 = lum_pos0 + out_linesize[0];
|
|
|
|
+
|
|
|
|
+ __m128i line1 = _mm_load_si128((const __m128i*)img);
|
|
|
|
+ __m128i line2 = _mm_load_si128(
|
|
|
|
+ (const __m128i*)(img + in_linesize));
|
|
|
|
+
|
|
|
|
+ pack_shift(lum_plane, lum_pos0, lum_pos1,
|
|
|
|
+ line1, line2, lum_mask, 1);
|
|
|
|
+ pack_val(u_plane, lum_pos0, lum_pos1,
|
|
|
|
+ line1, line2, u_mask);
|
|
|
|
+ pack_shift(v_plane, lum_pos0, lum_pos1,
|
|
|
|
+ line1, line2, v_mask, 2);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
void decompress_420(
|
|
void decompress_420(
|
|
const uint8_t *const input[], const uint32_t in_linesize[],
|
|
const uint8_t *const input[], const uint32_t in_linesize[],
|
|
uint32_t start_y, uint32_t end_y,
|
|
uint32_t start_y, uint32_t end_y,
|