Lines Matching refs:dst
82 extern "C" void rsdIntrinsicConvolve3x3_K(void *dst, const void *y0, in rsdIntrinsicConvolve3x3_K() argument
136 _mm_storel_epi64((__m128i *)dst, o0); in rsdIntrinsicConvolve3x3_K()
141 dst = (char *)dst + 8; in rsdIntrinsicConvolve3x3_K()
145 void rsdIntrinsicColorMatrix4x4_K(void *dst, const void *src, in rsdIntrinsicColorMatrix4x4_K() argument
193 _mm_storeu_si128((__m128i *)dst, o4); in rsdIntrinsicColorMatrix4x4_K()
196 dst = (char *)dst + 16; in rsdIntrinsicColorMatrix4x4_K()
200 void rsdIntrinsicColorMatrix3x3_K(void *dst, const void *src, in rsdIntrinsicColorMatrix3x3_K() argument
247 _mm_storeu_si128((__m128i *)dst, o4); in rsdIntrinsicColorMatrix3x3_K()
250 dst = (char *)dst + 16; in rsdIntrinsicColorMatrix3x3_K()
254 void rsdIntrinsicColorMatrixDot_K(void *dst, const void *src, in rsdIntrinsicColorMatrixDot_K() argument
299 _mm_storeu_si128((__m128i *)dst, o4); in rsdIntrinsicColorMatrixDot_K()
302 dst = (char *)dst + 16; in rsdIntrinsicColorMatrixDot_K()
306 void rsdIntrinsicBlurVFU4_K(void *dst, in rsdIntrinsicBlurVFU4_K() argument
337 _mm_storeu_ps((float *)dst, bp0); in rsdIntrinsicBlurVFU4_K()
338 _mm_storeu_ps((float *)dst + 4, bp1); in rsdIntrinsicBlurVFU4_K()
339 dst = (char *)dst + 32; in rsdIntrinsicBlurVFU4_K()
343 void rsdIntrinsicBlurHFU4_K(void *dst, in rsdIntrinsicBlurHFU4_K() argument
371 *(int *)dst = _mm_cvtsi128_si32(_mm_shuffle_epi8(o, Mu8)); in rsdIntrinsicBlurHFU4_K()
372 dst = (char *)dst + 4; in rsdIntrinsicBlurHFU4_K()
376 void rsdIntrinsicBlurHFU1_K(void *dst, in rsdIntrinsicBlurHFU1_K() argument
408 *(int *)dst = _mm_cvtsi128_si32(_mm_shuffle_epi8(o, Mu8)); in rsdIntrinsicBlurHFU1_K()
409 dst = (char *)dst + 4; in rsdIntrinsicBlurHFU1_K()
413 void rsdIntrinsicYuv_K(void *dst, in rsdIntrinsicYuv_K() argument
468 _mm_storeu_si128((__m128i *)dst, y4); in rsdIntrinsicYuv_K()
471 dst = (__m128i *)dst + 1; in rsdIntrinsicYuv_K()
475 void rsdIntrinsicYuvR_K(void *dst, in rsdIntrinsicYuvR_K() argument
530 _mm_storeu_si128((__m128i *)dst, y4); in rsdIntrinsicYuvR_K()
533 dst = (__m128i *)dst + 1; in rsdIntrinsicYuvR_K()
537 void rsdIntrinsicYuv2_K(void *dst, in rsdIntrinsicYuv2_K() argument
591 _mm_storeu_si128((__m128i *)dst, y4); in rsdIntrinsicYuv2_K()
595 dst = (__m128i *)dst + 1; in rsdIntrinsicYuv2_K()
599 extern "C" void rsdIntrinsicConvolve5x5_K(void *dst, const void *y0, in rsdIntrinsicConvolve5x5_K() argument
752 _mm_storeu_si128((__m128i *)dst, o0); in rsdIntrinsicConvolve5x5_K()
759 dst = (char *)dst + 16; in rsdIntrinsicConvolve5x5_K()
763 void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendSrcOver_K() argument
774 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendSrcOver_K()
775 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendSrcOver_K()
811 _mm_storeu_si128((__m128i *)dst, t0); in rsdIntrinsicBlendSrcOver_K()
812 _mm_storeu_si128((__m128i *)dst + 1, t2); in rsdIntrinsicBlendSrcOver_K()
815 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendSrcOver_K()
819 void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendDstOver_K() argument
830 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendDstOver_K()
831 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendDstOver_K()
868 _mm_storeu_si128((__m128i *)dst, t0); in rsdIntrinsicBlendDstOver_K()
869 _mm_storeu_si128((__m128i *)dst + 1, t2); in rsdIntrinsicBlendDstOver_K()
872 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendDstOver_K()
876 void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendSrcIn_K() argument
885 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendSrcIn_K()
886 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendSrcIn_K()
918 _mm_storeu_si128((__m128i *)dst, t0); in rsdIntrinsicBlendSrcIn_K()
919 _mm_storeu_si128((__m128i *)dst + 1, t2); in rsdIntrinsicBlendSrcIn_K()
922 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendSrcIn_K()
926 void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendDstIn_K() argument
935 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendDstIn_K()
936 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendDstIn_K()
968 _mm_storeu_si128((__m128i *)dst, t0); in rsdIntrinsicBlendDstIn_K()
969 _mm_storeu_si128((__m128i *)dst + 1, t2); in rsdIntrinsicBlendDstIn_K()
972 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendDstIn_K()
976 void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendSrcOut_K() argument
987 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendSrcOut_K()
988 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendSrcOut_K()
1020 _mm_storeu_si128((__m128i *)dst, t0); in rsdIntrinsicBlendSrcOut_K()
1021 _mm_storeu_si128((__m128i *)dst + 1, t2); in rsdIntrinsicBlendSrcOut_K()
1024 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendSrcOut_K()
1028 void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendDstOut_K() argument
1039 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendDstOut_K()
1040 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendDstOut_K()
1072 _mm_storeu_si128((__m128i *)dst, t0); in rsdIntrinsicBlendDstOut_K()
1073 _mm_storeu_si128((__m128i *)dst + 1, t2); in rsdIntrinsicBlendDstOut_K()
1076 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendDstOut_K()
1080 void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendSrcAtop_K() argument
1092 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendSrcAtop_K()
1093 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendSrcAtop_K()
1143 _mm_storeu_si128((__m128i *)dst, t0); in rsdIntrinsicBlendSrcAtop_K()
1144 _mm_storeu_si128((__m128i *)dst + 1, t2); in rsdIntrinsicBlendSrcAtop_K()
1147 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendSrcAtop_K()
1151 void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendDstAtop_K() argument
1163 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendDstAtop_K()
1164 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendDstAtop_K()
1214 _mm_storeu_si128((__m128i *)dst, t0); in rsdIntrinsicBlendDstAtop_K()
1215 _mm_storeu_si128((__m128i *)dst + 1, t2); in rsdIntrinsicBlendDstAtop_K()
1218 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendDstAtop_K()
1222 void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendXor_K() argument
1229 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendXor_K()
1230 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendXor_K()
1235 _mm_storeu_si128((__m128i *)dst, out0); in rsdIntrinsicBlendXor_K()
1236 _mm_storeu_si128((__m128i *)dst + 1, out1); in rsdIntrinsicBlendXor_K()
1239 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendXor_K()
1243 void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendMultiply_K() argument
1251 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendMultiply_K()
1252 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendMultiply_K()
1272 _mm_storeu_si128((__m128i *)dst, t0); in rsdIntrinsicBlendMultiply_K()
1273 _mm_storeu_si128((__m128i *)dst + 1, t2); in rsdIntrinsicBlendMultiply_K()
1276 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendMultiply_K()
1280 void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendAdd_K() argument
1287 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendAdd_K()
1288 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendAdd_K()
1293 _mm_storeu_si128((__m128i *)dst, out0); in rsdIntrinsicBlendAdd_K()
1294 _mm_storeu_si128((__m128i *)dst + 1, out1); in rsdIntrinsicBlendAdd_K()
1297 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendAdd_K()
1301 void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8) { in rsdIntrinsicBlendSub_K() argument
1308 out0 = _mm_loadu_si128((const __m128i *)dst); in rsdIntrinsicBlendSub_K()
1309 out1 = _mm_loadu_si128((const __m128i *)dst + 1); in rsdIntrinsicBlendSub_K()
1314 _mm_storeu_si128((__m128i *)dst, out0); in rsdIntrinsicBlendSub_K()
1315 _mm_storeu_si128((__m128i *)dst + 1, out1); in rsdIntrinsicBlendSub_K()
1318 dst = (__m128i *)dst + 2; in rsdIntrinsicBlendSub_K()