00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include "mmsgui/fb/mmsfbconv.h"
00034
00035 #ifdef __HAVE_PF_ARGB__
00036 #ifdef __HAVE_PF_YV12__
00037
00038 #include "mmstools/mmstools.h"
00039
00040
00041 #ifdef __HAVE_SSE__
00042
// Constant memory operands for the MMX conversion macros defined below.
// X1 is the pand mask that keeps the low byte of every 16-bit word of the
// loaded source quadword (the macros obtain the other bytes with psrlw $8).
00043 v4si X1 = { 0x00ff00ff, 0x00ff00ff };
// Coefficient vectors fed to pmaddwd. The *_RBRB vectors are multiplied with
// the masked source (mm0), the *_GG vectors with the source shifted right by
// 8 bits per word (mm1); the zero lanes of *_GG cancel the second word of
// each pair.
// NOTE(review): presumably v4six holds four 16-bit lanes and the word order
// is B,R / G,A for little-endian ARGB pixels - confirm against the v4six
// typedef and the pixel format definition.
00044 v4six Y_RBRB = { 25, 66, 25, 66 };
00045 v4six Y_GG = { 129, 0, 129, 0 };
00046 v4six U_RBRB = { 112, -38, 112, -38 };
00047 v4six U_GG = { -74, 0, -74, 0 };
00048 v4six V_RBRB = { -18, 112, -18, 112 };
00049 v4six V_GG = { -94, 0, -94, 0 };
00050
// Offsets added with paddd after the >>8 scaling: 16 for Y, 128 for U and V.
00051 v4six YY = { 16, 0, 16, 0 };
00052 v4six UV = { 128, 0, 128, 0 };
00053
00054
00055
// MMSFB_BLIT_BLEND_ARGB_TO_YV12_LOAD_SRC_ALPHA
//
// Loads the two source pixels at *ssrc (one 64-bit quadword) and leaves the
// alpha-premultiplied conversion result in MMX registers for the asm blocks
// that follow the macro invocation:
//   mm0 = source words masked with X1, mm1 = source words >> 8
//   mm3 = Y * A, mm4 = U * A, mm5 = V * A   (pmullw with the per-pixel alpha)
//   mm2 = TTTT - A (the weight later multiplied with the destination value)
//   mm7 = 0 (needed as the zero operand of the psadbw instructions that follow)
// mm2 initially holds the alpha words: mm1 shifted right by 16 bits per dword,
// so the upper word of every dword in mm2 is zero.
//
// Requirements at the expansion site: a pointer `ssrc` whose `i` member is
// dereferenced, and a vector `TTTT` in scope (the function below defines it
// as { 0x100, 0, 0x100, 0 }).
//
// The U and V sums produced by pmaddwd can be negative but are shifted with
// the logical psrld. That is harmless in this macro: only the low word of
// each dword survives, because pmullw multiplies the upper word by the zero
// upper word of mm2.
//
// The asm statements declare no MMX clobbers and hand their results to later
// asm statements through registers, so the statement order must not change.
00056 #define MMSFB_BLIT_BLEND_ARGB_TO_YV12_LOAD_SRC_ALPHA \
00057 __asm__ __volatile__ ( \
00058 "###########################################\n\t" \
00059 "# load src: x1 -> mm0, x2 -> mm1, A -> mm2 \n\t" \
00060 "movq %[src], %%mm0 \n\t" \
00061 "movq %%mm0, %%mm1 \n\t" \
00062 "pand %[X1], %%mm0 \n\t" \
00063 "psrlw $8, %%mm1 \n\t" \
00064 "movq %%mm1, %%mm2 \n\t" \
00065 "psrld $16, %%mm2 \n\t" \
00066 "###########################################\n\t" \
00067 : \
00068 : [src] "m" (*ssrc->i), [X1] "m" (*X1) \
00069 ); \
00070 __asm__ __volatile__ ( \
00071 "###########################################\n\t" \
00072 "# calc Y in mm3 \n\t" \
00073 "movq %%mm0, %%mm3 \n\t" \
00074 "pmaddwd %[Y_RBRB], %%mm3 \n\t" \
00075 "movq %%mm1, %%mm7 \n\t" \
00076 "pmaddwd %[Y_GG], %%mm7 \n\t" \
00077 "paddd %%mm7, %%mm3 \n\t" \
00078 "psrld $8, %%mm3 \n\t" \
00079 "paddd %[YY], %%mm3 \n\t" \
00080 "pmullw %%mm2, %%mm3 \n\t" \
00081 "###########################################\n\t" \
00082 : \
00083 : [Y_RBRB] "m" (*Y_RBRB), [Y_GG] "m" (*Y_GG), [YY] "m" (*YY) \
00084 ); \
00085 __asm__ __volatile__ ( \
00086 "###########################################\n\t" \
00087 "# calc U in mm4 \n\t" \
00088 "movq %%mm0, %%mm4 \n\t" \
00089 "pmaddwd %[U_RBRB], %%mm4 \n\t" \
00090 "movq %%mm1, %%mm7 \n\t" \
00091 "pmaddwd %[U_GG], %%mm7 \n\t" \
00092 "paddd %%mm7, %%mm4 \n\t" \
00093 "psrld $8, %%mm4 \n\t" \
00094 "paddd %[UV], %%mm4 \n\t" \
00095 "pmullw %%mm2, %%mm4 \n\t" \
00096 "###########################################\n\t" \
00097 : \
00098 : [U_RBRB] "m" (*U_RBRB), [U_GG] "m" (*U_GG), [UV] "m" (*UV) \
00099 ); \
00100 __asm__ __volatile__ ( \
00101 "###########################################\n\t" \
00102 "# calc V in mm5 \n\t" \
00103 "movq %%mm0, %%mm5 \n\t" \
00104 "pmaddwd %[V_RBRB], %%mm5 \n\t" \
00105 "movq %%mm1, %%mm7 \n\t" \
00106 "pmaddwd %[V_GG], %%mm7 \n\t" \
00107 "paddd %%mm7, %%mm5 \n\t" \
00108 "psrld $8, %%mm5 \n\t" \
00109 "paddd %[UV], %%mm5 \n\t" \
00110 "pmullw %%mm2, %%mm5 \n\t" \
00111 "###########################################\n\t" \
00112 : \
00113 : [V_RBRB] "m" (*V_RBRB), [V_GG] "m" (*V_GG), [UV] "m" (*UV) \
00114 ); \
00115 __asm__ __volatile__ ( \
00116 "###########################################\n\t" \
00117 "# calc A in mm2 \n\t" \
00118 "movq %%mm2, %%mm7 \n\t" \
00119 "movq %[TTTT], %%mm2 \n\t" \
00120 "psubd %%mm7, %%mm2 \n\t" \
00121 "###########################################\n\t" \
00122 "# important: clear mm7!!! \n\t" \
00123 "pxor %%mm7, %%mm7 \n\t" \
00124 "###########################################\n\t" \
00125 : \
00126 : [TTTT] "m" (*TTTT) \
00127 );
00128
00129
00130
00131
/*
 * MMSFB_BLIT_BLEND_ARGB_TO_YV12_LOAD_SRC
 *
 * Converts the two source pixels at *ssrc (one 64-bit quadword) to YUV for
 * the fully opaque case and leaves the result in MMX registers for the asm
 * blocks that follow the macro invocation:
 *   mm0 = source words masked with X1, mm1 = source words >> 8
 *   mm3 = Y, mm4 = U, mm5 = V   (one value per 32-bit lane, i.e. per pixel)
 *   mm7 = 0 (zero operand for the psadbw instructions of the consumers)
 *
 * Per lane:  value = ((RB-coefficients * mm0 + G-coefficients * mm1) >> 8)
 *                    + offset
 * with offset 16 for Y (YY) and 128 for U/V (UV).
 *
 * The U and V sums are signed (their coefficient vectors contain negative
 * entries), so they are scaled with the arithmetic shift psrad. With the
 * logical psrld a negative sum left 0x00ffffxx in the lane; adding 128 then
 * carried into the top byte, and because the consumers sum ALL bytes of the
 * register with psadbw, every pixel whose chroma is below 128 came out one
 * too large. Y is never negative, so psrld is kept there.
 *
 * Requirement at the expansion site: a pointer `ssrc` whose `i` member is
 * dereferenced. The asm statements declare no MMX clobbers and pass their
 * results on through registers, so the statement order must not change.
 */
#define MMSFB_BLIT_BLEND_ARGB_TO_YV12_LOAD_SRC \
    __asm__ __volatile__ ( \
        "###########################################\n\t" \
        "# load src: x1 -> mm0, x2 -> mm1 \n\t" \
        "movq %[src], %%mm0 \n\t" \
        "movq %%mm0, %%mm1 \n\t" \
        "pand %[X1], %%mm0 \n\t" \
        "psrlw $8, %%mm1 \n\t" \
        "###########################################\n\t" \
        : \
        : [src] "m" (*ssrc->i), [X1] "m" (*X1) \
    ); \
    __asm__ __volatile__ ( \
        "###########################################\n\t" \
        "# calc Y in mm3 \n\t" \
        "movq %%mm0, %%mm3 \n\t" \
        "pmaddwd %[Y_RBRB], %%mm3 \n\t" \
        "movq %%mm1, %%mm7 \n\t" \
        "pmaddwd %[Y_GG], %%mm7 \n\t" \
        "paddd %%mm7, %%mm3 \n\t" \
        "psrld $8, %%mm3 \n\t" \
        "paddd %[YY], %%mm3 \n\t" \
        "###########################################\n\t" \
        : \
        : [Y_RBRB] "m" (*Y_RBRB), [Y_GG] "m" (*Y_GG), [YY] "m" (*YY) \
    ); \
    __asm__ __volatile__ ( \
        "###########################################\n\t" \
        "# calc U in mm4 (signed sum: arithmetic shift) \n\t" \
        "movq %%mm0, %%mm4 \n\t" \
        "pmaddwd %[U_RBRB], %%mm4 \n\t" \
        "movq %%mm1, %%mm7 \n\t" \
        "pmaddwd %[U_GG], %%mm7 \n\t" \
        "paddd %%mm7, %%mm4 \n\t" \
        "psrad $8, %%mm4 \n\t" \
        "paddd %[UV], %%mm4 \n\t" \
        "###########################################\n\t" \
        : \
        : [U_RBRB] "m" (*U_RBRB), [U_GG] "m" (*U_GG), [UV] "m" (*UV) \
    ); \
    __asm__ __volatile__ ( \
        "###########################################\n\t" \
        "# calc V in mm5 (signed sum: arithmetic shift) \n\t" \
        "movq %%mm0, %%mm5 \n\t" \
        "pmaddwd %[V_RBRB], %%mm5 \n\t" \
        "movq %%mm1, %%mm7 \n\t" \
        "pmaddwd %[V_GG], %%mm7 \n\t" \
        "paddd %%mm7, %%mm5 \n\t" \
        "psrad $8, %%mm5 \n\t" \
        "paddd %[UV], %%mm5 \n\t" \
        "###########################################\n\t" \
        "# important: clear mm7!!! \n\t" \
        "pxor %%mm7, %%mm7 \n\t" \
        "###########################################\n\t" \
        : \
        : [V_RBRB] "m" (*V_RBRB), [V_GG] "m" (*V_GG), [UV] "m" (*UV) \
    );
00189
00190
00191
00192
00193
00194
00195 #endif
00196
00197
// Blends a rectangle of 32-bit ARGB source pixels onto a planar YV12
// destination (full-resolution Y plane followed by the V plane and then the
// U plane, both chroma planes at half width and half height).
//
// Parameters:
//   extbuf      source buffer; extbuf->ptr is read as 32-bit pixels and
//               extbuf->pitch is the source line length in bytes
//   src_height  only decremented below and otherwise unused in this function
//   sx, sy      top-left corner of the source rectangle (pixels)
//   sw, sh      width and height of the rectangle (pixels)
//   dst         start of the destination Y plane
//   dst_pitch   length of one destination Y line in bytes
//   dst_height  number of lines of the destination Y plane
//   dx, dy      top-left corner of the target rectangle (pixels)
//
// One chroma sample covers a 2x2 block of Y samples. Rows/columns of the
// target rectangle that only partially cover such a block (odd dx, dy,
// dx+sw or dy+sh) are blended first with scalar code, then the remaining
// even-aligned area is processed in 2x2 blocks, either with scalar code or,
// if __HAVE_SSE__ is defined, with inline MMX/SSE assembly.
//
// NOTE(review): the per-pixel work is done by the macro
// MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL, which is defined elsewhere; presumably
// it blends one pixel into the given Y byte and accumulates the blended U/V
// into d_u/d_v using the locals SRC, A, OLDSRC, old_y, old_u and old_v -
// confirm against its definition.
00198 void mmsfb_blit_blend_argb_to_yv12(MMSFBExternalSurfaceBuffer *extbuf, int src_height, int sx, int sy, int sw, int sh,
00199 unsigned char *dst, int dst_pitch, int dst_height, int dx, int dy) {
00200
00201
// print a notice the first time this function is called
00202 static bool firsttime = true;
00203 if (firsttime) {
00204 printf("DISKO: Using accelerated blend ARGB to YV12.\n");
00205 firsttime = false;
00206 }
00207
00208
// source pixels and source pitch (in bytes)
00209 unsigned int *src = (unsigned int *)extbuf->ptr;
00210 int src_pitch = extbuf->pitch;
00211
00212
// pitches in pixels: 4 bytes per source pixel, 1 byte per Y sample,
// the chroma planes use half of the Y pitch
00213 int src_pitch_pix = src_pitch >> 2;
00214 int dst_pitch_pix = dst_pitch;
00215 int dst_pitch_pix_half = dst_pitch_pix >> 1;
00216
// move to the first source pixel of the rectangle
00217 src+= sx + sy * src_pitch_pix;
00218
00219
// clip the rectangle against the destination, nothing to do if it is empty
// NOTE(review): both tests and both assignments subtract sx/sy from a
// width/height, which looks unusual for a clip against the destination -
// confirm the intended formula.
00220 if (dst_pitch_pix - dx < sw - sx)
00221 sw = dst_pitch_pix - dx - sx;
00222 if (dst_height - dy < sh - sy)
00223 sh = dst_height - dy - sy;
00224 if ((sw <= 0)||(sh <= 0))
00225 return;
00226
// initialised to a value different from the first source pixel
// NOTE(review): OLDSRC and old_y/old_u/old_v are not referenced directly in
// this function; presumably the pixel macro uses them to cache the last
// converted pixel - confirm.
00227 unsigned int OLDSRC = (*src) + 1;
00228
00229 unsigned int old_y;
00230 unsigned int old_u;
00231 unsigned int old_v;
00232
// number of source pixels spanned by the rectangle (full source lines)
00233 int src_pixels = src_pitch_pix * sh;
00234
00235
// which edges of the target rectangle are not aligned to the 2x2 chroma grid
00236 bool odd_left = (dx & 0x01);
00237 bool odd_top = (dy & 0x01);
00238 bool odd_right = ((dx + sw) & 0x01);
00239 bool odd_bottom = ((dy + sh) & 0x01);
00240
00241
// start positions in the three planes: the V plane directly follows the Y
// plane, the U plane follows the V plane
00242 unsigned char *dst_y = dst + dx + dy * dst_pitch_pix;
00243 unsigned char *dst_u = dst + dst_pitch_pix * dst_height + dst_pitch_pix_half * (dst_height >> 1) + (dx >> 1) + (dy >> 1) * dst_pitch_pix_half;
00244 unsigned char *dst_v = dst + dst_pitch_pix * dst_height + (dx >> 1) + (dy >> 1) * dst_pitch_pix_half;
00245
00246
// offsets of the right (2), lower (3) and lower-right (4, see below)
// neighbour inside a 2x2 block, relative to its top-left pixel
00247 unsigned int dst_y2_offs = 1;
00248 unsigned int dst_y3_offs = dst_pitch;
00249 unsigned int src2_offs = 1;
00250 unsigned int src3_offs = src_pitch_pix;
00251
00252
// chroma accumulators
00253 register unsigned int d_u;
00254 register unsigned int d_v;
00255
00256
// top-left corner pixel: the chroma sample keeps 3/4 of its old value and
// receives one pixel's contribution
00257 if (odd_top && odd_left) {
00258
00259 register unsigned int SRC;
00260 register unsigned int A;
00261
00262
00263 d_u = (*dst_u) * 3;
00264 d_v = (*dst_v) * 3;
00265
00266
00267 MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(*src, *dst_y, *dst_u, *dst_v, d_u+=, d_v+=);
00268
00269
00270 *dst_u = d_u >> 2;
00271 *dst_v = d_v >> 2;
00272 }
00273
// top-right corner pixel
// NOTE(review): the PUSHPTR/POPPTR macros are defined elsewhere; presumably
// they save and restore src, dst_y, dst_u and dst_v - confirm.
00274 if (odd_top && odd_right) {
00275
00276 MMSFB_CONV_BLEND_ARGB_TO_YV12_PUSHPTR;
00277
00278
// go to the last pixel of the first line
00279 src += sw - 1;
00280 dst_y += sw - 1;
00281 if (odd_left) {
00282 dst_u += sw >> 1;
00283 dst_v += sw >> 1;
00284 }
00285 else {
00286 dst_u += (sw - 1) >> 1;
00287 dst_v += (sw - 1) >> 1;
00288 }
00289
00290 register unsigned int SRC;
00291 register unsigned int A;
00292
00293
00294 d_u = (*dst_u) * 3;
00295 d_v = (*dst_v) * 3;
00296
00297
00298 MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(*src, *dst_y, *dst_u, *dst_v, d_u+=, d_v+=);
00299
00300
00301 *dst_u = d_u >> 2;
00302 *dst_v = d_v >> 2;
00303
00304
00305 MMSFB_CONV_BLEND_ARGB_TO_YV12_POPPTR;
00306 }
00307
// bottom-left corner pixel
00308 if (odd_bottom && odd_left) {
00309
00310 MMSFB_CONV_BLEND_ARGB_TO_YV12_PUSHPTR;
00311
00312
// go to the first pixel of the last line
00313 src += src_pitch_pix * (sh-1);
00314 dst_y += dst_pitch_pix * (sh-1);
00315 if (odd_top) {
00316 dst_u += dst_pitch_pix_half * (sh >> 1);
00317 dst_v += dst_pitch_pix_half * (sh >> 1);
00318 }
00319 else {
00320 dst_u += dst_pitch_pix_half * ((sh-1) >> 1);
00321 dst_v += dst_pitch_pix_half * ((sh-1) >> 1);
00322 }
00323
00324 register unsigned int SRC;
00325 register unsigned int A;
00326
00327
00328 d_u = (*dst_u) * 3;
00329 d_v = (*dst_v) * 3;
00330
00331
00332 MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(*src, *dst_y, *dst_u, *dst_v, d_u+=, d_v+=);
00333
00334
00335 *dst_u = d_u >> 2;
00336 *dst_v = d_v >> 2;
00337
00338
00339 MMSFB_CONV_BLEND_ARGB_TO_YV12_POPPTR;
00340 }
00341
// bottom-right corner pixel
00342 if (odd_bottom && odd_right) {
00343
00344 MMSFB_CONV_BLEND_ARGB_TO_YV12_PUSHPTR;
00345
00346
// go to the last line ...
00347 src += src_pitch_pix * (sh-1);
00348 dst_y += dst_pitch_pix * (sh-1);
00349 if (odd_top) {
00350 dst_u += dst_pitch_pix_half * (sh >> 1);
00351 dst_v += dst_pitch_pix_half * (sh >> 1);
00352 }
00353 else {
00354 dst_u += dst_pitch_pix_half * ((sh-1) >> 1);
00355 dst_v += dst_pitch_pix_half * ((sh-1) >> 1);
00356 }
00357
00358
// ... and to its last pixel
00359 src += sw - 1;
00360 dst_y += sw - 1;
00361 if (odd_left) {
00362 dst_u += sw >> 1;
00363 dst_v += sw >> 1;
00364 }
00365 else {
00366 dst_u += (sw - 1) >> 1;
00367 dst_v += (sw - 1) >> 1;
00368 }
00369
00370 register unsigned int SRC;
00371 register unsigned int A;
00372
00373
00374 d_u = (*dst_u) * 3;
00375 d_v = (*dst_v) * 3;
00376
00377
00378 MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(*src, *dst_y, *dst_u, *dst_v, d_u+=, d_v+=);
00379
00380
00381 *dst_u = d_u >> 2;
00382 *dst_v = d_v >> 2;
00383
00384
00385 MMSFB_CONV_BLEND_ARGB_TO_YV12_POPPTR;
00386 }
00387
// odd top line (without the corner pixels handled above): two horizontally
// adjacent pixels share one chroma sample, which keeps half of its old value
00388 if (odd_top) {
00389
00390 MMSFB_CONV_BLEND_ARGB_TO_YV12_PUSHPTR;
00391
00392
// skip the corner pixels
00393 unsigned int *line_end = src + sw;
00394 if (odd_left) {
00395 src++;
00396 dst_y++;
00397 dst_u++;
00398 dst_v++;
00399 line_end--;
00400 }
00401 if (odd_right)
00402 line_end--;
00403
00404
00405 while (src < line_end) {
00406 register unsigned int SRC;
00407 register unsigned int A;
00408
00409
00410 d_u = (*dst_u) << 1;
00411 d_v = (*dst_v) << 1;
00412
00413
00414 MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(*src, *dst_y, *dst_u, *dst_v, d_u+=, d_v+=);
00415 MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(src[src2_offs], dst_y[dst_y2_offs], *dst_u, *dst_v, d_u+=, d_v+=);
00416
00417
00418 *dst_u = d_u >> 2;
00419 *dst_v = d_v >> 2;
00420
00421
00422 src+=2;
00423 dst_y+=2;
00424 dst_u++;
00425 dst_v++;
00426 }
00427
00428
00429 MMSFB_CONV_BLEND_ARGB_TO_YV12_POPPTR;
00430 }
00431
// odd bottom line (without the corner pixels), handled like the top line
00432 if (odd_bottom) {
00433
00434 MMSFB_CONV_BLEND_ARGB_TO_YV12_PUSHPTR;
00435
00436
// go to the last line
00437 src += src_pitch_pix * (sh-1);
00438 dst_y += dst_pitch_pix * (sh-1);
00439 if (odd_top) {
00440 dst_u += dst_pitch_pix_half * (sh >> 1);
00441 dst_v += dst_pitch_pix_half * (sh >> 1);
00442 }
00443 else {
00444 dst_u += dst_pitch_pix_half * ((sh-1) >> 1);
00445 dst_v += dst_pitch_pix_half * ((sh-1) >> 1);
00446 }
00447
// skip the corner pixels
00448 unsigned int *line_end = src + sw;
00449 if (odd_left) {
00450 src++;
00451 dst_y++;
00452 dst_u++;
00453 dst_v++;
00454 line_end--;
00455 }
00456 if (odd_right)
00457 line_end--;
00458
00459
00460 while (src < line_end) {
00461 register unsigned int SRC;
00462 register unsigned int A;
00463
00464
00465 d_u = (*dst_u) << 1;
00466 d_v = (*dst_v) << 1;
00467
00468
00469 MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(*src, *dst_y, *dst_u, *dst_v, d_u+=, d_v+=);
00470 MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(src[src2_offs], dst_y[dst_y2_offs], *dst_u, *dst_v, d_u+=, d_v+=);
00471
00472
00473 *dst_u = d_u >> 2;
00474 *dst_v = d_v >> 2;
00475
00476
00477 src+=2;
00478 dst_y+=2;
00479 dst_u++;
00480 dst_v++;
00481 }
00482
00483
00484 MMSFB_CONV_BLEND_ARGB_TO_YV12_POPPTR;
00485 }
00486
// odd left column (without the corner pixels): two vertically adjacent
// pixels share one chroma sample, which keeps half of its old value
00487 if (odd_left) {
00488
00489 MMSFB_CONV_BLEND_ARGB_TO_YV12_PUSHPTR;
00490
00491
// two source/Y lines and one chroma line per iteration
00492 unsigned int *src_end = src + src_pixels;
00493 int src_pitch_diff = src_pitch_pix << 1;
00494 int dst_pitch_diff = dst_pitch_pix << 1;
00495 int dst_pitch_uvdiff = dst_pitch_pix_half;
// skip the odd top and bottom lines
00496 if (odd_top) {
00497 src += src_pitch_pix;
00498 src_end -= src_pitch_pix;
00499 dst_y += dst_pitch_pix;
00500 dst_u += dst_pitch_pix_half;
00501 dst_v += dst_pitch_pix_half;
00502 }
00503 if (odd_bottom)
00504 src_end -= src_pitch_pix;
00505
00506
00507 while (src < src_end) {
00508
00509 register unsigned int SRC;
00510 register unsigned int A;
00511
00512
00513 d_u = (*dst_u) << 1;
00514 d_v = (*dst_v) << 1;
00515
00516
00517 MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(*src, *dst_y, *dst_u, *dst_v, d_u+=, d_v+=);
00518 MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(src[src3_offs], dst_y[dst_y3_offs], *dst_u, *dst_v, d_u+=, d_v+=);
00519
00520
00521 *dst_u = d_u >> 2;
00522 *dst_v = d_v >> 2;
00523
00524
00525 src += src_pitch_diff;
00526 dst_y += dst_pitch_diff;
00527 dst_u += dst_pitch_uvdiff;
00528 dst_v += dst_pitch_uvdiff;
00529 }
00530
00531
00532 MMSFB_CONV_BLEND_ARGB_TO_YV12_POPPTR;
00533 }
00534
// odd right column (without the corner pixels), handled like the left column
00535 if (odd_right) {
00536
00537 MMSFB_CONV_BLEND_ARGB_TO_YV12_PUSHPTR;
00538
00539
00540 unsigned int *src_end = src + src_pixels;
00541 int src_pitch_diff = src_pitch_pix << 1;
00542 int dst_pitch_diff = dst_pitch_pix << 1;
00543 int dst_pitch_uvdiff = dst_pitch_pix_half;
// go to the last pixel of the line
00544 src += sw - 1;
00545 dst_y += sw - 1;
00546 if (odd_left) {
00547 dst_u += sw >> 1;
00548 dst_v += sw >> 1;
00549 }
00550 else {
00551 dst_u += (sw - 1) >> 1;
00552 dst_v += (sw - 1) >> 1;
00553 }
// skip the odd top and bottom lines
00554 if (odd_top) {
00555 src += src_pitch_pix;
00556 src_end -= src_pitch_pix;
00557 dst_y += dst_pitch_pix;
00558 dst_u += dst_pitch_pix_half;
00559 dst_v += dst_pitch_pix_half;
00560 }
00561 if (odd_bottom)
00562 src_end -= src_pitch_pix;
00563
00564
00565 while (src < src_end) {
00566
00567 register unsigned int SRC;
00568 register unsigned int A;
00569
00570
00571 d_u = (*dst_u) << 1;
00572 d_v = (*dst_v) << 1;
00573
00574
00575 MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(*src, *dst_y, *dst_u, *dst_v, d_u+=, d_v+=);
00576 MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(src[src3_offs], dst_y[dst_y3_offs], *dst_u, *dst_v, d_u+=, d_v+=);
00577
00578
00579 *dst_u = d_u >> 2;
00580 *dst_v = d_v >> 2;
00581
00582
00583 src += src_pitch_diff;
00584 dst_y += dst_pitch_diff;
00585 dst_u += dst_pitch_uvdiff;
00586 dst_v += dst_pitch_uvdiff;
00587 }
00588
00589
00590 MMSFB_CONV_BLEND_ARGB_TO_YV12_POPPTR;
00591 }
00592
00593
// shrink the rectangle to the even-aligned area that remains
00594 if (odd_top) {
00595
00596 dy++;
00597 sh--;
00598 src+=src_pitch_pix;
00599 src_pixels-=src_pitch_pix;
00600 dst_y+=dst_pitch;
00601 dst_u+=dst_pitch >> 1;
00602 dst_v+=dst_pitch >> 1;
00603 }
00604
00605 if (odd_bottom) {
00606
// the bottom line is excluded through src_pixels, which defines the end of
// the main loops; src_height is not read again in this function
00607 src_height--;
00608 src_pixels-=src_pitch_pix;
00609 }
00610
00611 if (odd_left) {
00612
00613 dx++;
00614 sw--;
00615 src++;
00616 dst_y++;
00617 dst_u++;
00618 dst_v++;
00619 }
00620
00621 if (odd_right) {
00622
00623 sw--;
00624 }
00625
00626
00627
00628
// scalar main loop: every iteration blends one 2x2 pixel block and writes
// one U and one V sample
00629 #ifndef __HAVE_SSE__
00630 unsigned int dst_y4_offs = dst_y3_offs + 1;
00631 unsigned int src4_offs = src3_offs + 1;
00632
00633
// after a pair of lines the pointers have advanced by sw (sw/2 for chroma),
// the diffs move them to the start of the next pair of lines
00634 unsigned int *src_end = src + src_pixels;
00635 int src_pitch_diff = (src_pitch_pix << 1) - sw;
00636 int dst_pitch_diff = (dst_pitch_pix << 1) - sw;
00637 int dst_pitch_uvdiff = (dst_pitch_pix - sw) >> 1;
00638
00639
// for all pairs of lines
00640 while (src < src_end) {
00641
00642 unsigned int *line_end = src + sw;
00643
00644
// for all 2x2 blocks of the pair of lines
00645 while (src < line_end) {
00646 register unsigned int SRC;
00647 register unsigned int A;
00648
00649
// the first pixel initialises d_u/d_v, the other three are added
00650 MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(*src, *dst_y, *dst_u, *dst_v, d_u=, d_v=);
00651 MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(src[src2_offs], dst_y[dst_y2_offs], *dst_u, *dst_v, d_u+=, d_v+=);
00652 MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(src[src3_offs], dst_y[dst_y3_offs], *dst_u, *dst_v, d_u+=, d_v+=);
00653 MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(src[src4_offs], dst_y[dst_y4_offs], *dst_u, *dst_v, d_u+=, d_v+=);
00654
00655
// store the average of the four contributions
00656 *dst_u = d_u >> 2;
00657 *dst_v = d_v >> 2;
00658
00659
00660 src +=2;
00661 dst_y+=2;
00662 dst_u++;
00663 dst_v++;
00664 }
00665
00666
00667 src += src_pitch_diff;
00668 dst_y += dst_pitch_diff;
00669 dst_u += dst_pitch_uvdiff;
00670 dst_v += dst_pitch_uvdiff;
00671 }
00672
00673 #else
00674
00675
00676
// MMX/SSE main loop: the source is read as pairs of pixels (_v4si), first
// the upper pair of a 2x2 block, then the lower pair.
// TTTT is the constant the alpha is subtracted from in the
// ..._LOAD_SRC_ALPHA macro.
00677 static v4six TTTT = { 0x100,0,0x100,0 };
00678
00679
00680
00681
00682
00683 _v4si *src_end = (_v4si *)(src + src_pixels);
00684 _v4si *ssrc = (_v4si *)src;
00685 int src_pitch_diff = (src_pitch_pix << 1) - sw;
00686 int dst_pitch_diff = (dst_pitch_pix << 1) - sw;
00687 int dst_pitch_uvdiff = (dst_pitch_pix - sw) >> 1;
00688
00689
// convert the source offsets and the width from pixels to pixel pairs
00690 src3_offs = src3_offs>>1;
00691 sw = sw >> 1;
00692 src_pitch_diff = src_pitch_diff >> 1;
00693
00694
00695
// steps back from the lower pair to the upper pair of the next block:
// one line up and one pair (two Y bytes) to the right
00696 int src3_offsX = src3_offs-1;
00697 int dst_y3_offsX = dst_y3_offs-2;
00698
// last pixel pair seen; the MMX registers still hold its conversion result,
// so the LOAD_SRC macros are skipped when the same pair repeats. It starts
// with a value different from the first pair.
00699 _v4si OLDSRC_MMX;
00700 OLDSRC_MMX.i[0] = ssrc->i[0]+1;
00701 OLDSRC_MMX.i[1] = ssrc->i[1]+1;
00702
00703
// for all pairs of lines
00704 while (ssrc < src_end) {
00705
00706 _v4si *line_end = ssrc + sw;
00707
00708
// for all 2x2 blocks of the pair of lines
00709 while (ssrc < line_end) {
// upper pair, both pixels opaque (alpha bytes c[3] and c[7] are 0xff):
// the destination is overwritten
00710 if ((ssrc->c[3]==0xff)&&(ssrc->c[7]==0xff)) {
00711
00712 if ((ssrc->i[0] != OLDSRC_MMX.i[0])||(ssrc->i[1] != OLDSRC_MMX.i[1])) {
00713
00714 MMSFB_BLIT_BLEND_ARGB_TO_YV12_LOAD_SRC;
00715 OLDSRC_MMX = *ssrc;
00716 }
00717
// store the two Y bytes with one 16-bit write and keep the U sum (word 0)
// and the V sum (word 2) of the pair in mm6 for the lower pair
// NOTE(review): the asm stores 16 bits through an "=m" operand whose C type
// is a single unsigned char - confirm the compiler cannot cache dst_y[1].
00718 __asm__ __volatile__ (
00719 "###########################################\n\t"
00720 "# save the two Y values \n\t"
00721 "pextrw $0, %%mm3, %%eax \n\t"
00722 "pextrw $2, %%mm3, %%ecx \n\t"
00723 "mov %%cl, %%ah \n\t"
00724 "mov %%ax, %[dst_y] \n\t"
00725 "###########################################\n\t"
00726 "# load reg mm0 with the U value \n\t"
00727 "movq %%mm4, %%mm0 \n\t"
00728 "psadbw %%mm7, %%mm0 \n\t"
00729 "# save the U result in mm6 \n\t"
00730 "movq %%mm0, %%mm6 \n\t"
00731 "###########################################\n\t"
00732 "# load reg mm0 with the V value \n\t"
00733 "movq %%mm5, %%mm0 \n\t"
00734 "psadbw %%mm7, %%mm0 \n\t"
00735 "pextrw $0, %%mm0, %%eax \n\t"
00736 "# save the V result in mm6 \n\t"
00737 "pinsrw $2, %%eax, %%mm6 \n\t"
00738 "###########################################\n\t"
00739 : [dst_y] "=m" (*dst_y)
00740 :
00741 : "cc", "%eax", "%ecx"
00742 );
00743
00744 }
00745 else
// upper pair, both pixels fully transparent: Y stays untouched and the
// existing chroma sample is used (times two) as the contribution of the pair
00746 if ((!ssrc->c[3])&&(!ssrc->c[7])) {
00747
00748 if ((ssrc->i[0] != OLDSRC_MMX.i[0])||(ssrc->i[1] != OLDSRC_MMX.i[1])) {
00749
// remember the pair without converting it
00750 OLDSRC_MMX = *ssrc;
00751 }
00752
00753
00754 __asm__ __volatile__ (
00755 "###########################################\n\t"
00756 "# load reg eax with the U value \n\t"
00757 "xor %%eax, %%eax \n\t"
00758 "mov %[dst_u], %%al \n\t"
00759 "# calc U * 2 \n\t"
00760 "shl $1, %%ax \n\t"
00761 "# save the U result in mm6 \n\t"
00762 "pinsrw $0, %%eax, %%mm6 \n\t"
00763 "###########################################\n\t"
00764 "# load reg eax with the V value \n\t"
00765 "xor %%eax, %%eax \n\t"
00766 "mov %[dst_v], %%al \n\t"
00767 "# calc V * 2 \n\t"
00768 "shl $1, %%ax \n\t"
00769 "# save the V result in mm6 \n\t"
00770 "pinsrw $2, %%eax, %%mm6 \n\t"
00771 "###########################################\n\t"
00772 :
00773 : [dst_u] "m" (*dst_u), [dst_v] "m" (*dst_v)
00774 : "cc", "%eax"
00775 );
00776 }
// upper pair with any other alpha combination: blend with the destination
00777 else {
00778 if ((ssrc->i[0] != OLDSRC_MMX.i[0])||(ssrc->i[1] != OLDSRC_MMX.i[1])) {
00779
00780 MMSFB_BLIT_BLEND_ARGB_TO_YV12_LOAD_SRC_ALPHA;
00781 OLDSRC_MMX = *ssrc;
00782 }
// Y: (dst * mm2 + mm3) >> 8 for both pixels, written back as two bytes
00783 __asm__ __volatile__ (
00784 "###########################################\n\t"
00785 "# load reg mm0 with the two Y values \n\t"
00786 "pxor %%mm0, %%mm0 \n\t"
00787 "mov %[dst_y], %%ax \n\t"
00788 "mov %%ax, %%cx \n\t"
00789 "xor %%ah, %%ah \n\t"
00790 "shr $8, %%cx \n\t"
00791 "pinsrw $0, %%eax, %%mm0 \n\t"
00792 "pinsrw $2, %%ecx, %%mm0 \n\t"
00793 "# calc Y \n\t"
00794 "pmullw %%mm2, %%mm0 \n\t"
00795 "paddw %%mm3, %%mm0 \n\t"
00796 "psrlw $8, %%mm0 \n\t"
00797 "# save the two Y results \n\t"
00798 "pextrw $0, %%mm0, %%eax \n\t"
00799 "pextrw $2, %%mm0, %%ecx \n\t"
00800 "mov %%cl, %%ah \n\t"
00801 "mov %%ax, %[dst_y] \n\t"
00802 "###########################################\n\t"
00803 : [dst_y] "+m" (*dst_y)
00804 :
00805 : "cc", "%eax", "%ecx"
00806 );
00807
// U and V: blend the existing chroma sample with both pixels and keep the
// sums of the pair in mm6 (U in word 0, V in word 2)
00808 __asm__ __volatile__ (
00809 "###########################################\n\t"
00810 "# load reg mm0 with the U value \n\t"
00811 "xor %%eax, %%eax \n\t"
00812 "mov %[dst_u], %%al \n\t"
00813 "pinsrw $0, %%eax, %%mm0 \n\t"
00814 "pinsrw $2, %%eax, %%mm0 \n\t"
00815 "# calc U \n\t"
00816 "pmullw %%mm2, %%mm0 \n\t"
00817 "paddw %%mm4, %%mm0 \n\t"
00818 "psrlw $8, %%mm0 \n\t"
00819 "psadbw %%mm7, %%mm0 \n\t"
00820 "# save the U result in mm6 \n\t"
00821 "movq %%mm0, %%mm6 \n\t"
00822 "###########################################\n\t"
00823 "# load reg mm0 with the V value \n\t"
00824 "xor %%eax, %%eax \n\t"
00825 "mov %[dst_v], %%al \n\t"
00826 "pinsrw $0, %%eax, %%mm0 \n\t"
00827 "pinsrw $2, %%eax, %%mm0 \n\t"
00828 "# calc V \n\t"
00829 "pmullw %%mm2, %%mm0 \n\t"
00830 "paddw %%mm5, %%mm0 \n\t"
00831 "psrlw $8, %%mm0 \n\t"
00832 "psadbw %%mm7, %%mm0 \n\t"
00833 "# save the V result in mm6 \n\t"
00834 "pextrw $0, %%mm0, %%eax \n\t"
00835 "pinsrw $2, %%eax, %%mm6 \n\t"
00836 "###########################################\n\t"
00837 :
00838 : [dst_u] "m" (*dst_u), [dst_v] "m" (*dst_v)
00839 : "cc", "%eax"
00840 );
00841 }
00842
// go to the lower pair of the 2x2 block
00843 ssrc+=src3_offs;
00844 dst_y+=dst_y3_offs;
00845
// lower pair, both pixels opaque
00846 if ((ssrc->c[3]==0xff)&&(ssrc->c[7]==0xff)) {
00847
00848 if ((ssrc->i[0] != OLDSRC_MMX.i[0])||(ssrc->i[1] != OLDSRC_MMX.i[1])) {
00849
00850 MMSFB_BLIT_BLEND_ARGB_TO_YV12_LOAD_SRC;
00851 OLDSRC_MMX = *ssrc;
00852
00853
// final chroma: (sum of the upper pair in mm6 + sum of this pair) >> 2
00854 __asm__ __volatile__ (
00855 "###########################################\n\t"
00856 "# load reg mm0 with the U value \n\t"
00857 "movq %%mm4, %%mm0 \n\t"
00858 "psadbw %%mm7, %%mm0 \n\t"
00859 "# save the U result to memory \n\t"
00860 "paddw %%mm6, %%mm0 \n\t"
00861 "pextrw $0, %%mm0, %%eax \n\t"
00862 "shr $2, %%eax \n\t"
00863 "mov %%al, %[dst_u] \n\t"
00864 "###########################################\n\t"
00865 "# load reg mm0 with the V value \n\t"
00866 "movq %%mm5, %%mm0 \n\t"
00867 "psadbw %%mm7, %%mm0 \n\t"
00868 "# save the V result to memory \n\t"
00869 "pextrw $0, %%mm0, %%eax \n\t"
00870 "pextrw $2, %%mm6, %%ecx \n\t"
00871 "add %%ecx, %%eax \n\t"
00872 "shr $2, %%eax \n\t"
00873 "mov %%al, %[dst_v] \n\t"
00874 "###########################################\n\t"
00875 : [dst_u] "=m" (*dst_u), [dst_v] "=m" (*dst_v)
00876 :
00877 : "cc", "%eax", "%ecx"
00878 );
00879 }
00880 else {
00881
00882
00883
// same pair as the upper one (which always updates OLDSRC_MMX), so both
// pairs contribute the same sum: the final chroma is the upper sum >> 1
00884 __asm__ __volatile__ (
00885 "###########################################\n\t"
00886 "# save the U result to memory \n\t"
00887 "pextrw $0, %%mm6, %%eax \n\t"
00888 "shr $1, %%eax \n\t"
00889 "mov %%al, %[dst_u] \n\t"
00890 "###########################################\n\t"
00891 "# save the V result to memory \n\t"
00892 "pextrw $2, %%mm6, %%eax \n\t"
00893 "shr $1, %%eax \n\t"
00894 "mov %%al, %[dst_v] \n\t"
00895 "###########################################\n\t"
00896 : [dst_u] "=m" (*dst_u), [dst_v] "=m" (*dst_v)
00897 :
00898 : "cc", "%eax"
00899 );
00900 }
00901
00902
// overwrite the two Y bytes of the lower pair
00903 __asm__ __volatile__ (
00904 "###########################################\n\t"
00905 "# save the two Y values \n\t"
00906 "pextrw $0, %%mm3, %%eax \n\t"
00907 "pextrw $2, %%mm3, %%ecx \n\t"
00908 "mov %%cl, %%ah \n\t"
00909 "mov %%ax, %[dst_y] \n\t"
00910 "###########################################\n\t"
00911 : [dst_y] "=m" (*dst_y)
00912 :
00913 : "cc", "%eax", "%ecx"
00914 );
00915 }
00916 else
// lower pair, both pixels fully transparent: Y stays untouched
00917 if ((!ssrc->c[3])&&(!ssrc->c[7])) {
00918
// if the pair equals OLDSRC_MMX the upper pair was the same transparent
// pair and the chroma sample needs no update; otherwise combine the upper
// sum in mm6 with two times the existing sample. OLDSRC_MMX is not updated
// in this branch.
00919 if ((ssrc->i[0] != OLDSRC_MMX.i[0])||(ssrc->i[1] != OLDSRC_MMX.i[1])) {
00920
00921 __asm__ __volatile__ (
00922 "###########################################\n\t"
00923 "# load reg eax with the U value \n\t"
00924 "xor %%eax, %%eax \n\t"
00925 "mov %[dst_u], %%al \n\t"
00926 "# calc U * 2 \n\t"
00927 "shl $1, %%ax \n\t"
00928 "# save the U result to memory \n\t"
00929 "pextrw $0, %%mm6, %%ecx \n\t"
00930 "add %%ecx, %%eax \n\t"
00931 "shr $2, %%eax \n\t"
00932 "mov %%al, %[dst_u] \n\t"
00933 "###########################################\n\t"
00934 : [dst_u] "+m" (*dst_u)
00935 :
00936 : "cc", "%eax", "%ecx"
00937 );
00938
00939 __asm__ __volatile__ (
00940 "###########################################\n\t"
00941 "# load reg eax with the V value \n\t"
00942 "xor %%eax, %%eax \n\t"
00943 "mov %[dst_v], %%al \n\t"
00944 "# calc V * 2 \n\t"
00945 "shl $1, %%ax \n\t"
00946 "# save the V result to memory \n\t"
00947 "pextrw $2, %%mm6, %%ecx \n\t"
00948 "add %%ecx, %%eax \n\t"
00949 "shr $2, %%eax \n\t"
00950 "mov %%al, %[dst_v] \n\t"
00951 "###########################################\n\t"
00952 : [dst_v] "+m" (*dst_v)
00953 :
00954 : "cc", "%eax", "%ecx"
00955 );
00956 }
00957 }
// lower pair with any other alpha combination: blend with the destination
00958 else {
00959
00960 if ((ssrc->i[0] != OLDSRC_MMX.i[0])||(ssrc->i[1] != OLDSRC_MMX.i[1])) {
00961
00962 MMSFB_BLIT_BLEND_ARGB_TO_YV12_LOAD_SRC_ALPHA;
00963 OLDSRC_MMX = *ssrc;
00964
00965
// final U: (upper sum in mm6 + blended sum of this pair) >> 2
00966 __asm__ __volatile__ (
00967 "###########################################\n\t"
00968 "# load reg mm0 with the U value \n\t"
00969 "xor %%eax, %%eax \n\t"
00970 "mov %[dst_u], %%al \n\t"
00971 "pinsrw $0, %%eax, %%mm0 \n\t"
00972 "pinsrw $2, %%eax, %%mm0 \n\t"
00973 "# calc U \n\t"
00974 "pmullw %%mm2, %%mm0 \n\t"
00975 "paddw %%mm4, %%mm0 \n\t"
00976 "psrlw $8, %%mm0 \n\t"
00977 "psadbw %%mm7, %%mm0 \n\t"
00978 "# save the U result to memory \n\t"
00979 "paddw %%mm6, %%mm0 \n\t"
00980 "pextrw $0, %%mm0, %%eax \n\t"
00981 "shr $2, %%eax \n\t"
00982 "mov %%al, %[dst_u] \n\t"
00983 "###########################################\n\t"
00984 : [dst_u] "+m" (*dst_u)
00985 :
00986 : "cc", "%eax"
00987 );
00988
00989
// final V, computed like U
00990 __asm__ __volatile__ (
00991 "###########################################\n\t"
00992 "# load reg mm0 with the V value \n\t"
00993 "xor %%eax, %%eax \n\t"
00994 "mov %[dst_v], %%al \n\t"
00995 "pinsrw $0, %%eax, %%mm0 \n\t"
00996 "pinsrw $2, %%eax, %%mm0 \n\t"
00997 "# calc V \n\t"
00998 "pmullw %%mm2, %%mm0 \n\t"
00999 "paddw %%mm5, %%mm0 \n\t"
01000 "psrlw $8, %%mm0 \n\t"
01001 "psadbw %%mm7, %%mm0 \n\t"
01002 "# save the V result to memory \n\t"
01003 "pextrw $0, %%mm0, %%eax \n\t"
01004 "pextrw $2, %%mm6, %%ecx \n\t"
01005 "add %%ecx, %%eax \n\t"
01006 "shr $2, %%eax \n\t"
01007 "mov %%al, %[dst_v] \n\t"
01008 "###########################################\n\t"
01009 : [dst_v] "+m" (*dst_v)
01010 :
01011 : "cc", "%eax", "%ecx"
01012 );
01013 }
01014 else {
01015
01016
01017
// same pair as the upper one: the final chroma is the upper sum >> 1
01018 __asm__ __volatile__ (
01019 "###########################################\n\t"
01020 "# save the U result to memory \n\t"
01021 "pextrw $0, %%mm6, %%eax \n\t"
01022 "shr $1, %%eax \n\t"
01023 "mov %%al, %[dst_u] \n\t"
01024 "###########################################\n\t"
01025 "# save the V result to memory \n\t"
01026 "pextrw $2, %%mm6, %%eax \n\t"
01027 "shr $1, %%eax \n\t"
01028 "mov %%al, %[dst_v] \n\t"
01029 "###########################################\n\t"
01030 : [dst_u] "=m" (*dst_u), [dst_v] "=m" (*dst_v)
01031 :
01032 : "cc", "%eax"
01033 );
01034 }
01035
01036
// blend the two Y bytes of the lower pair
01037 __asm__ __volatile__ (
01038 "###########################################\n\t"
01039 "# load reg mm0 with the two Y values \n\t"
01040 "pxor %%mm0, %%mm0 \n\t"
01041 "mov %[dst_y], %%ax \n\t"
01042 "mov %%ax, %%cx \n\t"
01043 "xor %%ah, %%ah \n\t"
01044 "shr $8, %%cx \n\t"
01045 "pinsrw $0, %%eax, %%mm0 \n\t"
01046 "pinsrw $2, %%ecx, %%mm0 \n\t"
01047 "# calc Y \n\t"
01048 "pmullw %%mm2, %%mm0 \n\t"
01049 "paddw %%mm3, %%mm0 \n\t"
01050 "psrlw $8, %%mm0 \n\t"
01051 "# save the two Y results \n\t"
01052 "pextrw $0, %%mm0, %%eax \n\t"
01053 "pextrw $2, %%mm0, %%ecx \n\t"
01054 "mov %%cl, %%ah \n\t"
01055 "mov %%ax, %[dst_y] \n\t"
01056 : [dst_y] "+m" (*dst_y)
01057 :
01058 : "cc", "%eax", "%ecx"
01059 );
01060
01061 }
01062
01063
01064
01065
// go to the upper pair of the next 2x2 block and to the next chroma sample
01066 ssrc-=src3_offsX;
01067 dst_y-=dst_y3_offsX;
01068 dst_u++;
01069 dst_v++;
01070 }
01071
01072
// go to the next pair of lines
01073 ssrc += src_pitch_diff;
01074 dst_y += dst_pitch_diff;
01075 dst_u += dst_pitch_uvdiff;
01076 dst_v += dst_pitch_uvdiff;
01077 }
01078
01079
// leave the MMX state so that the FPU can be used again
01080 __asm__ __volatile__ (
01081 "###########################################\n\t"
01082 "# clear the MMX state \n\t"
01083 "emms \n\t"
01084 "###########################################\n\t"
01085 );
01086 #endif
01087
01088 }
01089
01090 #endif
01091 #endif