00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 #include "mmsgui/fb/mmsfbconv.h"
00034 
00035 #ifdef __HAVE_PF_ARGB__
00036 #ifdef __HAVE_PF_YV12__
00037 
00038 #include "mmstools/mmstools.h"
00039 
00040 
00041 #ifdef __HAVE_SSE__
00042 // Constant memory operands for the MMX conversion macros below; each one is handed to the inline asm as an "m" input.
00043     v4si X1 = { 0x00ff00ff, 0x00ff00ff };     // pand mask: keeps the low byte of each 16-bit word (assumes v4si holds two 32-bit ints - TODO confirm typedef)
00044     v4six Y_RBRB = { 25, 66, 25, 66 };        // pmaddwd weights for Y, applied to the masked low bytes (mm0)
00045     v4six Y_GG   = { 129, 0, 129, 0 };        // pmaddwd weights for Y, applied to the shifted high bytes (mm1); the 0 weight discards the top byte of each pixel
00046     v4six U_RBRB = { 112, -38, 112, -38 };    // pmaddwd weights for U, applied to the masked low bytes (mm0)
00047     v4six U_GG   = { -74, 0, -74, 0 };        // pmaddwd weights for U, applied to the shifted high bytes (mm1)
00048     v4six V_RBRB = { -18, 112, -18, 112 };    // pmaddwd weights for V, applied to the masked low bytes (mm0)
00049     v4six V_GG   = { -94, 0, -94, 0 };        // pmaddwd weights for V, applied to the shifted high bytes (mm1)
00050 // Offsets added after the weighted sums have been shifted right by 8: YY is added to Y, UV is added to both U and V.
00051     v4six YY = { 16, 0, 16, 0 };
00052     v4six UV = { 128, 0, 128, 0 };
00053 // MMSFB_BLIT_BLEND_ARGB_TO_YV12_LOAD_SRC_ALPHA: loads the 8 bytes at *ssrc (two 32-bit pixels) and splits them into mm0 (low byte of each 16-bit word), mm1 (high byte of each word) and mm2 (top byte of each pixel, used as alpha).
00054 // It leaves Y, U and V, each already multiplied by that alpha, in mm3, mm4 and mm5, then replaces mm2 with TTTT minus alpha (the factor the caller multiplies the destination value with) and finally zeroes mm7, which the caller's psadbw uses as the zero operand.
00055 // Expands to five separate asm statements that hand values over in MMX registers; needs 'ssrc' and 'TTTT' in scope where it is used. NOTE(review): no MMX registers are declared as clobbered - confirm the compiler cannot disturb mm0-mm7 between the statements.
00056 #define MMSFB_BLIT_BLEND_ARGB_TO_YV12_LOAD_SRC_ALPHA                    \
00057             __asm__ __volatile__ (                                      \
00058                     "###########################################\n\t"   \
00059                     "# load src: x1 -> mm0, x2 -> mm1, A -> mm2 \n\t"   \
00060                     "movq       %[src],     %%mm0               \n\t"   \
00061                     "movq       %%mm0,      %%mm1               \n\t"   \
00062                     "pand       %[X1],      %%mm0               \n\t"   \
00063                     "psrlw      $8,         %%mm1               \n\t"   \
00064                     "movq       %%mm1,      %%mm2               \n\t"   \
00065                     "psrld      $16,        %%mm2               \n\t"   \
00066                     "###########################################\n\t"   \
00067                     :                                   \
00068                     : [src] "m" (*ssrc->i), [X1] "m" (*X1)              \
00069                     );                                                  \
00070             __asm__ __volatile__ (                                      \
00071                     "###########################################\n\t"   \
00072                     "# calc Y in mm3                            \n\t"   \
00073                     "movq       %%mm0,      %%mm3               \n\t"   \
00074                     "pmaddwd    %[Y_RBRB],  %%mm3               \n\t"   \
00075                     "movq       %%mm1,      %%mm7               \n\t"   \
00076                     "pmaddwd    %[Y_GG],    %%mm7               \n\t"   \
00077                     "paddd      %%mm7,      %%mm3               \n\t"   \
00078                     "psrld      $8,         %%mm3               \n\t"   \
00079                     "paddd      %[YY],      %%mm3               \n\t"   \
00080                     "pmullw     %%mm2,      %%mm3               \n\t"   \
00081                     "###########################################\n\t"   \
00082                     :                                   \
00083                     : [Y_RBRB] "m" (*Y_RBRB), [Y_GG] "m" (*Y_GG), [YY] "m" (*YY)    \
00084                     );                                                  \
00085             __asm__ __volatile__ (                                      \
00086                     "###########################################\n\t"   \
00087                     "# calc U in mm4                            \n\t"   \
00088                     "movq       %%mm0,      %%mm4               \n\t"   \
00089                     "pmaddwd    %[U_RBRB],  %%mm4               \n\t"   \
00090                     "movq       %%mm1,      %%mm7               \n\t"   \
00091                     "pmaddwd    %[U_GG],    %%mm7               \n\t"   \
00092                     "paddd      %%mm7,      %%mm4               \n\t"   \
00093                     "psrld      $8,         %%mm4               \n\t"   \
00094                     "paddd      %[UV],      %%mm4               \n\t"   \
00095                     "pmullw     %%mm2,      %%mm4               \n\t"   \
00096                     "###########################################\n\t"   \
00097                     :                                   \
00098                     : [U_RBRB] "m" (*U_RBRB), [U_GG] "m" (*U_GG), [UV] "m" (*UV)    \
00099                     );                                                  \
00100             __asm__ __volatile__ (                                      \
00101                     "###########################################\n\t"   \
00102                     "# calc V in mm5                            \n\t"   \
00103                     "movq       %%mm0,      %%mm5               \n\t"   \
00104                     "pmaddwd    %[V_RBRB],  %%mm5               \n\t"   \
00105                     "movq       %%mm1,      %%mm7               \n\t"   \
00106                     "pmaddwd    %[V_GG],    %%mm7               \n\t"   \
00107                     "paddd      %%mm7,      %%mm5               \n\t"   \
00108                     "psrld      $8,         %%mm5               \n\t"   \
00109                     "paddd      %[UV],      %%mm5               \n\t"   \
00110                     "pmullw     %%mm2,      %%mm5               \n\t"   \
00111                     "###########################################\n\t"   \
00112                     :                                   \
00113                     : [V_RBRB] "m" (*V_RBRB), [V_GG] "m" (*V_GG), [UV] "m" (*UV)    \
00114                     );                                                  \
00115             __asm__ __volatile__ (                                      \
00116                     "###########################################\n\t"   \
00117                     "# calc A in mm2                            \n\t"   \
00118                     "movq       %%mm2,      %%mm7               \n\t"   \
00119                     "movq       %[TTTT],    %%mm2               \n\t"   \
00120                     "psubd      %%mm7,      %%mm2               \n\t"   \
00121                     "###########################################\n\t"   \
00122                     "# important: clear mm7!!!                  \n\t"   \
00123                     "pxor       %%mm7,      %%mm7               \n\t"   \
00124                     "###########################################\n\t"   \
00125                     :                                   \
00126                     : [TTTT] "m" (*TTTT)                                \
00127                     );
00128 // MMSFB_BLIT_BLEND_ARGB_TO_YV12_LOAD_SRC: converts the 8 bytes at *ssrc (two 32-bit pixels) to Y/U/V without any alpha scaling.
00129 // The pixels are split into mm0 (low byte of each 16-bit word) and mm1 (high byte of each word); the weighted sums, shifted right by 8 and offset by YY/UV, are left in mm3 (Y), mm4 (U) and mm5 (V).
00130 // mm7 serves as scratch and is zeroed at the end because the caller's psadbw uses it as the zero operand; mm2 is not written by this macro.
00131 // Expands to four separate asm statements that hand values over in MMX registers and needs 'ssrc' in scope where it is used. NOTE(review): no MMX registers are declared as clobbered - confirm this is safe with the compiler in use.
00132 #define MMSFB_BLIT_BLEND_ARGB_TO_YV12_LOAD_SRC                          \
00133             __asm__ __volatile__ (                                      \
00134                     "###########################################\n\t"   \
00135                     "# load src: x1 -> mm0, x2 -> mm1           \n\t"   \
00136                     "movq       %[src],     %%mm0               \n\t"   \
00137                     "movq       %%mm0,      %%mm1               \n\t"   \
00138                     "pand       %[X1],      %%mm0               \n\t"   \
00139                     "psrlw      $8,         %%mm1               \n\t"   \
00140                     "###########################################\n\t"   \
00141                     :                                   \
00142                     : [src] "m" (*ssrc->i), [X1] "m" (*X1)              \
00143                     );                                                  \
00144             __asm__ __volatile__ (                                      \
00145                     "###########################################\n\t"   \
00146                     "# calc Y in mm3                            \n\t"   \
00147                     "movq       %%mm0,      %%mm3               \n\t"   \
00148                     "pmaddwd    %[Y_RBRB],  %%mm3               \n\t"   \
00149                     "movq       %%mm1,      %%mm7               \n\t"   \
00150                     "pmaddwd    %[Y_GG],    %%mm7               \n\t"   \
00151                     "paddd      %%mm7,      %%mm3               \n\t"   \
00152                     "psrld      $8,         %%mm3               \n\t"   \
00153                     "paddd      %[YY],      %%mm3               \n\t"   \
00154                     "###########################################\n\t"   \
00155                     :                                   \
00156                     : [Y_RBRB] "m" (*Y_RBRB), [Y_GG] "m" (*Y_GG), [YY] "m" (*YY)    \
00157                     );                                                  \
00158             __asm__ __volatile__ (                                      \
00159                     "###########################################\n\t"   \
00160                     "# calc U in mm4                            \n\t"   \
00161                     "movq       %%mm0,      %%mm4               \n\t"   \
00162                     "pmaddwd    %[U_RBRB],  %%mm4               \n\t"   \
00163                     "movq       %%mm1,      %%mm7               \n\t"   \
00164                     "pmaddwd    %[U_GG],    %%mm7               \n\t"   \
00165                     "paddd      %%mm7,      %%mm4               \n\t"   \
00166                     "psrld      $8,         %%mm4               \n\t"   \
00167                     "paddd      %[UV],      %%mm4               \n\t"   \
00168                     "###########################################\n\t"   \
00169                     :                                   \
00170                     : [U_RBRB] "m" (*U_RBRB), [U_GG] "m" (*U_GG), [UV] "m" (*UV)    \
00171                     );                                                  \
00172             __asm__ __volatile__ (                                      \
00173                     "###########################################\n\t"   \
00174                     "# calc V in mm5                            \n\t"   \
00175                     "movq       %%mm0,      %%mm5               \n\t"   \
00176                     "pmaddwd    %[V_RBRB],  %%mm5               \n\t"   \
00177                     "movq       %%mm1,      %%mm7               \n\t"   \
00178                     "pmaddwd    %[V_GG],    %%mm7               \n\t"   \
00179                     "paddd      %%mm7,      %%mm5               \n\t"   \
00180                     "psrld      $8,         %%mm5               \n\t"   \
00181                     "paddd      %[UV],      %%mm5               \n\t"   \
00182                     "###########################################\n\t"   \
00183                     "# important: clear mm7!!!                  \n\t"   \
00184                     "pxor       %%mm7,      %%mm7               \n\t"   \
00185                     "###########################################\n\t"   \
00186                     :                                   \
00187                     : [V_RBRB] "m" (*V_RBRB), [V_GG] "m" (*V_GG), [UV] "m" (*UV)    \
00188                     );
00189 
00190 
00191 
00192 
00193 
00194 
00195 #endif
00196 
00197 
00198 void mmsfb_blit_blend_argb_to_yv12(MMSFBExternalSurfaceBuffer *extbuf, int src_height, int sx, int sy, int sw, int sh,
00199                                    unsigned char *dst, int dst_pitch, int dst_height, int dx, int dy) {
00200 
00201     
00202     static bool firsttime = true;
00203     if (firsttime) {
00204         printf("DISKO: Using accelerated blend ARGB to YV12.\n");
00205         firsttime = false;
00206     }
00207 
00208     
00209     unsigned int *src = (unsigned int *)extbuf->ptr;
00210     int src_pitch = extbuf->pitch;
00211 
00212     
00213     int  src_pitch_pix      = src_pitch >> 2;
00214     int dst_pitch_pix       = dst_pitch;
00215     int dst_pitch_pix_half  = dst_pitch_pix >> 1;
00216 
00217     src+= sx + sy * src_pitch_pix;
00218 
00219     
00220     if (dst_pitch_pix - dx < sw - sx)
00221         sw = dst_pitch_pix - dx - sx;
00222     if (dst_height - dy < sh - sy)
00223         sh = dst_height - dy - sy;
00224     if ((sw <= 0)||(sh <= 0))
00225         return;
00226 
00227     unsigned int OLDSRC  = (*src) + 1;
00228 
00229     unsigned int old_y;
00230     unsigned int old_u;
00231     unsigned int old_v;
00232 
00233     int  src_pixels = src_pitch_pix * sh;
00234 
00235     
00236     bool odd_left   = (dx & 0x01);
00237     bool odd_top    = (dy & 0x01);
00238     bool odd_right  = ((dx + sw) & 0x01);
00239     bool odd_bottom = ((dy + sh) & 0x01);
00240 
00241     
00242     unsigned char *dst_y = dst + dx + dy * dst_pitch_pix;
00243     unsigned char *dst_u = dst + dst_pitch_pix * dst_height + dst_pitch_pix_half * (dst_height >> 1) + (dx >> 1) + (dy >> 1) * dst_pitch_pix_half;
00244     unsigned char *dst_v = dst + dst_pitch_pix * dst_height                                          + (dx >> 1) + (dy >> 1) * dst_pitch_pix_half;
00245 
00246     
00247     unsigned int dst_y2_offs = 1;
00248     unsigned int dst_y3_offs = dst_pitch;
00249     unsigned int src2_offs = 1;
00250     unsigned int src3_offs = src_pitch_pix;
00251 
00252     
00253     register unsigned int d_u;
00254     register unsigned int d_v;
00255 
00256     
00257     if (odd_top && odd_left) {
00258         
00259         register unsigned int SRC;
00260         register unsigned int A;
00261 
00262         
00263         d_u = (*dst_u) * 3;
00264         d_v = (*dst_v) * 3;
00265 
00266         
00267         MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(*src, *dst_y, *dst_u, *dst_v, d_u+=, d_v+=);
00268 
00269         
00270         *dst_u = d_u >> 2;
00271         *dst_v = d_v >> 2;
00272     }
00273 
00274     if (odd_top && odd_right) {
00275         
00276         MMSFB_CONV_BLEND_ARGB_TO_YV12_PUSHPTR;
00277 
00278         
00279         src   += sw - 1;
00280         dst_y += sw - 1;
00281         if (odd_left) {
00282             dst_u += sw >> 1;
00283             dst_v += sw >> 1;
00284         }
00285         else {
00286             dst_u += (sw - 1) >> 1;
00287             dst_v += (sw - 1) >> 1;
00288         }
00289 
00290         register unsigned int SRC;
00291         register unsigned int A;
00292 
00293         
00294         d_u = (*dst_u) * 3;
00295         d_v = (*dst_v) * 3;
00296 
00297         
00298         MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(*src, *dst_y, *dst_u, *dst_v, d_u+=, d_v+=);
00299 
00300         
00301         *dst_u = d_u >> 2;
00302         *dst_v = d_v >> 2;
00303 
00304         
00305         MMSFB_CONV_BLEND_ARGB_TO_YV12_POPPTR;
00306     }
00307 
00308     if (odd_bottom && odd_left) {
00309         
00310         MMSFB_CONV_BLEND_ARGB_TO_YV12_PUSHPTR;
00311 
00312         
00313         src   += src_pitch_pix * (sh-1);
00314         dst_y += dst_pitch_pix * (sh-1);
00315         if (odd_top) {
00316             dst_u += dst_pitch_pix_half * (sh >> 1);
00317             dst_v += dst_pitch_pix_half * (sh >> 1);
00318         }
00319         else {
00320             dst_u += dst_pitch_pix_half * ((sh-1) >> 1);
00321             dst_v += dst_pitch_pix_half * ((sh-1) >> 1);
00322         }
00323 
00324         register unsigned int SRC;
00325         register unsigned int A;
00326 
00327         
00328         d_u = (*dst_u) * 3;
00329         d_v = (*dst_v) * 3;
00330 
00331         
00332         MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(*src, *dst_y, *dst_u, *dst_v, d_u+=, d_v+=);
00333 
00334         
00335         *dst_u = d_u >> 2;
00336         *dst_v = d_v >> 2;
00337 
00338         
00339         MMSFB_CONV_BLEND_ARGB_TO_YV12_POPPTR;
00340     }
00341 
00342     if (odd_bottom && odd_right) {
00343         
00344         MMSFB_CONV_BLEND_ARGB_TO_YV12_PUSHPTR;
00345 
00346         
00347         src   += src_pitch_pix * (sh-1);
00348         dst_y += dst_pitch_pix * (sh-1);
00349         if (odd_top) {
00350             dst_u += dst_pitch_pix_half * (sh >> 1);
00351             dst_v += dst_pitch_pix_half * (sh >> 1);
00352         }
00353         else {
00354             dst_u += dst_pitch_pix_half * ((sh-1) >> 1);
00355             dst_v += dst_pitch_pix_half * ((sh-1) >> 1);
00356         }
00357 
00358         
00359         src   += sw - 1;
00360         dst_y += sw - 1;
00361         if (odd_left) {
00362             dst_u += sw >> 1;
00363             dst_v += sw >> 1;
00364         }
00365         else {
00366             dst_u += (sw - 1) >> 1;
00367             dst_v += (sw - 1) >> 1;
00368         }
00369 
00370         register unsigned int SRC;
00371         register unsigned int A;
00372 
00373         
00374         d_u = (*dst_u) * 3;
00375         d_v = (*dst_v) * 3;
00376 
00377         
00378         MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(*src, *dst_y, *dst_u, *dst_v, d_u+=, d_v+=);
00379 
00380         
00381         *dst_u = d_u >> 2;
00382         *dst_v = d_v >> 2;
00383 
00384         
00385         MMSFB_CONV_BLEND_ARGB_TO_YV12_POPPTR;
00386     }
00387 
00388     if (odd_top) {
00389         
00390         MMSFB_CONV_BLEND_ARGB_TO_YV12_PUSHPTR;
00391 
00392         
00393         unsigned int *line_end = src + sw;
00394         if (odd_left) {
00395             src++;
00396             dst_y++;
00397             dst_u++;
00398             dst_v++;
00399             line_end--;
00400         }
00401         if (odd_right)
00402             line_end--;
00403 
00404         
00405         while (src < line_end) {
00406             register unsigned int SRC;
00407             register unsigned int A;
00408 
00409             
00410             d_u = (*dst_u) << 1;
00411             d_v = (*dst_v) << 1;
00412 
00413             
00414             MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(*src, *dst_y, *dst_u, *dst_v, d_u+=, d_v+=);
00415             MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(src[src2_offs], dst_y[dst_y2_offs], *dst_u, *dst_v, d_u+=, d_v+=);
00416 
00417             
00418             *dst_u = d_u >> 2;
00419             *dst_v = d_v >> 2;
00420 
00421             
00422             src+=2;
00423             dst_y+=2;
00424             dst_u++;
00425             dst_v++;
00426         }
00427 
00428         
00429         MMSFB_CONV_BLEND_ARGB_TO_YV12_POPPTR;
00430     }
00431 
00432     if (odd_bottom) {
00433         
00434         MMSFB_CONV_BLEND_ARGB_TO_YV12_PUSHPTR;
00435 
00436         
00437         src   += src_pitch_pix * (sh-1);
00438         dst_y += dst_pitch_pix * (sh-1);
00439         if (odd_top) {
00440             dst_u += dst_pitch_pix_half * (sh >> 1);
00441             dst_v += dst_pitch_pix_half * (sh >> 1);
00442         }
00443         else {
00444             dst_u += dst_pitch_pix_half * ((sh-1) >> 1);
00445             dst_v += dst_pitch_pix_half * ((sh-1) >> 1);
00446         }
00447 
00448         unsigned int *line_end = src + sw;
00449         if (odd_left) {
00450             src++;
00451             dst_y++;
00452             dst_u++;
00453             dst_v++;
00454             line_end--;
00455         }
00456         if (odd_right)
00457             line_end--;
00458 
00459         
00460         while (src < line_end) {
00461             register unsigned int SRC;
00462             register unsigned int A;
00463 
00464             
00465             d_u = (*dst_u) << 1;
00466             d_v = (*dst_v) << 1;
00467 
00468             
00469             MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(*src, *dst_y, *dst_u, *dst_v, d_u+=, d_v+=);
00470             MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(src[src2_offs], dst_y[dst_y2_offs], *dst_u, *dst_v, d_u+=, d_v+=);
00471 
00472             
00473             *dst_u = d_u >> 2;
00474             *dst_v = d_v >> 2;
00475 
00476             
00477             src+=2;
00478             dst_y+=2;
00479             dst_u++;
00480             dst_v++;
00481         }
00482 
00483         
00484         MMSFB_CONV_BLEND_ARGB_TO_YV12_POPPTR;
00485     }
00486 
00487     if (odd_left) {
00488         
00489         MMSFB_CONV_BLEND_ARGB_TO_YV12_PUSHPTR;
00490 
00491         
00492         unsigned int *src_end = src + src_pixels;
00493         int src_pitch_diff    = src_pitch_pix << 1;
00494         int dst_pitch_diff    = dst_pitch_pix << 1;
00495         int dst_pitch_uvdiff  = dst_pitch_pix_half;
00496         if (odd_top) {
00497             src     += src_pitch_pix;
00498             src_end -= src_pitch_pix;
00499             dst_y   += dst_pitch_pix;
00500             dst_u   += dst_pitch_pix_half;
00501             dst_v   += dst_pitch_pix_half;
00502         }
00503         if (odd_bottom)
00504             src_end -= src_pitch_pix;
00505 
00506         
00507         while (src < src_end) {
00508             
00509             register unsigned int SRC;
00510             register unsigned int A;
00511 
00512             
00513             d_u = (*dst_u) << 1;
00514             d_v = (*dst_v) << 1;
00515 
00516             
00517             MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(*src, *dst_y, *dst_u, *dst_v, d_u+=, d_v+=);
00518             MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(src[src3_offs], dst_y[dst_y3_offs], *dst_u, *dst_v, d_u+=, d_v+=);
00519 
00520             
00521             *dst_u = d_u >> 2;
00522             *dst_v = d_v >> 2;
00523 
00524             
00525             src   += src_pitch_diff;
00526             dst_y += dst_pitch_diff;
00527             dst_u += dst_pitch_uvdiff;
00528             dst_v += dst_pitch_uvdiff;
00529         }
00530 
00531         
00532         MMSFB_CONV_BLEND_ARGB_TO_YV12_POPPTR;
00533     }
00534 
00535     if (odd_right) {
00536         
00537         MMSFB_CONV_BLEND_ARGB_TO_YV12_PUSHPTR;
00538 
00539         
00540         unsigned int *src_end = src + src_pixels;
00541         int src_pitch_diff    = src_pitch_pix << 1;
00542         int dst_pitch_diff    = dst_pitch_pix << 1;
00543         int dst_pitch_uvdiff  = dst_pitch_pix_half;
00544         src   += sw - 1;
00545         dst_y += sw - 1;
00546         if (odd_left) {
00547             dst_u += sw >> 1;
00548             dst_v += sw >> 1;
00549         }
00550         else {
00551             dst_u += (sw - 1) >> 1;
00552             dst_v += (sw - 1) >> 1;
00553         }
00554         if (odd_top) {
00555             src     += src_pitch_pix;
00556             src_end -= src_pitch_pix;
00557             dst_y   += dst_pitch_pix;
00558             dst_u   += dst_pitch_pix_half;
00559             dst_v   += dst_pitch_pix_half;
00560         }
00561         if (odd_bottom)
00562             src_end -= src_pitch_pix;
00563 
00564         
00565         while (src < src_end) {
00566             
00567             register unsigned int SRC;
00568             register unsigned int A;
00569 
00570             
00571             d_u = (*dst_u) << 1;
00572             d_v = (*dst_v) << 1;
00573 
00574             
00575             MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(*src, *dst_y, *dst_u, *dst_v, d_u+=, d_v+=);
00576             MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(src[src3_offs], dst_y[dst_y3_offs], *dst_u, *dst_v, d_u+=, d_v+=);
00577 
00578             
00579             *dst_u = d_u >> 2;
00580             *dst_v = d_v >> 2;
00581 
00582             
00583             src   += src_pitch_diff;
00584             dst_y += dst_pitch_diff;
00585             dst_u += dst_pitch_uvdiff;
00586             dst_v += dst_pitch_uvdiff;
00587         }
00588 
00589         
00590         MMSFB_CONV_BLEND_ARGB_TO_YV12_POPPTR;
00591     }
00592 
00593     
00594     if (odd_top) {
00595         
00596         dy++;
00597         sh--;
00598         src+=src_pitch_pix;
00599         src_pixels-=src_pitch_pix;
00600         dst_y+=dst_pitch;
00601         dst_u+=dst_pitch >> 1;
00602         dst_v+=dst_pitch >> 1;
00603     }
00604 
00605     if (odd_bottom) {
00606         
00607         src_height--;
00608         src_pixels-=src_pitch_pix;
00609     }
00610 
00611     if (odd_left) {
00612         
00613         dx++;
00614         sw--;
00615         src++;
00616         dst_y++;
00617         dst_u++;
00618         dst_v++;
00619     }
00620 
00621     if (odd_right) {
00622         
00623         sw--;
00624     }
00625 
00626     
00627 
00628 
00629 #ifndef __HAVE_SSE__
00630     unsigned int dst_y4_offs = dst_y3_offs + 1;
00631     unsigned int src4_offs = src3_offs + 1;
00632 
00633     
00634     unsigned int *src_end = src + src_pixels;
00635     int src_pitch_diff = (src_pitch_pix << 1) - sw;
00636     int dst_pitch_diff = (dst_pitch_pix << 1) - sw;
00637     int dst_pitch_uvdiff = (dst_pitch_pix - sw) >> 1;
00638 
00639     
00640     while (src < src_end) {
00641         
00642         unsigned int *line_end = src + sw;
00643 
00644         
00645         while (src < line_end) {
00646             register unsigned int SRC;
00647             register unsigned int A;
00648 
00649             
00650             MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(*src, *dst_y, *dst_u, *dst_v, d_u=, d_v=);
00651             MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(src[src2_offs], dst_y[dst_y2_offs], *dst_u, *dst_v, d_u+=, d_v+=);
00652             MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(src[src3_offs], dst_y[dst_y3_offs], *dst_u, *dst_v, d_u+=, d_v+=);
00653             MMSFB_CONV_BLEND_ARGB_TO_YV12_PIXEL(src[src4_offs], dst_y[dst_y4_offs], *dst_u, *dst_v, d_u+=, d_v+=);
00654 
00655             
00656             *dst_u = d_u >> 2;
00657             *dst_v = d_v >> 2;
00658 
00659             
00660             src  +=2;
00661             dst_y+=2;
00662             dst_u++;
00663             dst_v++;
00664         }
00665 
00666         
00667         src   += src_pitch_diff;
00668         dst_y += dst_pitch_diff;
00669         dst_u += dst_pitch_uvdiff;
00670         dst_v += dst_pitch_uvdiff;
00671     }
00672 
00673 #else
00674 
00675     
00676 
00677     static v4six TTTT = { 0x100,0,0x100,0 };
00678 
00679 
00680 
00681 
00682 
00683     _v4si *src_end = (_v4si *)(src + src_pixels);
00684     _v4si *ssrc = (_v4si *)src;
00685     int src_pitch_diff = (src_pitch_pix << 1) - sw;
00686     int dst_pitch_diff = (dst_pitch_pix << 1) - sw;
00687     int dst_pitch_uvdiff = (dst_pitch_pix - sw) >> 1;
00688 
00689 
00690     src3_offs = src3_offs>>1;
00691     sw = sw >> 1;
00692     src_pitch_diff = src_pitch_diff >> 1;
00693 
00694 
00695 
00696     int src3_offsX = src3_offs-1;
00697     int dst_y3_offsX = dst_y3_offs-2;
00698 
00699     _v4si   OLDSRC_MMX;
00700     OLDSRC_MMX.i[0] = ssrc->i[0]+1;
00701     OLDSRC_MMX.i[1] = ssrc->i[1]+1;
00702 
00703     
00704     while (ssrc < src_end) {
00705         
00706         _v4si *line_end = ssrc + sw;
00707 
00708         
00709         while (ssrc < line_end) {
00710             if ((ssrc->c[3]==0xff)&&(ssrc->c[7]==0xff)) {
00711                 
00712                 if ((ssrc->i[0] != OLDSRC_MMX.i[0])||(ssrc->i[1] != OLDSRC_MMX.i[1])) {
00713                     
00714                     MMSFB_BLIT_BLEND_ARGB_TO_YV12_LOAD_SRC;
00715                     OLDSRC_MMX = *ssrc;
00716                 }
00717 
00718                 __asm__ __volatile__ (
00719                         "###########################################\n\t"
00720                         "# save the two Y values                    \n\t"
00721                         "pextrw     $0,         %%mm3,      %%eax   \n\t"
00722                         "pextrw     $2,         %%mm3,      %%ecx   \n\t"
00723                         "mov        %%cl,       %%ah                \n\t"
00724                         "mov        %%ax,       %[dst_y]            \n\t"
00725                         "###########################################\n\t"
00726                         "# load reg mm0 with the U value            \n\t"
00727                         "movq       %%mm4,      %%mm0               \n\t"
00728                         "psadbw     %%mm7,      %%mm0               \n\t"
00729                         "# save the U result in mm6                 \n\t"
00730                         "movq       %%mm0,      %%mm6               \n\t"
00731                         "###########################################\n\t"
00732                         "# load reg mm0 with the V value            \n\t"
00733                         "movq       %%mm5,      %%mm0               \n\t"
00734                         "psadbw     %%mm7,      %%mm0               \n\t"
00735                         "pextrw     $0,         %%mm0,      %%eax   \n\t"
00736                         "# save the V result in mm6                 \n\t"
00737                         "pinsrw     $2,         %%eax,      %%mm6   \n\t"
00738                         "###########################################\n\t"
00739                         : [dst_y] "=m" (*dst_y)             
00740                         :                                   
00741                         : "cc", "%eax", "%ecx"              
00742                         );
00743 
00744             }
00745             else
00746             if ((!ssrc->c[3])&&(!ssrc->c[7])) {
00747                 
00748                 if ((ssrc->i[0] != OLDSRC_MMX.i[0])||(ssrc->i[1] != OLDSRC_MMX.i[1])) {
00749                     
00750                     OLDSRC_MMX = *ssrc;
00751                 }
00752 
00753                 
00754                 __asm__ __volatile__ (
00755                         "###########################################\n\t"
00756                         "# load reg eax with the U value            \n\t"
00757                         "xor        %%eax,      %%eax               \n\t"
00758                         "mov        %[dst_u],   %%al                \n\t"
00759                         "# calc U * 2                               \n\t"
00760                         "shl        $1,         %%ax                \n\t"
00761                         "# save the U result in mm6                 \n\t"
00762                         "pinsrw     $0,         %%eax,      %%mm6   \n\t"
00763                         "###########################################\n\t"
00764                         "# load reg eax with the V value            \n\t"
00765                         "xor        %%eax,      %%eax               \n\t"
00766                         "mov        %[dst_v],   %%al                \n\t"
00767                         "# calc V * 2                               \n\t"
00768                         "shl        $1,         %%ax                \n\t"
00769                         "# save the V result in mm6                 \n\t"
00770                         "pinsrw     $2,         %%eax,      %%mm6   \n\t"
00771                         "###########################################\n\t"
00772                         :                                               
00773                         : [dst_u] "m" (*dst_u), [dst_v] "m" (*dst_v)    
00774                         : "cc", "%eax"                                  
00775                         );
00776             }
00777             else {
00778                 if ((ssrc->i[0] != OLDSRC_MMX.i[0])||(ssrc->i[1] != OLDSRC_MMX.i[1])) {
00779                     
00780                     MMSFB_BLIT_BLEND_ARGB_TO_YV12_LOAD_SRC_ALPHA;
00781                     OLDSRC_MMX = *ssrc;
00782                 }
00783                 __asm__ __volatile__ (
00784                         "###########################################\n\t"
00785                         "# load reg mm0 with the two Y values       \n\t"
00786                         "pxor       %%mm0,      %%mm0               \n\t"
00787                         "mov        %[dst_y],   %%ax                \n\t"
00788                         "mov        %%ax,       %%cx                \n\t"
00789                         "xor        %%ah,       %%ah                \n\t"
00790                         "shr        $8,         %%cx                \n\t"
00791                         "pinsrw     $0,         %%eax,      %%mm0   \n\t"
00792                         "pinsrw     $2,         %%ecx,      %%mm0   \n\t"
00793                         "# calc Y                                   \n\t"
00794                         "pmullw     %%mm2,      %%mm0               \n\t"
00795                         "paddw      %%mm3,      %%mm0               \n\t"
00796                         "psrlw      $8,         %%mm0               \n\t"
00797                         "# save the two Y results                   \n\t"
00798                         "pextrw     $0,         %%mm0,      %%eax   \n\t"
00799                         "pextrw     $2,         %%mm0,      %%ecx   \n\t"
00800                         "mov        %%cl,       %%ah                \n\t"
00801                         "mov        %%ax,       %[dst_y]            \n\t"
00802                         "###########################################\n\t"
00803                         : [dst_y] "+m" (*dst_y)             
00804                         :                                   
00805                         : "cc", "%eax", "%ecx"              
00806                         );
00807 
00808                 __asm__ __volatile__ (
00809                         "###########################################\n\t"
00810                         "# load reg mm0 with the U value            \n\t"
00811                         "xor        %%eax,      %%eax               \n\t"
00812                         "mov        %[dst_u],   %%al                \n\t"
00813                         "pinsrw     $0,         %%eax,      %%mm0   \n\t"
00814                         "pinsrw     $2,         %%eax,      %%mm0   \n\t"
00815                         "# calc U                                   \n\t"
00816                         "pmullw     %%mm2,      %%mm0               \n\t"
00817                         "paddw      %%mm4,      %%mm0               \n\t"
00818                         "psrlw      $8,         %%mm0               \n\t"
00819                         "psadbw     %%mm7,      %%mm0               \n\t"
00820                         "# save the U result in mm6                 \n\t"
00821                         "movq       %%mm0,      %%mm6               \n\t"
00822                         "###########################################\n\t"
00823                         "# load reg mm0 with the V value            \n\t"
00824                         "xor        %%eax,      %%eax               \n\t"
00825                         "mov        %[dst_v],   %%al                \n\t"
00826                         "pinsrw     $0,         %%eax,      %%mm0   \n\t"
00827                         "pinsrw     $2,         %%eax,      %%mm0   \n\t"
00828                         "# calc V                                   \n\t"
00829                         "pmullw     %%mm2,      %%mm0               \n\t"
00830                         "paddw      %%mm5,      %%mm0               \n\t"
00831                         "psrlw      $8,         %%mm0               \n\t"
00832                         "psadbw     %%mm7,      %%mm0               \n\t"
00833                         "# save the V result in mm6                 \n\t"
00834                         "pextrw     $0,         %%mm0,      %%eax   \n\t"
00835                         "pinsrw     $2,         %%eax,      %%mm6   \n\t"
00836                         "###########################################\n\t"
00837                         :                                               
00838                         : [dst_u] "m" (*dst_u), [dst_v] "m" (*dst_v)    
00839                         : "cc", "%eax"                                  
00840                         );
00841             }
00842 
00843             ssrc+=src3_offs;
00844             dst_y+=dst_y3_offs;
00845 
00846             if ((ssrc->c[3]==0xff)&&(ssrc->c[7]==0xff)) {
00847                 
00848                 if ((ssrc->i[0] != OLDSRC_MMX.i[0])||(ssrc->i[1] != OLDSRC_MMX.i[1])) {
00849                     
00850                     MMSFB_BLIT_BLEND_ARGB_TO_YV12_LOAD_SRC;
00851                     OLDSRC_MMX = *ssrc;
00852 
00853                     
00854                     __asm__ __volatile__ (
00855                             "###########################################\n\t"
00856                             "# load reg mm0 with the U value            \n\t"
00857                             "movq       %%mm4,      %%mm0               \n\t"
00858                             "psadbw     %%mm7,      %%mm0               \n\t"
00859                             "# save the U result to memory              \n\t"
00860                             "paddw      %%mm6,      %%mm0               \n\t"
00861                             "pextrw     $0,         %%mm0,      %%eax   \n\t"
00862                             "shr        $2,         %%eax               \n\t"
00863                             "mov        %%al,       %[dst_u]            \n\t"
00864                             "###########################################\n\t"
00865                             "# load reg mm0 with the V value            \n\t"
00866                             "movq       %%mm5,      %%mm0               \n\t"
00867                             "psadbw     %%mm7,      %%mm0               \n\t"
00868                             "# save the V result to memory              \n\t"
00869                             "pextrw     $0,         %%mm0,      %%eax   \n\t"
00870                             "pextrw     $2,         %%mm6,      %%ecx   \n\t"
00871                             "add        %%ecx,      %%eax               \n\t"
00872                             "shr        $2,         %%eax               \n\t"
00873                             "mov        %%al,       %[dst_v]            \n\t"
00874                             "###########################################\n\t"
00875                             : [dst_u] "=m" (*dst_u), [dst_v] "=m" (*dst_v)  
00876                             :                                               
00877                             : "cc", "%eax", "%ecx"                          
00878                             );
00879                 }
00880                 else {
00881                     
00882 
00883                     
00884                     __asm__ __volatile__ (
00885                             "###########################################\n\t"
00886                             "# save the U result to memory              \n\t"
00887                             "pextrw     $0,         %%mm6,      %%eax   \n\t"
00888                             "shr        $1,         %%eax               \n\t"
00889                             "mov        %%al,       %[dst_u]            \n\t"
00890                             "###########################################\n\t"
00891                             "# save the V result to memory              \n\t"
00892                             "pextrw     $2,         %%mm6,      %%eax   \n\t"
00893                             "shr        $1,         %%eax               \n\t"
00894                             "mov        %%al,       %[dst_v]            \n\t"
00895                             "###########################################\n\t"
00896                             : [dst_u] "=m" (*dst_u), [dst_v] "=m" (*dst_v)  
00897                             :                                               
00898                             : "cc", "%eax"                                  
00899                             );
00900                 }
00901 
00902                 
00903                 __asm__ __volatile__ (
00904                         "###########################################\n\t"
00905                         "# save the two Y values                    \n\t"
00906                         "pextrw     $0,         %%mm3,      %%eax   \n\t"
00907                         "pextrw     $2,         %%mm3,      %%ecx   \n\t"
00908                         "mov        %%cl,       %%ah                \n\t"
00909                         "mov        %%ax,       %[dst_y]            \n\t"
00910                         "###########################################\n\t"
00911                         : [dst_y] "=m" (*dst_y)             
00912                         :                                   
00913                         : "cc", "%eax", "%ecx"              
00914                         );
00915             }
00916             else
00917             if ((!ssrc->c[3])&&(!ssrc->c[7])) {
00918                 
00919                 if ((ssrc->i[0] != OLDSRC_MMX.i[0])||(ssrc->i[1] != OLDSRC_MMX.i[1])) {
00920                     
00921                     __asm__ __volatile__ (
00922                             "###########################################\n\t"
00923                             "# load reg eax with the U value            \n\t"
00924                             "xor        %%eax,      %%eax               \n\t"
00925                             "mov        %[dst_u],   %%al                \n\t"
00926                             "# calc U * 2                               \n\t"
00927                             "shl        $1,         %%ax                \n\t"
00928                             "# save the U result to memory              \n\t"
00929                             "pextrw     $0,         %%mm6,      %%ecx   \n\t"
00930                             "add        %%ecx,      %%eax               \n\t"
00931                             "shr        $2,         %%eax               \n\t"
00932                             "mov        %%al,       %[dst_u]            \n\t"
00933                             "###########################################\n\t"
00934                             : [dst_u] "+m" (*dst_u)             
00935                             :                                   
00936                             : "cc", "%eax", "%ecx"              
00937                             );
00938 
00939                     __asm__ __volatile__ (
00940                             "###########################################\n\t"
00941                             "# load reg eax with the V value            \n\t"
00942                             "xor        %%eax,      %%eax               \n\t"
00943                             "mov        %[dst_v],   %%al                \n\t"
00944                             "# calc V * 2                               \n\t"
00945                             "shl        $1,         %%ax                \n\t"
00946                             "# save the V result to memory              \n\t"
00947                             "pextrw     $2,         %%mm6,      %%ecx   \n\t"
00948                             "add        %%ecx,      %%eax               \n\t"
00949                             "shr        $2,         %%eax               \n\t"
00950                             "mov        %%al,       %[dst_v]            \n\t"
00951                             "###########################################\n\t"
00952                             : [dst_v] "+m" (*dst_v)             
00953                             :                                   
00954                             : "cc", "%eax", "%ecx"              
00955                             );
00956                 }
00957             }
00958             else {
00959                 
00960                 if ((ssrc->i[0] != OLDSRC_MMX.i[0])||(ssrc->i[1] != OLDSRC_MMX.i[1])) {
00961                     
00962                     MMSFB_BLIT_BLEND_ARGB_TO_YV12_LOAD_SRC_ALPHA;
00963                     OLDSRC_MMX = *ssrc;
00964 
00965                     
00966                     __asm__ __volatile__ (
00967                             "###########################################\n\t"
00968                             "# load reg mm0 with the U value            \n\t"
00969                             "xor        %%eax,      %%eax               \n\t"
00970                             "mov        %[dst_u],   %%al                \n\t"
00971                             "pinsrw     $0,         %%eax,      %%mm0   \n\t"
00972                             "pinsrw     $2,         %%eax,      %%mm0   \n\t"
00973                             "# calc U                                   \n\t"
00974                             "pmullw     %%mm2,      %%mm0               \n\t"
00975                             "paddw      %%mm4,      %%mm0               \n\t"
00976                             "psrlw      $8,         %%mm0               \n\t"
00977                             "psadbw     %%mm7,      %%mm0               \n\t"
00978                             "# save the U result to memory              \n\t"
00979                             "paddw      %%mm6,      %%mm0               \n\t"
00980                             "pextrw     $0,         %%mm0,      %%eax   \n\t"
00981                             "shr        $2,         %%eax               \n\t"
00982                             "mov        %%al,       %[dst_u]            \n\t"
00983                             "###########################################\n\t"
00984                             : [dst_u] "+m" (*dst_u)             
00985                             :                                   
00986                             : "cc", "%eax"                      
00987                             );
00988 
00989                     
00990                     __asm__ __volatile__ (
00991                             "###########################################\n\t"
00992                             "# load reg mm0 with the V value            \n\t"
00993                             "xor        %%eax,      %%eax               \n\t"
00994                             "mov        %[dst_v],   %%al                \n\t"
00995                             "pinsrw     $0,         %%eax,      %%mm0   \n\t"
00996                             "pinsrw     $2,         %%eax,      %%mm0   \n\t"
00997                             "# calc V                                   \n\t"
00998                             "pmullw     %%mm2,      %%mm0               \n\t"
00999                             "paddw      %%mm5,      %%mm0               \n\t"
01000                             "psrlw      $8,         %%mm0               \n\t"
01001                             "psadbw     %%mm7,      %%mm0               \n\t"
01002                             "# save the V result to memory              \n\t"
01003                             "pextrw     $0,         %%mm0,      %%eax   \n\t"
01004                             "pextrw     $2,         %%mm6,      %%ecx   \n\t"
01005                             "add        %%ecx,      %%eax               \n\t"
01006                             "shr        $2,         %%eax               \n\t"
01007                             "mov        %%al,       %[dst_v]            \n\t"
01008                             "###########################################\n\t"
01009                             : [dst_v] "+m" (*dst_v)             
01010                             :                                   
01011                             : "cc", "%eax", "%ecx"              
01012                             );
01013                 }
01014                 else {
01015                     
01016 
01017                     
01018                     __asm__ __volatile__ (
01019                             "###########################################\n\t"
01020                             "# save the U result to memory              \n\t"
01021                             "pextrw     $0,         %%mm6,      %%eax   \n\t"
01022                             "shr        $1,         %%eax               \n\t"
01023                             "mov        %%al,       %[dst_u]            \n\t"
01024                             "###########################################\n\t"
01025                             "# save the V result to memory              \n\t"
01026                             "pextrw     $2,         %%mm6,      %%eax   \n\t"
01027                             "shr        $1,         %%eax               \n\t"
01028                             "mov        %%al,       %[dst_v]            \n\t"
01029                             "###########################################\n\t"
01030                             : [dst_u] "=m" (*dst_u), [dst_v] "=m" (*dst_v)  
01031                             :                                               
01032                             : "cc", "%eax"                                  
01033                             );
01034                 }
01035 
01036                 
01037                 __asm__ __volatile__ (
01038                         "###########################################\n\t"
01039                         "# load reg mm0 with the two Y values       \n\t"
01040                         "pxor       %%mm0,      %%mm0               \n\t"
01041                         "mov        %[dst_y],   %%ax                \n\t"
01042                         "mov        %%ax,       %%cx                \n\t"
01043                         "xor        %%ah,       %%ah                \n\t"
01044                         "shr        $8,         %%cx                \n\t"
01045                         "pinsrw     $0,         %%eax,      %%mm0   \n\t"
01046                         "pinsrw     $2,         %%ecx,      %%mm0   \n\t"
01047                         "# calc Y                                   \n\t"
01048                         "pmullw     %%mm2,      %%mm0               \n\t"
01049                         "paddw      %%mm3,      %%mm0               \n\t"
01050                         "psrlw      $8,         %%mm0               \n\t"
01051                         "# save the two Y results                   \n\t"
01052                         "pextrw     $0,         %%mm0,      %%eax   \n\t"
01053                         "pextrw     $2,         %%mm0,      %%ecx   \n\t"
01054                         "mov        %%cl,       %%ah                \n\t"
01055                         "mov        %%ax,       %[dst_y]            \n\t"
01056                         : [dst_y] "+m" (*dst_y)             
01057                         :                                   
01058                         : "cc", "%eax", "%ecx"              
01059                         );
01060 
01061             }
01062 
01063 
01064 
01065             
01066             ssrc-=src3_offsX;
01067             dst_y-=dst_y3_offsX;
01068             dst_u++;
01069             dst_v++;
01070         }
01071 
01072         
01073         ssrc  += src_pitch_diff;
01074         dst_y += dst_pitch_diff;
01075         dst_u += dst_pitch_uvdiff;
01076         dst_v += dst_pitch_uvdiff;
01077     }
01078 
01079 
01080     __asm__ __volatile__ (
01081             "###########################################\n\t"
01082             "# clear the MMX state                      \n\t"
01083             "emms                                       \n\t"
01084             "###########################################\n\t"
01085             );
01086 #endif
01087 
01088 }
01089 
01090 #endif
01091 #endif