// Micro-benchmark for RGBA32_YV12AlphaBlend: blends a 32-bit RGBA overlay
// onto a planar YV12 frame and reports the average time per call.
//
// NOTE(review): the code uses references, bool and functional casts, so it
// must be compiled as C++ even though it only relies on C library headers.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

/*************************************************/

// Substitute for the Windows BITMAP type; only the fields this file reads
// are documented.
struct bitmap_t
{
    size_t bmWidth;      // width in pixels
    size_t bmHeight;     // height in rows
    size_t bmWidthBytes; // distance between consecutive rows, in bytes
    void **bmBits;       // pixel storage; always cast to uchar* before use here
    int bmBitsPixel;     // unused in this file
    int bmPlanes;        // unused in this file
    int bmTypes;         // unused in this file
};
typedef struct bitmap_t BITMAP;

typedef unsigned char uchar;
typedef unsigned int uint;
typedef unsigned short ushort;

// Classic macro: both arguments are evaluated twice, so pass side-effect-free
// expressions only.
#define min(x,y) ((x) <= (y) ? (x) : (y))

/*************************************************/

// Saturation table: Clip[i] yields 0 for i in [-256,-1], i for [0,255] and
// 255 for [256,511].
unsigned char Clip_base[256*3];
unsigned char* Clip = Clip_base + 256;

// RGB -> luma weights (0.114/0.587/0.299) scaled by 219/255 and stored as
// 16.16 fixed point.
const int c2y_cyb = int(0.114*219/255*65536+0.5);
const int c2y_cyg = int(0.587*219/255*65536+0.5);
const int c2y_cyr = int(0.299*219/255*65536+0.5);
// Chroma scale factors, 10-bit fixed point.
const int c2y_cu = int(1.0/2.018*1024+0.5);
const int c2y_cv = int(1.0/1.596*1024+0.5);

// Per-component luma contribution tables, filled by ColorConvInit().
int c2y_yb[256];
int c2y_yg[256];
int c2y_yr[256];

// YUV -> RGB coefficients and tables. They are initialised by ColorConvInit()
// but nothing in this file reads them.
const int y2c_cbu = int(2.018*65536+0.5);
const int y2c_cgu = int(0.391*65536+0.5);
const int y2c_cgv = int(0.813*65536+0.5);
const int y2c_crv = int(1.596*65536+0.5);
int y2c_bu[256];
int y2c_gu[256];
int y2c_gv[256];
int y2c_rv[256];

// 255/219 luma expansion factor at three fixed-point precisions; cy_cy4 is
// the one applied to a sum of four luma samples.
const int cy_cy = int(255.0/219.0*65536+0.5);
const int cy_cy2 = int(255.0/219.0*32768+0.5);
const int cy_cy4 = int(255.0/219.0*16384+0.5);

bool fColorConvInitOK = false;

// Fills the clip and colour-conversion lookup tables. Only the first call
// does any work; later calls return immediately.
void ColorConvInit()
{
    if(fColorConvInitOK) return;

    int i;

    for(i = 0; i < 256; i++)
    {
        Clip_base[i] = 0;
        Clip_base[i+256] = i;
        Clip_base[i+512] = 255;
    }

    for(i = 0; i < 256; i++)
    {
        c2y_yb[i] = c2y_cyb*i;
        c2y_yg[i] = c2y_cyg*i;
        c2y_yr[i] = c2y_cyr*i;

        y2c_bu[i] = y2c_cbu*(i-128);
        y2c_gu[i] = y2c_cgu*(i-128);
        y2c_gv[i] = y2c_cgv*(i-128);
        y2c_rv[i] = y2c_crv*(i-128);
    }

    fColorConvInitOK = true;
}

// Converts a 2x2 block of RGB pixels into four luma samples (y1..y4) and one
// shared chroma pair (u, v). It DECLARES those locals in the invoking scope,
// so it can be used at most once per block scope.
// 0x108000 is the +16 luma offset plus one half for rounding, in 16.16 fixed
// point.
#define rgb2yv12(r1,g1,b1,r2,g2,b2,r3,g3,b3,r4,g4,b4) \
    int y1 = (c2y_yb[b1] + c2y_yg[g1] + c2y_yr[r1] + 0x108000) >> 16; \
    int y2 = (c2y_yb[b2] + c2y_yg[g2] + c2y_yr[r2] + 0x108000) >> 16; \
    int y3 = (c2y_yb[b3] + c2y_yg[g3] + c2y_yr[r3] + 0x108000) >> 16; \
    int y4 = (c2y_yb[b4] + c2y_yg[g4] + c2y_yr[r4] + 0x108000) >> 16; \
    \
    int scaled_y = (y1+y2+y3+y4-64) * cy_cy4; \
    \
    unsigned char u = Clip[(((((b1+b2+b3+b4)<<14) - scaled_y) >> 10) * c2y_cu + 0x800000 + 0x8000) >> 16]; \
    unsigned char v = Clip[(((((r1+r2+r3+r4)<<14) - scaled_y) >> 10) * c2y_cv + 0x800000 + 0x8000) >> 16];

// Blends the 32-bit source bitmap onto the YV12 destination in place.
//
// dst: planar frame laid out as a luma plane of bmWidthBytes*bmHeight bytes,
//      followed by the V plane and then the U plane, each a quarter of that
//      size with a row stride of bmWidthBytes/2.
// src: 4 bytes per pixel, read as byte 0 = blue, 1 = green, 2 = red,
//      3 = alpha. Rows are consumed starting from the LAST row in memory and
//      moving backwards while the destination advances forwards.
//
// Alpha acts as the weight of the existing destination sample: a 2x2 block
// whose four alphas are all 0xff is skipped, otherwise the destination is
// scaled by roughly alpha/256 and the converted source is added unscaled.
// NOTE(review): that only stays in range if the source colours are already
// premultiplied; results are stored without clamping, as before.
//
// The processed area is the overlap of both bitmaps rounded down to even
// width and height, because every step consumes a 2x2 pixel block. Rounding
// the height is required: with an odd height the final pass would read a row
// before the source buffer and write a luma row into the V plane.
void RGBA32_YV12AlphaBlend(BITMAP& dst, BITMAP& src)
{
    ColorConvInit();

    const size_t even = ~(size_t)1;
    const size_t w = min(src.bmWidth, dst.bmWidth) & even;
    const size_t h = min(src.bmHeight, dst.bmHeight) & even;

    const size_t sstride = src.bmWidthBytes;
    const size_t dstride = dst.bmWidthBytes;

    uchar* sbase = (uchar*)src.bmBits;
    uchar* dbase = (uchar*)dst.bmBits;
    uchar* vbase = dbase + dstride*dst.bmHeight;
    uchar* ubase = vbase + (dstride*dst.bmHeight)/4;

    // Rows are addressed by index so that no pointer is ever formed outside
    // the buffers, not even after the final iteration.
    for(size_t row = 0; row < h; row += 2)
    {
        uchar* s1 = sbase + sstride*(src.bmHeight-1-row);
        uchar* s2 = s1 - sstride;
        uchar* e = s1 + w*4;
        uchar* d1 = dbase + dstride*row;
        uchar* d2 = d1 + dstride;
        uchar* vr = vbase + (dstride/2)*(row/2);
        uchar* ur = ubase + (dstride/2)*(row/2);

        for(; s1 < e; s1 += 8, s2 += 8, d1 += 2, d2 += 2, ur++, vr++)
        {
            // Average alpha of the 2x2 block, used for the shared chroma.
            int aa = (s1[3]+s1[7]+s2[3]+s2[7])>>2;

            if(aa < 0xff)
            {
                rgb2yv12(s1[2],s1[1],s1[0],s1[6],s1[5],s1[4],
                         s2[2],s2[1],s2[0],s2[6],s2[5],s2[4]);

                // (x * (a>>1)) >> 7 approximates x*a/256 with a 7-bit factor.
                d1[0] = (((d1[0]-0x10) * (s1[3]>>1)) >> 7) + y1;
                d1[1] = (((d1[1]-0x10) * (s1[7]>>1)) >> 7) + y2;
                d2[0] = (((d2[0]-0x10) * (s2[3]>>1)) >> 7) + y3;
                d2[1] = (((d2[1]-0x10) * (s2[7]>>1)) >> 7) + y4;

                ur[0] = (((ur[0]-0x80) * (aa>>1)) >> 7) + u;
                vr[0] = (((vr[0]-0x80) * (aa>>1)) >> 7) + v;
            }

            // Disabled: would reset the consumed source pixels to 0xff000000.
#if 0
            *((uint*)s1) = 0xff000000;
            *((uint*)s1+1) = 0xff000000;
            *((uint*)s2) = 0xff000000;
            *((uint*)s2+1) = 0xff000000;
#endif
        }
    }
}

/*************************************************/

// Test pixel values. The alpha byte is the top 8 bits; the shifts are done on
// unsigned operands because shifting a signed int into the sign bit is not
// portable.
// NOTE(review): on a little-endian host the blend reads byte 2 as red, so the
// value named Pb ends up in the red channel and Pr in the blue one. This does
// not matter for timing.
#define PW 0x00ffffff
#define PB 0x0
#define T (0xffu<<24)
#define Pb 0x00ff0000
#define Pr 0x000000ff
#define Pg 0x0000ff00
#define D 0x88
#define Db (Pb | ((uint)D<<24))
#define Dr (Pr | ((uint)D<<24))
#define Dg (Pg | ((uint)D<<24))

#define WIDTH 640
#define HEIGHT 480

// malloc wrapper for the benchmark: reports the failure and exits instead of
// returning NULL.
static void *xmalloc(size_t bytes)
{
    void *p = malloc(bytes);
    if(p == NULL)
    {
        fprintf(stderr, "out of memory allocating %lu bytes\n", (unsigned long) bytes);
        exit(EXIT_FAILURE);
    }
    return p;
}

// Stores v into the first `width` pixels of the row starting at bmr.
static inline void set_row_rgb(uint *bmr, size_t width, uint v)
{
    for(size_t i = 0; i < width; ++i)
        bmr[i] = v;
}

// Points bmp at a WIDTH x HEIGHT buffer of 32-bit pixels. The remaining
// BITMAP fields are left untouched because nothing here reads them.
static void attach_rgb_pixels(BITMAP &bmp, uint *pixels)
{
    bmp.bmWidth = WIDTH;
    bmp.bmWidthBytes = WIDTH*sizeof(uint);
    bmp.bmHeight = HEIGHT;
    bmp.bmBits = (void **)pixels;
}

// Builds the "worst case" overlay: ten horizontal bands, none of which has an
// alpha of 0xff, so every 2x2 block goes through the full blend path. The
// buffer is owned by the caller and released with free(bmp.bmBits).
static void setup_rgb_worst(BITMAP &bmp)
{
    static const uint bands[] = { PW, Pb, Pr, Pg, Db, Dr, Dg, PB, PW, PB };
    const size_t band_count = sizeof(bands)/sizeof(bands[0]);
    const size_t band_rows = HEIGHT/band_count;

    uint *bitmap = (uint*) xmalloc(sizeof(uint) * WIDTH * HEIGHT);

    for(size_t row = 0; row < HEIGHT; ++row)
    {
        // Rows left over when HEIGHT is not a multiple of the band count
        // take the last band, so no row is left unfilled.
        size_t band = band_rows ? row/band_rows : band_count-1;
        if(band >= band_count) band = band_count-1;
        set_row_rgb(bitmap + (row*WIDTH), WIDTH, bands[band]);
    }

    attach_rgb_pixels(bmp, bitmap);
}

// Builds the "best case" overlay: every pixel has alpha 0xff, so the blend
// skips every block. The buffer is owned by the caller and released with
// free(bmp.bmBits).
static void setup_rgb_best(BITMAP &bmp)
{
    uint *bitmap = (uint*) xmalloc(sizeof(uint) * WIDTH * HEIGHT);

    for(size_t row = 0; row < HEIGHT; ++row)
        set_row_rgb(bitmap + (row*WIDTH), WIDTH, T);

    attach_rgb_pixels(bmp, bitmap);
}

// Allocates the WIDTH x HEIGHT YV12 destination frame. The blend reads the
// destination before writing it, so the frame starts out as black (luma 0x10,
// chroma 0x80) rather than uninitialised memory. The buffer is owned by the
// caller and released with free(buf.bmBits).
static void setup_yv12(BITMAP &buf)
{
    const size_t luma_bytes = (size_t) WIDTH * HEIGHT;
    const size_t chroma_bytes = luma_bytes/2;

    uchar *data = (uchar*) xmalloc(luma_bytes + chroma_bytes);
    memset(data, 0x10, luma_bytes);
    memset(data + luma_bytes, 0x80, chroma_bytes);

    buf.bmWidth = WIDTH;
    buf.bmWidthBytes = WIDTH;
    buf.bmHeight = HEIGHT;
    buf.bmBits = (void **)data;
}

#define CALLS 1000

// Calls func(b1, b2) CALLS times and prints the mean wall-clock time per call
// in microseconds, labelled with name.
void time_func(const char *name, void (*func)(BITMAP&,BITMAP&), BITMAP &b1, BITMAP &b2)
{
    struct timeval begin, end;
    int i;

    gettimeofday(&begin,NULL);

    for(i = 0; i < CALLS; ++i)
        func(b1,b2);

    gettimeofday(&end,NULL);

    // Done in double so a long run cannot overflow an integer microsecond
    // count.
    double elapsed_usec = (double) (end.tv_sec - begin.tv_sec) * 1000000.0
                        + (double) (end.tv_usec - begin.tv_usec);

    printf("%s = %.2f usec per call\n",name, elapsed_usec / (double) CALLS);
}

int main(int argc, char *argv[])
{
    (void) argc;
    (void) argv;

    BITMAP rgb_w, rgb_b, yv12;

    setup_rgb_worst(rgb_w);
    setup_rgb_best(rgb_b);
    setup_yv12(yv12);

    printf("WIDTH: %d, HEIGHT: %d\n",WIDTH,HEIGHT);

    time_func("RGBA32_YV12AlphaBlend (worst)",RGBA32_YV12AlphaBlend,yv12,rgb_w);
    time_func("RGBA32_YV12AlphaBlend (best)",RGBA32_YV12AlphaBlend,yv12,rgb_b);

    free(rgb_w.bmBits);
    free(rgb_b.bmBits);
    free(yv12.bmBits);

    return 0;
}