diff options
| author | Dana Jansens <danakj@orodu.net> | 2008-02-10 16:49:16 -0500 |
|---|---|---|
| committer | Mikael Magnusson <mikachu@comhem.se> | 2008-02-14 19:40:02 +0100 |
| commit | 1d00d9947067da76ac4d8d0a6b9ef2c28e73349e (patch) | |
| tree | aedd5d32e7b1a63ffed131181e110924ef53c7ec /render/gradient.c | |
| parent | 18a35e04914bf8d108cf7a9d46970f13620ef534 (diff) | |
use memcpy's to make splitvertical gradient much faster - using log n memcpy's is much quicker than setting a pointer value n times
Here are some profiling results. splitvertical1 is the original code, splitvertical2 is some slight improvements in locality for it, and splitvertical3 is the new O(log n) memcpy code
% cumulative self self total
time seconds seconds calls ms/call ms/call name
49.44 0.88 0.88 1063 0.83 0.83 gradient_splitvertical1
47.19 1.72 0.84 1063 0.79 0.79 gradient_splitvertical2
2.81 1.77 0.05 1063 0.05 0.05 gradient_splitvertical3
i also tested this with 'time' to draw 1000 gradients, and the new code used approximately half the user time, and finished 10 seconds quicker. so yeah, it's magical and works well.
Diffstat (limited to 'render/gradient.c')
| -rw-r--r-- | render/gradient.c | 78 |
1 files changed, 59 insertions, 19 deletions
diff --git a/render/gradient.c b/render/gradient.c index 6439b301..bbd2a5c9 100644 --- a/render/gradient.c +++ b/render/gradient.c @@ -425,8 +425,7 @@ static void gradient_splitvertical(RrAppearance *a, gint w, gint h) { gint x, y1, y2, y3; RrSurface *sf = &a->surface; - RrPixel32 *data = sf->pixel_data; - RrPixel32 current; + RrPixel32 *data, *start; gint y1sz, y2sz, y3sz; VARS(y1); @@ -455,28 +454,69 @@ static void gradient_splitvertical(RrAppearance *a, gint w, gint h) } SETUP(y3, sf->secondary, sf->split_secondary, y3sz); - for (y1 = y1sz; y1 > 0; --y1) { - current = COLOR(y1); - for (x = w - 1; x >= 0; --x) - *(data++) = current; + /* find the color for the first pixel of each row first */ + data = sf->pixel_data; + for (y1 = y1sz-1; y1 > 0; --y1) { + *data = COLOR(y1); + data += w; NEXT(y1); } - - for (y2 = y2sz; y2 > 0; --y2) { - current = COLOR(y2); - for (x = w - 1; x >= 0; --x) - *(data++) = current; - + *data = COLOR(y1); + data += w; + for (y2 = y2sz-1; y2 > 0; --y2) { + *data = COLOR(y2); + data += w; NEXT(y2); } + *data = COLOR(y2); + data += w; + for (y3 = y3sz-1; y3 > 0; --y3) { + *data = COLOR(y3); + data += w; + NEXT(y3); + } + *data = COLOR(y3); - for (y3 = y3sz; y3 > 0; --y3) { - current = COLOR(y3); - for (x = w - 1; x >= 0; --x) - *(data++) = current; + /* copy the first pixels into the whole rows */ - NEXT(y3); + start = sf->pixel_data; + data = start + 1; + + for (y1 = h; y1 > 0; --y1) { + /* for really small things, just copy ourselves */ + if (w < 8) { + for (x = w-1; x > 0; --x) + *(data++) = *start; + } + /* for >= 8, then use O(log n) memcpy's... */ + else { + gint len = 4; + gint lenbytes = 4 * sizeof(RrPixel32); + + /* copy the first 3 * 32 bits (3 words) ourselves - then we have + 3 + the original 1 = 4 words to make copies of at a time + + this is faster than doing memcpy for 1 or 2 words at a time + */ + for (x = 3; x > 0; --x) + *(data++) = *start; + + for (x = w - 4; x > 0;) { + memcpy(data, start, lenbytes); + x -= len; + data += len; + len <<= 1; + lenbytes <<= 1; + if (len > x) { + len = x; + lenbytes = x * sizeof(RrPixel32); + } + } + } + + start += w; + ++data; } } @@ -551,13 +591,13 @@ static void gradient_vertical(RrSurface *sf, gint w, gint h) for (y = h - 1; y > 0; --y) { /* 0 -> h-1 */ current = COLOR(y); - for (x = w - 1; x >= 0; --x) /* 0 -> w */ + for (x = w; x > 0; --x) /* 0 -> w */ *(data++) = current; NEXT(y); } current = COLOR(y); - for (x = w - 1; x >= 0; --x) /* 0 -> w */ + for (x = w; x > 0; --x) /* 0 -> w */ *(data++) = current; } |
