blob: ddbdde5037182039414314a34a9e85bfd66ef0af [file] [log] [blame]
Austin Schuh3333ec72022-12-29 16:21:06 -08001/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
2All rights reserved.
3This software was developed in the APRIL Robotics Lab under the
4direction of Edwin Olson, ebolson@umich.edu. This software may be
5available under alternative licensing terms; contact the address above.
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
81. Redistributions of source code must retain the above copyright notice, this
9 list of conditions and the following disclaimer.
102. Redistributions in binary form must reproduce the above copyright notice,
11 this list of conditions and the following disclaimer in the documentation
12 and/or other materials provided with the distribution.
13THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
14ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
17ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23The views and conclusions contained in the software and documentation are those
24of the authors and should not be interpreted as representing official policies,
25either expressed or implied, of the Regents of The University of Michigan.
26*/
27
28#include <math.h>
29#include <stdint.h>
30
31#ifndef M_PI
32# define M_PI 3.141592653589793238462643383279502884196
33#endif
34
35// 8 bits of fixed-point output
36//
37// This implementation has a worst-case complexity of 22 multiplies
38// and 64 adds. This makes it significantly worse (about 2x) than the
39// best-known fast inverse cosine transform methods. HOWEVER, zero
40// coefficients can be skipped over, and since that's common (often
41// more than half the coefficients are zero).
42//
43// The output is scaled by a factor of 256 (due to our fixed-point
44// integer arithmetic)..
45static inline void idct_1D_u32(int32_t *in, int instride, int32_t *out, int outstride)
46{
47 for (int x = 0; x < 8; x++)
48 out[x*outstride] = 0;
49
50 int32_t c;
51
52 c = in[0*instride];
53 if (c) {
54 // 181 181 181 181 181 181 181 181
55 int32_t c181 = c * 181;
56 out[0*outstride] += c181;
57 out[1*outstride] += c181;
58 out[2*outstride] += c181;
59 out[3*outstride] += c181;
60 out[4*outstride] += c181;
61 out[5*outstride] += c181;
62 out[6*outstride] += c181;
63 out[7*outstride] += c181;
64 }
65
66 c = in[1*instride];
67 if (c) {
68 // 251 212 142 49 -49 -142 -212 -251
69 int32_t c251 = c * 251;
70 int32_t c212 = c * 212;
71 int32_t c142 = c * 142;
72 int32_t c49 = c * 49;
73 out[0*outstride] += c251;
74 out[1*outstride] += c212;
75 out[2*outstride] += c142;
76 out[3*outstride] += c49;
77 out[4*outstride] -= c49;
78 out[5*outstride] -= c142;
79 out[6*outstride] -= c212;
80 out[7*outstride] -= c251;
81 }
82
83 c = in[2*instride];
84 if (c) {
85 // 236 97 -97 -236 -236 -97 97 236
86 int32_t c236 = c*236;
87 int32_t c97 = c*97;
88 out[0*outstride] += c236;
89 out[1*outstride] += c97;
90 out[2*outstride] -= c97;
91 out[3*outstride] -= c236;
92 out[4*outstride] -= c236;
93 out[5*outstride] -= c97;
94 out[6*outstride] += c97;
95 out[7*outstride] += c236;
96 }
97
98 c = in[3*instride];
99 if (c) {
100 // 212 -49 -251 -142 142 251 49 -212
101 int32_t c212 = c*212;
102 int32_t c49 = c*49;
103 int32_t c251 = c*251;
104 int32_t c142 = c*142;
105 out[0*outstride] += c212;
106 out[1*outstride] -= c49;
107 out[2*outstride] -= c251;
108 out[3*outstride] -= c142;
109 out[4*outstride] += c142;
110 out[5*outstride] += c251;
111 out[6*outstride] += c49;
112 out[7*outstride] -= c212;
113 }
114
115 c = in[4*instride];
116 if (c) {
117 // 181 -181 -181 181 181 -181 -181 181
118 int32_t c181 = c*181;
119 out[0*outstride] += c181;
120 out[1*outstride] -= c181;
121 out[2*outstride] -= c181;
122 out[3*outstride] += c181;
123 out[4*outstride] += c181;
124 out[5*outstride] -= c181;
125 out[6*outstride] -= c181;
126 out[7*outstride] += c181;
127 }
128
129 c = in[5*instride];
130 if (c) {
131 // 142 -251 49 212 -212 -49 251 -142
132 int32_t c142 = c*142;
133 int32_t c251 = c*251;
134 int32_t c49 = c*49;
135 int32_t c212 = c*212;
136 out[0*outstride] += c142;
137 out[1*outstride] -= c251;
138 out[2*outstride] += c49;
139 out[3*outstride] += c212;
140 out[4*outstride] -= c212;
141 out[5*outstride] -= c49;
142 out[6*outstride] += c251;
143 out[7*outstride] -= c142;
144 }
145
146 c = in[6*instride];
147 if (c) {
148 // 97 -236 236 -97 -97 236 -236 97
149 int32_t c97 = c*97;
150 int32_t c236 = c*236;
151 out[0*outstride] += c97;
152 out[1*outstride] -= c236;
153 out[2*outstride] += c236;
154 out[3*outstride] -= c97;
155 out[4*outstride] -= c97;
156 out[5*outstride] += c236;
157 out[6*outstride] -= c236;
158 out[7*outstride] += c97;
159 }
160
161 c = in[7*instride];
162 if (c) {
163 // 49 -142 212 -251 251 -212 142 -49
164 int32_t c49 = c*49;
165 int32_t c142 = c*142;
166 int32_t c212 = c*212;
167 int32_t c251 = c*251;
168 out[0*outstride] += c49;
169 out[1*outstride] -= c142;
170 out[2*outstride] += c212;
171 out[3*outstride] -= c251;
172 out[4*outstride] += c251;
173 out[5*outstride] -= c212;
174 out[6*outstride] += c142;
175 out[7*outstride] -= c49;
176 }
177}
178
179void pjpeg_idct_2D_u32(int32_t in[64], uint8_t *out, uint32_t outstride)
180{
181 int32_t tmp[64];
182
183 // idct on rows
184 for (int y = 0; y < 8; y++)
185 idct_1D_u32(&in[8*y], 1, &tmp[8*y], 1);
186
187 int32_t tmp2[64];
188
189 // idct on columns
190 for (int x = 0; x < 8; x++)
191 idct_1D_u32(&tmp[x], 8, &tmp2[x], 8);
192
193 // scale, adjust bias, and clamp
194 for (int y = 0; y < 8; y++) {
195 for (int x = 0; x < 8; x++) {
196 int i = 8*y + x;
197
198 // Shift of 18: the divide by 4 as part of the idct, and a shift by 16
199 // to undo the fixed-point arithmetic. (We accumulated 8 bits of
200 // fractional precision during each of the row and column IDCTs)
201 //
202 // Originally:
203 // int32_t v = (tmp2[i] >> 18) + 128;
204 //
205 // Move the add before the shift and we can do rounding at
206 // the same time.
207 const int32_t offset = (128 << 18) + (1 << 17);
208 int32_t v = (tmp2[i] + offset) >> 18;
209
210 if (v < 0)
211 v = 0;
212 if (v > 255)
213 v = 255;
214
215 out[y*outstride + x] = v;
216 }
217 }
218}
219
220///////////////////////////////////////////////////////
221// Below: a "as straight-forward as I can make" implementation.
222static inline void idct_1D_double(double *in, int instride, double *out, int outstride)
223{
224 for (int x = 0; x < 8; x++)
225 out[x*outstride] = 0;
226
227 // iterate over IDCT coefficients
228 double Cu = 1/sqrt(2);
229
230 for (int u = 0; u < 8; u++, Cu = 1) {
231
232 double coeff = in[u*instride];
233 if (coeff == 0)
234 continue;
235
236 for (int x = 0; x < 8; x++)
237 out[x*outstride] += Cu*cos((2*x+1)*u*M_PI/16) * coeff;
238 }
239}
240
241void pjpeg_idct_2D_double(int32_t in[64], uint8_t *out, uint32_t outstride)
242{
243 double din[64], dout[64];
244 for (int i = 0; i < 64; i++)
245 din[i] = in[i];
246
247 double tmp[64];
248
249 // idct on rows
250 for (int y = 0; y < 8; y++)
251 idct_1D_double(&din[8*y], 1, &tmp[8*y], 1);
252
253 // idct on columns
254 for (int x = 0; x < 8; x++)
255 idct_1D_double(&tmp[x], 8, &dout[x], 8);
256
257 // scale, adjust bias, and clamp
258 for (int y = 0; y < 8; y++) {
259 for (int x = 0; x < 8; x++) {
260 int i = 8*y + x;
261
262 dout[i] = (dout[i] / 4) + 128;
263 if (dout[i] < 0)
264 dout[i] = 0;
265 if (dout[i] > 255)
266 dout[i] = 255;
267
268 // XXX round by adding +.5?
269 out[y*outstride + x] = dout[i];
270 }
271 }
272}
273
274//////////////////////////////////////////////
275static inline unsigned char njClip(const int x) {
276 return (x < 0) ? 0 : ((x > 0xFF) ? 0xFF : (unsigned char) x);
277}
278
279#define W1 2841
280#define W2 2676
281#define W3 2408
282#define W5 1609
283#define W6 1108
284#define W7 565
285
286static inline void njRowIDCT(int* blk) {
287 int x0, x1, x2, x3, x4, x5, x6, x7, x8;
288 if (!((x1 = blk[4] << 11)
289 | (x2 = blk[6])
290 | (x3 = blk[2])
291 | (x4 = blk[1])
292 | (x5 = blk[7])
293 | (x6 = blk[5])
294 | (x7 = blk[3])))
295 {
296 blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = blk[0] << 3;
297 return;
298 }
299 x0 = (blk[0] << 11) + 128;
300 x8 = W7 * (x4 + x5);
301 x4 = x8 + (W1 - W7) * x4;
302 x5 = x8 - (W1 + W7) * x5;
303 x8 = W3 * (x6 + x7);
304 x6 = x8 - (W3 - W5) * x6;
305 x7 = x8 - (W3 + W5) * x7;
306 x8 = x0 + x1;
307 x0 -= x1;
308 x1 = W6 * (x3 + x2);
309 x2 = x1 - (W2 + W6) * x2;
310 x3 = x1 + (W2 - W6) * x3;
311 x1 = x4 + x6;
312 x4 -= x6;
313 x6 = x5 + x7;
314 x5 -= x7;
315 x7 = x8 + x3;
316 x8 -= x3;
317 x3 = x0 + x2;
318 x0 -= x2;
319 x2 = (181 * (x4 + x5) + 128) >> 8;
320 x4 = (181 * (x4 - x5) + 128) >> 8;
321 blk[0] = (x7 + x1) >> 8;
322 blk[1] = (x3 + x2) >> 8;
323 blk[2] = (x0 + x4) >> 8;
324 blk[3] = (x8 + x6) >> 8;
325 blk[4] = (x8 - x6) >> 8;
326 blk[5] = (x0 - x4) >> 8;
327 blk[6] = (x3 - x2) >> 8;
328 blk[7] = (x7 - x1) >> 8;
329}
330
331static inline void njColIDCT(const int* blk, unsigned char *out, int stride) {
332 int x0, x1, x2, x3, x4, x5, x6, x7, x8;
333 if (!((x1 = blk[8*4] << 8)
334 | (x2 = blk[8*6])
335 | (x3 = blk[8*2])
336 | (x4 = blk[8*1])
337 | (x5 = blk[8*7])
338 | (x6 = blk[8*5])
339 | (x7 = blk[8*3])))
340 {
341 x1 = njClip(((blk[0] + 32) >> 6) + 128);
342 for (x0 = 8; x0; --x0) {
343 *out = (unsigned char) x1;
344 out += stride;
345 }
346 return;
347 }
348 x0 = (blk[0] << 8) + 8192;
349 x8 = W7 * (x4 + x5) + 4;
350 x4 = (x8 + (W1 - W7) * x4) >> 3;
351 x5 = (x8 - (W1 + W7) * x5) >> 3;
352 x8 = W3 * (x6 + x7) + 4;
353 x6 = (x8 - (W3 - W5) * x6) >> 3;
354 x7 = (x8 - (W3 + W5) * x7) >> 3;
355 x8 = x0 + x1;
356 x0 -= x1;
357 x1 = W6 * (x3 + x2) + 4;
358 x2 = (x1 - (W2 + W6) * x2) >> 3;
359 x3 = (x1 + (W2 - W6) * x3) >> 3;
360 x1 = x4 + x6;
361 x4 -= x6;
362 x6 = x5 + x7;
363 x5 -= x7;
364 x7 = x8 + x3;
365 x8 -= x3;
366 x3 = x0 + x2;
367 x0 -= x2;
368 x2 = (181 * (x4 + x5) + 128) >> 8;
369 x4 = (181 * (x4 - x5) + 128) >> 8;
370 *out = njClip(((x7 + x1) >> 14) + 128); out += stride;
371 *out = njClip(((x3 + x2) >> 14) + 128); out += stride;
372 *out = njClip(((x0 + x4) >> 14) + 128); out += stride;
373 *out = njClip(((x8 + x6) >> 14) + 128); out += stride;
374 *out = njClip(((x8 - x6) >> 14) + 128); out += stride;
375 *out = njClip(((x0 - x4) >> 14) + 128); out += stride;
376 *out = njClip(((x3 - x2) >> 14) + 128); out += stride;
377 *out = njClip(((x7 - x1) >> 14) + 128);
378}
379
380void pjpeg_idct_2D_nanojpeg(int32_t in[64], uint8_t *out, uint32_t outstride)
381{
382 int coef;
383
384 for (coef = 0; coef < 64; coef += 8)
385 njRowIDCT(&in[coef]);
386 for (coef = 0; coef < 8; ++coef)
387 njColIDCT(&in[coef], &out[coef], outstride);
388}