Austin Schuh | 3333ec7 | 2022-12-29 16:21:06 -0800 | [diff] [blame^] | 1 | /* Copyright (C) 2013-2016, The Regents of The University of Michigan. |
| 2 | All rights reserved. |
| 3 | This software was developed in the APRIL Robotics Lab under the |
| 4 | direction of Edwin Olson, ebolson@umich.edu. This software may be |
| 5 | available under alternative licensing terms; contact the address above. |
| 6 | Redistribution and use in source and binary forms, with or without |
| 7 | modification, are permitted provided that the following conditions are met: |
| 8 | 1. Redistributions of source code must retain the above copyright notice, this |
| 9 | list of conditions and the following disclaimer. |
| 10 | 2. Redistributions in binary form must reproduce the above copyright notice, |
| 11 | this list of conditions and the following disclaimer in the documentation |
| 12 | and/or other materials provided with the distribution. |
| 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
| 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 23 | The views and conclusions contained in the software and documentation are those |
| 24 | of the authors and should not be interpreted as representing official policies, |
| 25 | either expressed or implied, of the Regents of The University of Michigan. |
| 26 | */ |
| 27 | |
| 28 | #include <math.h> |
| 29 | #include <stdint.h> |
| 30 | |
| 31 | #ifndef M_PI |
| 32 | # define M_PI 3.141592653589793238462643383279502884196 |
| 33 | #endif |
| 34 | |
| 35 | // 8 bits of fixed-point output |
| 36 | // |
| 37 | // This implementation has a worst-case complexity of 22 multiplies |
| 38 | // and 64 adds. This makes it significantly worse (about 2x) than the |
| 39 | // best-known fast inverse cosine transform methods. HOWEVER, zero |
| 40 | // coefficients can be skipped over, and since that's common (often |
| 41 | // more than half the coefficients are zero). |
| 42 | // |
| 43 | // The output is scaled by a factor of 256 (due to our fixed-point |
| 44 | // integer arithmetic).. |
| 45 | static inline void idct_1D_u32(int32_t *in, int instride, int32_t *out, int outstride) |
| 46 | { |
| 47 | for (int x = 0; x < 8; x++) |
| 48 | out[x*outstride] = 0; |
| 49 | |
| 50 | int32_t c; |
| 51 | |
| 52 | c = in[0*instride]; |
| 53 | if (c) { |
| 54 | // 181 181 181 181 181 181 181 181 |
| 55 | int32_t c181 = c * 181; |
| 56 | out[0*outstride] += c181; |
| 57 | out[1*outstride] += c181; |
| 58 | out[2*outstride] += c181; |
| 59 | out[3*outstride] += c181; |
| 60 | out[4*outstride] += c181; |
| 61 | out[5*outstride] += c181; |
| 62 | out[6*outstride] += c181; |
| 63 | out[7*outstride] += c181; |
| 64 | } |
| 65 | |
| 66 | c = in[1*instride]; |
| 67 | if (c) { |
| 68 | // 251 212 142 49 -49 -142 -212 -251 |
| 69 | int32_t c251 = c * 251; |
| 70 | int32_t c212 = c * 212; |
| 71 | int32_t c142 = c * 142; |
| 72 | int32_t c49 = c * 49; |
| 73 | out[0*outstride] += c251; |
| 74 | out[1*outstride] += c212; |
| 75 | out[2*outstride] += c142; |
| 76 | out[3*outstride] += c49; |
| 77 | out[4*outstride] -= c49; |
| 78 | out[5*outstride] -= c142; |
| 79 | out[6*outstride] -= c212; |
| 80 | out[7*outstride] -= c251; |
| 81 | } |
| 82 | |
| 83 | c = in[2*instride]; |
| 84 | if (c) { |
| 85 | // 236 97 -97 -236 -236 -97 97 236 |
| 86 | int32_t c236 = c*236; |
| 87 | int32_t c97 = c*97; |
| 88 | out[0*outstride] += c236; |
| 89 | out[1*outstride] += c97; |
| 90 | out[2*outstride] -= c97; |
| 91 | out[3*outstride] -= c236; |
| 92 | out[4*outstride] -= c236; |
| 93 | out[5*outstride] -= c97; |
| 94 | out[6*outstride] += c97; |
| 95 | out[7*outstride] += c236; |
| 96 | } |
| 97 | |
| 98 | c = in[3*instride]; |
| 99 | if (c) { |
| 100 | // 212 -49 -251 -142 142 251 49 -212 |
| 101 | int32_t c212 = c*212; |
| 102 | int32_t c49 = c*49; |
| 103 | int32_t c251 = c*251; |
| 104 | int32_t c142 = c*142; |
| 105 | out[0*outstride] += c212; |
| 106 | out[1*outstride] -= c49; |
| 107 | out[2*outstride] -= c251; |
| 108 | out[3*outstride] -= c142; |
| 109 | out[4*outstride] += c142; |
| 110 | out[5*outstride] += c251; |
| 111 | out[6*outstride] += c49; |
| 112 | out[7*outstride] -= c212; |
| 113 | } |
| 114 | |
| 115 | c = in[4*instride]; |
| 116 | if (c) { |
| 117 | // 181 -181 -181 181 181 -181 -181 181 |
| 118 | int32_t c181 = c*181; |
| 119 | out[0*outstride] += c181; |
| 120 | out[1*outstride] -= c181; |
| 121 | out[2*outstride] -= c181; |
| 122 | out[3*outstride] += c181; |
| 123 | out[4*outstride] += c181; |
| 124 | out[5*outstride] -= c181; |
| 125 | out[6*outstride] -= c181; |
| 126 | out[7*outstride] += c181; |
| 127 | } |
| 128 | |
| 129 | c = in[5*instride]; |
| 130 | if (c) { |
| 131 | // 142 -251 49 212 -212 -49 251 -142 |
| 132 | int32_t c142 = c*142; |
| 133 | int32_t c251 = c*251; |
| 134 | int32_t c49 = c*49; |
| 135 | int32_t c212 = c*212; |
| 136 | out[0*outstride] += c142; |
| 137 | out[1*outstride] -= c251; |
| 138 | out[2*outstride] += c49; |
| 139 | out[3*outstride] += c212; |
| 140 | out[4*outstride] -= c212; |
| 141 | out[5*outstride] -= c49; |
| 142 | out[6*outstride] += c251; |
| 143 | out[7*outstride] -= c142; |
| 144 | } |
| 145 | |
| 146 | c = in[6*instride]; |
| 147 | if (c) { |
| 148 | // 97 -236 236 -97 -97 236 -236 97 |
| 149 | int32_t c97 = c*97; |
| 150 | int32_t c236 = c*236; |
| 151 | out[0*outstride] += c97; |
| 152 | out[1*outstride] -= c236; |
| 153 | out[2*outstride] += c236; |
| 154 | out[3*outstride] -= c97; |
| 155 | out[4*outstride] -= c97; |
| 156 | out[5*outstride] += c236; |
| 157 | out[6*outstride] -= c236; |
| 158 | out[7*outstride] += c97; |
| 159 | } |
| 160 | |
| 161 | c = in[7*instride]; |
| 162 | if (c) { |
| 163 | // 49 -142 212 -251 251 -212 142 -49 |
| 164 | int32_t c49 = c*49; |
| 165 | int32_t c142 = c*142; |
| 166 | int32_t c212 = c*212; |
| 167 | int32_t c251 = c*251; |
| 168 | out[0*outstride] += c49; |
| 169 | out[1*outstride] -= c142; |
| 170 | out[2*outstride] += c212; |
| 171 | out[3*outstride] -= c251; |
| 172 | out[4*outstride] += c251; |
| 173 | out[5*outstride] -= c212; |
| 174 | out[6*outstride] += c142; |
| 175 | out[7*outstride] -= c49; |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | void pjpeg_idct_2D_u32(int32_t in[64], uint8_t *out, uint32_t outstride) |
| 180 | { |
| 181 | int32_t tmp[64]; |
| 182 | |
| 183 | // idct on rows |
| 184 | for (int y = 0; y < 8; y++) |
| 185 | idct_1D_u32(&in[8*y], 1, &tmp[8*y], 1); |
| 186 | |
| 187 | int32_t tmp2[64]; |
| 188 | |
| 189 | // idct on columns |
| 190 | for (int x = 0; x < 8; x++) |
| 191 | idct_1D_u32(&tmp[x], 8, &tmp2[x], 8); |
| 192 | |
| 193 | // scale, adjust bias, and clamp |
| 194 | for (int y = 0; y < 8; y++) { |
| 195 | for (int x = 0; x < 8; x++) { |
| 196 | int i = 8*y + x; |
| 197 | |
| 198 | // Shift of 18: the divide by 4 as part of the idct, and a shift by 16 |
| 199 | // to undo the fixed-point arithmetic. (We accumulated 8 bits of |
| 200 | // fractional precision during each of the row and column IDCTs) |
| 201 | // |
| 202 | // Originally: |
| 203 | // int32_t v = (tmp2[i] >> 18) + 128; |
| 204 | // |
| 205 | // Move the add before the shift and we can do rounding at |
| 206 | // the same time. |
| 207 | const int32_t offset = (128 << 18) + (1 << 17); |
| 208 | int32_t v = (tmp2[i] + offset) >> 18; |
| 209 | |
| 210 | if (v < 0) |
| 211 | v = 0; |
| 212 | if (v > 255) |
| 213 | v = 255; |
| 214 | |
| 215 | out[y*outstride + x] = v; |
| 216 | } |
| 217 | } |
| 218 | } |
| 219 | |
| 220 | /////////////////////////////////////////////////////// |
| 221 | // Below: a "as straight-forward as I can make" implementation. |
| 222 | static inline void idct_1D_double(double *in, int instride, double *out, int outstride) |
| 223 | { |
| 224 | for (int x = 0; x < 8; x++) |
| 225 | out[x*outstride] = 0; |
| 226 | |
| 227 | // iterate over IDCT coefficients |
| 228 | double Cu = 1/sqrt(2); |
| 229 | |
| 230 | for (int u = 0; u < 8; u++, Cu = 1) { |
| 231 | |
| 232 | double coeff = in[u*instride]; |
| 233 | if (coeff == 0) |
| 234 | continue; |
| 235 | |
| 236 | for (int x = 0; x < 8; x++) |
| 237 | out[x*outstride] += Cu*cos((2*x+1)*u*M_PI/16) * coeff; |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | void pjpeg_idct_2D_double(int32_t in[64], uint8_t *out, uint32_t outstride) |
| 242 | { |
| 243 | double din[64], dout[64]; |
| 244 | for (int i = 0; i < 64; i++) |
| 245 | din[i] = in[i]; |
| 246 | |
| 247 | double tmp[64]; |
| 248 | |
| 249 | // idct on rows |
| 250 | for (int y = 0; y < 8; y++) |
| 251 | idct_1D_double(&din[8*y], 1, &tmp[8*y], 1); |
| 252 | |
| 253 | // idct on columns |
| 254 | for (int x = 0; x < 8; x++) |
| 255 | idct_1D_double(&tmp[x], 8, &dout[x], 8); |
| 256 | |
| 257 | // scale, adjust bias, and clamp |
| 258 | for (int y = 0; y < 8; y++) { |
| 259 | for (int x = 0; x < 8; x++) { |
| 260 | int i = 8*y + x; |
| 261 | |
| 262 | dout[i] = (dout[i] / 4) + 128; |
| 263 | if (dout[i] < 0) |
| 264 | dout[i] = 0; |
| 265 | if (dout[i] > 255) |
| 266 | dout[i] = 255; |
| 267 | |
| 268 | // XXX round by adding +.5? |
| 269 | out[y*outstride + x] = dout[i]; |
| 270 | } |
| 271 | } |
| 272 | } |
| 273 | |
| 274 | ////////////////////////////////////////////// |
| 275 | static inline unsigned char njClip(const int x) { |
| 276 | return (x < 0) ? 0 : ((x > 0xFF) ? 0xFF : (unsigned char) x); |
| 277 | } |
| 278 | |
| 279 | #define W1 2841 |
| 280 | #define W2 2676 |
| 281 | #define W3 2408 |
| 282 | #define W5 1609 |
| 283 | #define W6 1108 |
| 284 | #define W7 565 |
| 285 | |
| 286 | static inline void njRowIDCT(int* blk) { |
| 287 | int x0, x1, x2, x3, x4, x5, x6, x7, x8; |
| 288 | if (!((x1 = blk[4] << 11) |
| 289 | | (x2 = blk[6]) |
| 290 | | (x3 = blk[2]) |
| 291 | | (x4 = blk[1]) |
| 292 | | (x5 = blk[7]) |
| 293 | | (x6 = blk[5]) |
| 294 | | (x7 = blk[3]))) |
| 295 | { |
| 296 | blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = blk[0] << 3; |
| 297 | return; |
| 298 | } |
| 299 | x0 = (blk[0] << 11) + 128; |
| 300 | x8 = W7 * (x4 + x5); |
| 301 | x4 = x8 + (W1 - W7) * x4; |
| 302 | x5 = x8 - (W1 + W7) * x5; |
| 303 | x8 = W3 * (x6 + x7); |
| 304 | x6 = x8 - (W3 - W5) * x6; |
| 305 | x7 = x8 - (W3 + W5) * x7; |
| 306 | x8 = x0 + x1; |
| 307 | x0 -= x1; |
| 308 | x1 = W6 * (x3 + x2); |
| 309 | x2 = x1 - (W2 + W6) * x2; |
| 310 | x3 = x1 + (W2 - W6) * x3; |
| 311 | x1 = x4 + x6; |
| 312 | x4 -= x6; |
| 313 | x6 = x5 + x7; |
| 314 | x5 -= x7; |
| 315 | x7 = x8 + x3; |
| 316 | x8 -= x3; |
| 317 | x3 = x0 + x2; |
| 318 | x0 -= x2; |
| 319 | x2 = (181 * (x4 + x5) + 128) >> 8; |
| 320 | x4 = (181 * (x4 - x5) + 128) >> 8; |
| 321 | blk[0] = (x7 + x1) >> 8; |
| 322 | blk[1] = (x3 + x2) >> 8; |
| 323 | blk[2] = (x0 + x4) >> 8; |
| 324 | blk[3] = (x8 + x6) >> 8; |
| 325 | blk[4] = (x8 - x6) >> 8; |
| 326 | blk[5] = (x0 - x4) >> 8; |
| 327 | blk[6] = (x3 - x2) >> 8; |
| 328 | blk[7] = (x7 - x1) >> 8; |
| 329 | } |
| 330 | |
| 331 | static inline void njColIDCT(const int* blk, unsigned char *out, int stride) { |
| 332 | int x0, x1, x2, x3, x4, x5, x6, x7, x8; |
| 333 | if (!((x1 = blk[8*4] << 8) |
| 334 | | (x2 = blk[8*6]) |
| 335 | | (x3 = blk[8*2]) |
| 336 | | (x4 = blk[8*1]) |
| 337 | | (x5 = blk[8*7]) |
| 338 | | (x6 = blk[8*5]) |
| 339 | | (x7 = blk[8*3]))) |
| 340 | { |
| 341 | x1 = njClip(((blk[0] + 32) >> 6) + 128); |
| 342 | for (x0 = 8; x0; --x0) { |
| 343 | *out = (unsigned char) x1; |
| 344 | out += stride; |
| 345 | } |
| 346 | return; |
| 347 | } |
| 348 | x0 = (blk[0] << 8) + 8192; |
| 349 | x8 = W7 * (x4 + x5) + 4; |
| 350 | x4 = (x8 + (W1 - W7) * x4) >> 3; |
| 351 | x5 = (x8 - (W1 + W7) * x5) >> 3; |
| 352 | x8 = W3 * (x6 + x7) + 4; |
| 353 | x6 = (x8 - (W3 - W5) * x6) >> 3; |
| 354 | x7 = (x8 - (W3 + W5) * x7) >> 3; |
| 355 | x8 = x0 + x1; |
| 356 | x0 -= x1; |
| 357 | x1 = W6 * (x3 + x2) + 4; |
| 358 | x2 = (x1 - (W2 + W6) * x2) >> 3; |
| 359 | x3 = (x1 + (W2 - W6) * x3) >> 3; |
| 360 | x1 = x4 + x6; |
| 361 | x4 -= x6; |
| 362 | x6 = x5 + x7; |
| 363 | x5 -= x7; |
| 364 | x7 = x8 + x3; |
| 365 | x8 -= x3; |
| 366 | x3 = x0 + x2; |
| 367 | x0 -= x2; |
| 368 | x2 = (181 * (x4 + x5) + 128) >> 8; |
| 369 | x4 = (181 * (x4 - x5) + 128) >> 8; |
| 370 | *out = njClip(((x7 + x1) >> 14) + 128); out += stride; |
| 371 | *out = njClip(((x3 + x2) >> 14) + 128); out += stride; |
| 372 | *out = njClip(((x0 + x4) >> 14) + 128); out += stride; |
| 373 | *out = njClip(((x8 + x6) >> 14) + 128); out += stride; |
| 374 | *out = njClip(((x8 - x6) >> 14) + 128); out += stride; |
| 375 | *out = njClip(((x0 - x4) >> 14) + 128); out += stride; |
| 376 | *out = njClip(((x3 - x2) >> 14) + 128); out += stride; |
| 377 | *out = njClip(((x7 - x1) >> 14) + 128); |
| 378 | } |
| 379 | |
| 380 | void pjpeg_idct_2D_nanojpeg(int32_t in[64], uint8_t *out, uint32_t outstride) |
| 381 | { |
| 382 | int coef; |
| 383 | |
| 384 | for (coef = 0; coef < 64; coef += 8) |
| 385 | njRowIDCT(&in[coef]); |
| 386 | for (coef = 0; coef < 8; ++coef) |
| 387 | njColIDCT(&in[coef], &out[coef], outstride); |
| 388 | } |