Squashed 'third_party/apriltag/' content from commit 3e8e974d0
git-subtree-dir: third_party/apriltag
git-subtree-split: 3e8e974d0d8d6ab318abf56d87506d15d7f2cc35
Signed-off-by: Austin Schuh <austin.linux@gmail.com>
Change-Id: I04ba3cb2106b6813a1013d57aa8074c26f856598
diff --git a/common/pjpeg-idct.c b/common/pjpeg-idct.c
new file mode 100644
index 0000000..ddbdde5
--- /dev/null
+++ b/common/pjpeg-idct.c
@@ -0,0 +1,388 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <math.h>
+#include <stdint.h>
+
+#ifndef M_PI
+# define M_PI 3.141592653589793238462643383279502884196
+#endif
+
+// 8 bits of fixed-point output
+//
+// This implementation has a worst-case complexity of 22 multiplies
+// and 64 adds. This makes it significantly worse (about 2x) than the
+// best-known fast inverse cosine transform methods. HOWEVER, zero
+// coefficients can be skipped over, and since that's common (often
+// more than half the coefficients are zero).
+//
+// The output is scaled by a factor of 256 (due to our fixed-point
+// integer arithmetic)..
+static inline void idct_1D_u32(int32_t *in, int instride, int32_t *out, int outstride)
+{
+ for (int x = 0; x < 8; x++)
+ out[x*outstride] = 0;
+
+ int32_t c;
+
+ c = in[0*instride];
+ if (c) {
+ // 181 181 181 181 181 181 181 181
+ int32_t c181 = c * 181;
+ out[0*outstride] += c181;
+ out[1*outstride] += c181;
+ out[2*outstride] += c181;
+ out[3*outstride] += c181;
+ out[4*outstride] += c181;
+ out[5*outstride] += c181;
+ out[6*outstride] += c181;
+ out[7*outstride] += c181;
+ }
+
+ c = in[1*instride];
+ if (c) {
+ // 251 212 142 49 -49 -142 -212 -251
+ int32_t c251 = c * 251;
+ int32_t c212 = c * 212;
+ int32_t c142 = c * 142;
+ int32_t c49 = c * 49;
+ out[0*outstride] += c251;
+ out[1*outstride] += c212;
+ out[2*outstride] += c142;
+ out[3*outstride] += c49;
+ out[4*outstride] -= c49;
+ out[5*outstride] -= c142;
+ out[6*outstride] -= c212;
+ out[7*outstride] -= c251;
+ }
+
+ c = in[2*instride];
+ if (c) {
+ // 236 97 -97 -236 -236 -97 97 236
+ int32_t c236 = c*236;
+ int32_t c97 = c*97;
+ out[0*outstride] += c236;
+ out[1*outstride] += c97;
+ out[2*outstride] -= c97;
+ out[3*outstride] -= c236;
+ out[4*outstride] -= c236;
+ out[5*outstride] -= c97;
+ out[6*outstride] += c97;
+ out[7*outstride] += c236;
+ }
+
+ c = in[3*instride];
+ if (c) {
+ // 212 -49 -251 -142 142 251 49 -212
+ int32_t c212 = c*212;
+ int32_t c49 = c*49;
+ int32_t c251 = c*251;
+ int32_t c142 = c*142;
+ out[0*outstride] += c212;
+ out[1*outstride] -= c49;
+ out[2*outstride] -= c251;
+ out[3*outstride] -= c142;
+ out[4*outstride] += c142;
+ out[5*outstride] += c251;
+ out[6*outstride] += c49;
+ out[7*outstride] -= c212;
+ }
+
+ c = in[4*instride];
+ if (c) {
+ // 181 -181 -181 181 181 -181 -181 181
+ int32_t c181 = c*181;
+ out[0*outstride] += c181;
+ out[1*outstride] -= c181;
+ out[2*outstride] -= c181;
+ out[3*outstride] += c181;
+ out[4*outstride] += c181;
+ out[5*outstride] -= c181;
+ out[6*outstride] -= c181;
+ out[7*outstride] += c181;
+ }
+
+ c = in[5*instride];
+ if (c) {
+ // 142 -251 49 212 -212 -49 251 -142
+ int32_t c142 = c*142;
+ int32_t c251 = c*251;
+ int32_t c49 = c*49;
+ int32_t c212 = c*212;
+ out[0*outstride] += c142;
+ out[1*outstride] -= c251;
+ out[2*outstride] += c49;
+ out[3*outstride] += c212;
+ out[4*outstride] -= c212;
+ out[5*outstride] -= c49;
+ out[6*outstride] += c251;
+ out[7*outstride] -= c142;
+ }
+
+ c = in[6*instride];
+ if (c) {
+ // 97 -236 236 -97 -97 236 -236 97
+ int32_t c97 = c*97;
+ int32_t c236 = c*236;
+ out[0*outstride] += c97;
+ out[1*outstride] -= c236;
+ out[2*outstride] += c236;
+ out[3*outstride] -= c97;
+ out[4*outstride] -= c97;
+ out[5*outstride] += c236;
+ out[6*outstride] -= c236;
+ out[7*outstride] += c97;
+ }
+
+ c = in[7*instride];
+ if (c) {
+ // 49 -142 212 -251 251 -212 142 -49
+ int32_t c49 = c*49;
+ int32_t c142 = c*142;
+ int32_t c212 = c*212;
+ int32_t c251 = c*251;
+ out[0*outstride] += c49;
+ out[1*outstride] -= c142;
+ out[2*outstride] += c212;
+ out[3*outstride] -= c251;
+ out[4*outstride] += c251;
+ out[5*outstride] -= c212;
+ out[6*outstride] += c142;
+ out[7*outstride] -= c49;
+ }
+}
+
+void pjpeg_idct_2D_u32(int32_t in[64], uint8_t *out, uint32_t outstride)
+{
+ int32_t tmp[64];
+
+ // idct on rows
+ for (int y = 0; y < 8; y++)
+ idct_1D_u32(&in[8*y], 1, &tmp[8*y], 1);
+
+ int32_t tmp2[64];
+
+ // idct on columns
+ for (int x = 0; x < 8; x++)
+ idct_1D_u32(&tmp[x], 8, &tmp2[x], 8);
+
+ // scale, adjust bias, and clamp
+ for (int y = 0; y < 8; y++) {
+ for (int x = 0; x < 8; x++) {
+ int i = 8*y + x;
+
+ // Shift of 18: the divide by 4 as part of the idct, and a shift by 16
+ // to undo the fixed-point arithmetic. (We accumulated 8 bits of
+ // fractional precision during each of the row and column IDCTs)
+ //
+ // Originally:
+ // int32_t v = (tmp2[i] >> 18) + 128;
+ //
+ // Move the add before the shift and we can do rounding at
+ // the same time.
+ const int32_t offset = (128 << 18) + (1 << 17);
+ int32_t v = (tmp2[i] + offset) >> 18;
+
+ if (v < 0)
+ v = 0;
+ if (v > 255)
+ v = 255;
+
+ out[y*outstride + x] = v;
+ }
+ }
+}
+
+///////////////////////////////////////////////////////
+// Below: a "as straight-forward as I can make" implementation.
+static inline void idct_1D_double(double *in, int instride, double *out, int outstride)
+{
+ for (int x = 0; x < 8; x++)
+ out[x*outstride] = 0;
+
+ // iterate over IDCT coefficients
+ double Cu = 1/sqrt(2);
+
+ for (int u = 0; u < 8; u++, Cu = 1) {
+
+ double coeff = in[u*instride];
+ if (coeff == 0)
+ continue;
+
+ for (int x = 0; x < 8; x++)
+ out[x*outstride] += Cu*cos((2*x+1)*u*M_PI/16) * coeff;
+ }
+}
+
+void pjpeg_idct_2D_double(int32_t in[64], uint8_t *out, uint32_t outstride)
+{
+ double din[64], dout[64];
+ for (int i = 0; i < 64; i++)
+ din[i] = in[i];
+
+ double tmp[64];
+
+ // idct on rows
+ for (int y = 0; y < 8; y++)
+ idct_1D_double(&din[8*y], 1, &tmp[8*y], 1);
+
+ // idct on columns
+ for (int x = 0; x < 8; x++)
+ idct_1D_double(&tmp[x], 8, &dout[x], 8);
+
+ // scale, adjust bias, and clamp
+ for (int y = 0; y < 8; y++) {
+ for (int x = 0; x < 8; x++) {
+ int i = 8*y + x;
+
+ dout[i] = (dout[i] / 4) + 128;
+ if (dout[i] < 0)
+ dout[i] = 0;
+ if (dout[i] > 255)
+ dout[i] = 255;
+
+ // XXX round by adding +.5?
+ out[y*outstride + x] = dout[i];
+ }
+ }
+}
+
+//////////////////////////////////////////////
+static inline unsigned char njClip(const int x) {
+ return (x < 0) ? 0 : ((x > 0xFF) ? 0xFF : (unsigned char) x);
+}
+
+#define W1 2841
+#define W2 2676
+#define W3 2408
+#define W5 1609
+#define W6 1108
+#define W7 565
+
+static inline void njRowIDCT(int* blk) {
+ int x0, x1, x2, x3, x4, x5, x6, x7, x8;
+ if (!((x1 = blk[4] << 11)
+ | (x2 = blk[6])
+ | (x3 = blk[2])
+ | (x4 = blk[1])
+ | (x5 = blk[7])
+ | (x6 = blk[5])
+ | (x7 = blk[3])))
+ {
+ blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = blk[0] << 3;
+ return;
+ }
+ x0 = (blk[0] << 11) + 128;
+ x8 = W7 * (x4 + x5);
+ x4 = x8 + (W1 - W7) * x4;
+ x5 = x8 - (W1 + W7) * x5;
+ x8 = W3 * (x6 + x7);
+ x6 = x8 - (W3 - W5) * x6;
+ x7 = x8 - (W3 + W5) * x7;
+ x8 = x0 + x1;
+ x0 -= x1;
+ x1 = W6 * (x3 + x2);
+ x2 = x1 - (W2 + W6) * x2;
+ x3 = x1 + (W2 - W6) * x3;
+ x1 = x4 + x6;
+ x4 -= x6;
+ x6 = x5 + x7;
+ x5 -= x7;
+ x7 = x8 + x3;
+ x8 -= x3;
+ x3 = x0 + x2;
+ x0 -= x2;
+ x2 = (181 * (x4 + x5) + 128) >> 8;
+ x4 = (181 * (x4 - x5) + 128) >> 8;
+ blk[0] = (x7 + x1) >> 8;
+ blk[1] = (x3 + x2) >> 8;
+ blk[2] = (x0 + x4) >> 8;
+ blk[3] = (x8 + x6) >> 8;
+ blk[4] = (x8 - x6) >> 8;
+ blk[5] = (x0 - x4) >> 8;
+ blk[6] = (x3 - x2) >> 8;
+ blk[7] = (x7 - x1) >> 8;
+}
+
+static inline void njColIDCT(const int* blk, unsigned char *out, int stride) {
+ int x0, x1, x2, x3, x4, x5, x6, x7, x8;
+ if (!((x1 = blk[8*4] << 8)
+ | (x2 = blk[8*6])
+ | (x3 = blk[8*2])
+ | (x4 = blk[8*1])
+ | (x5 = blk[8*7])
+ | (x6 = blk[8*5])
+ | (x7 = blk[8*3])))
+ {
+ x1 = njClip(((blk[0] + 32) >> 6) + 128);
+ for (x0 = 8; x0; --x0) {
+ *out = (unsigned char) x1;
+ out += stride;
+ }
+ return;
+ }
+ x0 = (blk[0] << 8) + 8192;
+ x8 = W7 * (x4 + x5) + 4;
+ x4 = (x8 + (W1 - W7) * x4) >> 3;
+ x5 = (x8 - (W1 + W7) * x5) >> 3;
+ x8 = W3 * (x6 + x7) + 4;
+ x6 = (x8 - (W3 - W5) * x6) >> 3;
+ x7 = (x8 - (W3 + W5) * x7) >> 3;
+ x8 = x0 + x1;
+ x0 -= x1;
+ x1 = W6 * (x3 + x2) + 4;
+ x2 = (x1 - (W2 + W6) * x2) >> 3;
+ x3 = (x1 + (W2 - W6) * x3) >> 3;
+ x1 = x4 + x6;
+ x4 -= x6;
+ x6 = x5 + x7;
+ x5 -= x7;
+ x7 = x8 + x3;
+ x8 -= x3;
+ x3 = x0 + x2;
+ x0 -= x2;
+ x2 = (181 * (x4 + x5) + 128) >> 8;
+ x4 = (181 * (x4 - x5) + 128) >> 8;
+ *out = njClip(((x7 + x1) >> 14) + 128); out += stride;
+ *out = njClip(((x3 + x2) >> 14) + 128); out += stride;
+ *out = njClip(((x0 + x4) >> 14) + 128); out += stride;
+ *out = njClip(((x8 + x6) >> 14) + 128); out += stride;
+ *out = njClip(((x8 - x6) >> 14) + 128); out += stride;
+ *out = njClip(((x0 - x4) >> 14) + 128); out += stride;
+ *out = njClip(((x3 - x2) >> 14) + 128); out += stride;
+ *out = njClip(((x7 - x1) >> 14) + 128);
+}
+
+void pjpeg_idct_2D_nanojpeg(int32_t in[64], uint8_t *out, uint32_t outstride)
+{
+ int coef;
+
+ for (coef = 0; coef < 64; coef += 8)
+ njRowIDCT(&in[coef]);
+ for (coef = 0; coef < 8; ++coef)
+ njColIDCT(&in[coef], &out[coef], outstride);
+}