Squashed 'third_party/apriltag/' content from commit 3e8e974d0
git-subtree-dir: third_party/apriltag
git-subtree-split: 3e8e974d0d8d6ab318abf56d87506d15d7f2cc35
Signed-off-by: Austin Schuh <austin.linux@gmail.com>
Change-Id: I04ba3cb2106b6813a1013d57aa8074c26f856598
diff --git a/common/pjpeg.c b/common/pjpeg.c
new file mode 100644
index 0000000..acc61f0
--- /dev/null
+++ b/common/pjpeg.c
@@ -0,0 +1,893 @@
+/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
+All rights reserved.
+This software was developed in the APRIL Robotics Lab under the
+direction of Edwin Olson, ebolson@umich.edu. This software may be
+available under alternative licensing terms; contact the address above.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+The views and conclusions contained in the software and documentation are those
+of the authors and should not be interpreted as representing official policies,
+either expressed or implied, of the Regents of The University of Michigan.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "pjpeg.h"
+
+#include "image_u8.h"
+#include "image_u8x3.h"
+#include "debug_print.h"
+
+// https://www.w3.org/Graphics/JPEG/itu-t81.pdf
+
+void pjpeg_idct_2D_double(int32_t in[64], uint8_t *out, uint32_t outstride);
+void pjpeg_idct_2D_u32(int32_t in[64], uint8_t *out, uint32_t outstride);
+void pjpeg_idct_2D_nanojpeg(int32_t in[64], uint8_t *out, uint32_t outstride);
+
+struct pjpeg_huffman_code
+{
+ uint8_t nbits; // how many bits should we actually consume?
+ uint8_t code; // what is the symbol that was encoded? (not actually a DCT coefficient; see encoding)
+};
+
+struct pjpeg_decode_state
+{
+ int error;
+
+ uint32_t width, height;
+ uint8_t *in;
+ uint32_t inlen;
+
+ uint32_t flags;
+
+ // to decode, we load the next 16 bits of input (generally more
+ // than we need). We then look up in our code book how many bits
+ // we have actually consumed. For example, if there was a code
+ // whose bit sequence was "0", the first 32768 entries would all
+ // be copies of {.bits=1, .value=XX}; no matter what the following
+ // 15 bits are, we would get the correct decode.
+ //
+ // Can be up to 8 tables; computed as (ACDC * 2 + htidx)
+ struct pjpeg_huffman_code huff_codes[4][65536];
+ int huff_codes_present[4];
+
+ uint8_t qtab[4][64];
+
+ int ncomponents;
+ pjpeg_component_t *components;
+
+ int reset_interval;
+ int reset_count;
+ int reset_next; // What reset marker do we expect next? (add 0xd0)
+
+ int debug;
+};
+
+// from K.3.3.1 (page 158)
+static uint8_t mjpeg_dht[] = { // header
+ 0xFF,0xC4,0x01,0xA2,
+
+ /////////////////////////////////////////////////////////////
+ // luminance dc coefficients.
+ // DC table 0
+ 0x00,
+ // code lengths
+ 0x00,0x01,0x05,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ // values
+ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,
+
+ /////////////////////////////////////////////////////////////
+ // chrominance DC coefficents
+ // DC table 1
+ 0x01,
+ // code lengths
+ 0x00,0x03,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00,0x00,0x00,0x00,
+ // values
+ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,
+
+ /////////////////////////////////////////////////////////////
+ // luminance AC coefficients
+ // AC table 0
+ 0x10,
+ // code lengths
+ 0x00,0x02,0x01,0x03,0x03,0x02,0x04,0x03,0x05,0x05,0x04,0x04,0x00,0x00,0x01,0x7D,
+ // codes
+ 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,
+ 0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xA1,0x08,0x23,0x42,0xB1,0xC1,0x15,0x52,0xD1,0xF0,0x24,
+ 0x33,0x62,0x72,0x82,0x09,0x0A,0x16,0x17,0x18,0x19,0x1A,0x25,0x26,0x27,0x28,0x29,0x2A,0x34,
+ 0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,0x56,
+ 0x57,0x58,0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,0x74,0x75,0x76,0x77,0x78,
+ 0x79,0x7A,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,
+ 0x9A,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,
+ 0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,
+ 0xDA,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
+ 0xF8,0xF9,0xFA,
+
+ /////////////////////////////////////////////////////////////
+ // chrominance DC coefficients
+ // DC table 1
+ 0x11,
+ // code lengths
+ 0x00,0x02,0x01,0x02,0x04,0x04,0x03,0x04,0x07,0x05,0x04,0x04,0x00,0x01,0x02,0x77,
+ // values
+ 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,
+ 0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xA1,0xB1,0xC1,0x09,0x23,0x33,0x52,0xF0,0x15,0x62,
+ 0x72,0xD1,0x0A,0x16,0x24,0x34,0xE1,0x25,0xF1,0x17,0x18,0x19,0x1A,0x26,0x27,0x28,0x29,0x2A,
+ 0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,0x56,
+ 0x57,0x58,0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,0x74,0x75,0x76,0x77,0x78,
+ 0x79,0x7A,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,
+ 0x99,0x9A,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,
+ 0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,
+ 0xD9,0xDA,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,
+ 0xF9,0xFA
+};
+
+static inline uint8_t max_u8(uint8_t a, uint8_t b)
+{
+ return a > b ? a : b;
+}
+
+// order of coefficients in each DC block
+static const char ZZ[64] = { 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63 };
+
+
+
+struct bit_decoder
+{
+ uint8_t *in;
+ uint32_t inpos;
+ uint32_t inlen;
+
+ uint32_t bits; // the low order bits contain the next nbits_avail bits.
+
+ int nbits_avail; // how many bits in 'bits' (left aligned) are valid?
+
+ int error;
+};
+
+// ensure that at least 'nbits' of data is available in the bit decoder.
+static inline void bd_ensure(struct bit_decoder *bd, int nbits)
+{
+ while (bd->nbits_avail < nbits) {
+
+ if (bd->inpos >= bd->inlen) {
+ printf("hallucinating 1s!\n");
+ // we hit end of stream hallucinate an infinite stream of 1s
+ bd->bits = (bd->bits << 8) | 0xff;
+ bd->nbits_avail += 8;
+ continue;
+ }
+
+ uint8_t nextbyte = bd->in[bd->inpos];
+ bd->inpos++;
+
+ if (nextbyte == 0xff && bd->inpos < bd->inlen && bd->in[bd->inpos] == 0x00) {
+ // a stuffed byte
+ nextbyte = 0xff;
+ bd->inpos++;
+ }
+
+ // it's an ordinary byte
+ bd->bits = (bd->bits << 8) | nextbyte;
+ bd->nbits_avail += 8;
+ }
+}
+
+static inline uint32_t bd_peek_bits(struct bit_decoder *bd, int nbits)
+{
+ bd_ensure(bd, nbits);
+
+ return (bd->bits >> (bd->nbits_avail - nbits)) & ((1 << nbits) - 1);
+}
+
+static inline uint32_t bd_consume_bits(struct bit_decoder *bd, int nbits)
+{
+ assert(nbits < 32);
+
+ bd_ensure(bd, nbits);
+
+ uint32_t v = (bd->bits >> (bd->nbits_avail - nbits)) & ((1 << nbits) - 1);
+
+ bd->nbits_avail -= nbits;
+
+ return v;
+}
+
+// discard without regard for byte stuffing!
+static inline void bd_discard_bytes(struct bit_decoder *bd, int nbytes)
+{
+ assert(bd->nbits_avail == 0);
+ bd->inpos += nbytes;
+}
+
+static inline int bd_has_more(struct bit_decoder *bd)
+{
+ return bd->nbits_avail > 0 || bd->inpos < bd->inlen;
+}
+
+// throw away up to 7 bits of data so that the next data returned
+// began on a byte boundary.
+static inline void bd_discard_to_byte_boundary(struct bit_decoder *bd)
+{
+ bd->nbits_avail -= (bd->nbits_avail & 7);
+}
+
+static inline uint32_t bd_get_offset(struct bit_decoder *bd)
+{
+ return bd->inpos - bd->nbits_avail / 8;
+}
+
+static int pjpeg_decode_buffer(struct pjpeg_decode_state *pjd)
+{
+ // XXX TODO Include sanity check that this is actually a JPG
+
+ struct bit_decoder bd;
+ memset(&bd, 0, sizeof(struct bit_decoder));
+ bd.in = pjd->in;
+ bd.inpos = 0;
+ bd.inlen = pjd->inlen;
+
+ int marker_sync_skipped = 0;
+ int marker_sync_skipped_from_offset = 0;
+
+ while (bd_has_more(&bd)) {
+
+ uint32_t marker_offset = bd_get_offset(&bd);
+
+ // Look for the 0xff that signifies the beginning of a marker
+ bd_discard_to_byte_boundary(&bd);
+
+ while (bd_consume_bits(&bd, 8) != 0xff) {
+ if (marker_sync_skipped == 0)
+ marker_sync_skipped_from_offset = marker_offset;
+ marker_sync_skipped++;
+ continue;
+ }
+
+ if (marker_sync_skipped) {
+ printf("%08x: skipped %04x bytes\n", marker_sync_skipped_from_offset, marker_sync_skipped);
+ marker_sync_skipped = 0;
+ }
+
+ uint8_t marker = bd_consume_bits(&bd, 8);
+
+// printf("marker %08x : %02x\n", marker_offset, marker);
+
+ switch (marker) {
+
+ case 0xd8: // start of image. Great, continue.
+ continue;
+
+ // below are the markers that A) we don't care about
+ // that B) encode length as two bytes.
+ //
+ // Note: Other unknown fields should not be added since
+ // we should be able to skip over them by looking for
+ // the next marker byte.
+ case 0xe0: // JFIF header.
+ case 0xe1: // EXIF header (Yuck: Payload may contain 0xff 0xff!)
+ case 0xe2: // ICC Profile. (Yuck: payload may contain 0xff 0xff!)
+ case 0xe6: // some other common header
+ case 0xfe: // Comment
+ {
+ uint16_t length = bd_consume_bits(&bd, 16);
+ bd_discard_bytes(&bd, length - 2);
+ continue;
+ }
+
+ case 0xdb: { // DQT Define Quantization Table
+ uint16_t length = bd_consume_bits(&bd, 16);
+
+ if (((length-2) % 65) != 0)
+ return PJPEG_ERR_DQT;
+
+ // can contain multiple DQTs
+ for (int offset = 0; offset < length - 2; offset += 65) {
+
+ // pq: quant table element precision. 0=8bit, 1=16bit.
+ // tq: quant table destination id.
+ uint8_t pqtq = bd_consume_bits(&bd, 8);
+
+ if ((pqtq & 0xf0) != 0 || (pqtq & 0x0f) >= 4)
+ return PJPEG_ERR_DQT;
+
+ uint8_t id = pqtq & 3;
+
+ for (int i = 0; i < 64; i++)
+ pjd->qtab[id][i] = bd_consume_bits(&bd, 8);
+ }
+
+ break;
+ }
+
+ case 0xc0: { // SOF, non-differential, huffman, baseline
+ uint16_t length = bd_consume_bits(&bd, 16);
+ (void) length;
+
+ uint8_t p = bd_consume_bits(&bd, 8); // precision
+ if (p != 8)
+ return PJPEG_ERR_SOF;
+
+ pjd->height = bd_consume_bits(&bd, 16);
+ pjd->width = bd_consume_bits(&bd, 16);
+
+// printf("%d x %d\n", pjd->height, pjd->width);
+
+ int nf = bd_consume_bits(&bd, 8); // # image components
+
+ if (nf < 1 || nf > 3)
+ return PJPEG_ERR_SOF;
+
+ pjd->ncomponents = nf;
+ pjd->components = calloc(nf, sizeof(struct pjpeg_component));
+
+ for (int i = 0; i < nf; i++) {
+ // comp. identifier
+ pjd->components[i].id = bd_consume_bits(&bd, 8);
+
+ // horiz/vert sampling
+ pjd->components[i].hv = bd_consume_bits(&bd, 8);
+ pjd->components[i].scaley = pjd->components[i].hv & 0x0f;
+ pjd->components[i].scalex = pjd->components[i].hv >> 4;
+
+ // which quant table?
+ pjd->components[i].tq = bd_consume_bits(&bd, 8);
+ }
+ break;
+ }
+
+ case 0xc1: // SOF, non-differential, huffman, extended DCT
+ case 0xc2: // SOF, non-differential, huffman, progressive DCT
+ case 0xc3: // SOF, non-differential, huffman, lossless
+ case 0xc5: // SOF, differential, huffman, baseline DCT
+ case 0xc6: // SOF, differential, huffman, progressive
+ case 0xc7: // SOF, differential, huffman, lossless
+ case 0xc8: // reserved
+ case 0xc9: // SOF, non-differential, arithmetic, extended
+ case 0xca: // SOF, non-differential, arithmetic, progressive
+ case 0xcb: // SOF, non-differential, arithmetic, lossless
+ case 0xcd: // SOF, differential, arithmetic, sequential
+ case 0xce: // SOF, differential, arithmetic, progressive
+ case 0xcf: // SOF, differential, arithmetic, lossless
+ {
+ printf("pjepg.c: unsupported JPEG type %02x\n", marker);
+ return PJEPG_ERR_UNSUPPORTED;
+ }
+
+ case 0xc4: { // DHT Define Huffman Tables
+ // [ED: the encoding of these tables is really quite
+ // clever!]
+ uint16_t length = bd_consume_bits(&bd, 16);
+ length = length - 2;
+
+ while (length > 0) {
+ uint8_t TcTh = bd_consume_bits(&bd, 8);
+ length--;
+ uint8_t Tc = (TcTh >> 4);
+ int Th = TcTh & 0x0f; // which index are we using?
+
+ if (Tc >= 2 || Th >= 2)
+ // Tc must be either AC=1 or DC=0.
+ // Th must be less than 2
+ return PJPEG_ERR_DHT;
+
+ int htidx = Tc*2 + Th;
+
+ uint8_t L[17]; // how many symbols of each bit length?
+ L[0] = 0; // no 0 bit codes :)
+ for (int nbits = 1; nbits <= 16; nbits++) {
+ L[nbits] = bd_consume_bits(&bd, 8);
+ length -= L[nbits];
+ }
+ length -= 16;
+
+ uint32_t code_pos = 0;
+
+ for (int nbits = 1; nbits <= 16; nbits++) {
+ int nvalues = L[nbits];
+
+ // how many entries will we fill?
+ // (a 1 bit code will fill 32768, a 2 bit code 16384, ...)
+ uint32_t ncodes = (1 << (16 - nbits));
+
+ // consume the values...
+ for (int vi = 0; vi < nvalues; vi++) {
+ uint8_t code = bd_consume_bits(&bd, 8);
+
+ if (code_pos + ncodes > 0xffff)
+ return PJPEG_ERR_DHT;
+
+ for (int ci = 0; ci < ncodes; ci++) {
+ pjd->huff_codes[htidx][code_pos].nbits = nbits;
+ pjd->huff_codes[htidx][code_pos].code = code;
+ code_pos++;
+ }
+ }
+ }
+ pjd->huff_codes_present[htidx] = 1;
+ }
+ break;
+ }
+
+ // a sequentially-encoded JPG has one SOS segment. A
+ // progressive JPG will have multiple SOS segments.
+ case 0xda: { // Start Of Scan (SOS)
+
+ // Note that this marker frame (and its encoded
+ // length) does NOT include the bitstream that
+ // follows.
+
+ uint16_t length = bd_consume_bits(&bd, 16);
+ (void) length;
+
+ // number of components in this scan
+ uint8_t ns = bd_consume_bits(&bd, 8);
+
+ // for each component, what is the index into our pjd->components[] array?
+ uint8_t *comp_idx = calloc(ns, sizeof(uint8_t));
+
+ for (int i = 0; i < ns; i++) {
+ // component name
+ uint8_t cs = bd_consume_bits(&bd, 8);
+
+ int found = 0;
+ for (int j = 0; j < pjd->ncomponents; j++) {
+
+ if (cs == pjd->components[j].id) {
+ // which huff tables will we use for
+ // DC (high 4 bits) and AC (low 4 bits)
+ pjd->components[j].tda = bd_consume_bits(&bd, 8);
+ comp_idx[i] = j;
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found)
+ return PJPEG_ERR_SOS;
+ }
+
+ // start of spectral selection. baseline == 0
+ uint8_t ss = bd_consume_bits(&bd, 8);
+
+ // end of spectral selection. baseline == 0x3f
+ uint8_t se = bd_consume_bits(&bd, 8);
+
+ // successive approximation bits. baseline == 0
+ uint8_t Ahl = bd_consume_bits(&bd, 8);
+
+ if (ss != 0 || se != 0x3f || Ahl != 0x00)
+ return PJPEG_ERR_SOS;
+
+ // compute the dimensions of each MCU in pixels
+ int maxmcux = 0, maxmcuy = 0;
+ for (int i = 0; i < ns; i++) {
+ struct pjpeg_component *comp = &pjd->components[comp_idx[i]];
+
+ maxmcux = max_u8(maxmcux, comp->scalex * 8);
+ maxmcuy = max_u8(maxmcuy, comp->scaley * 8);
+ }
+
+ // how many MCU blocks are required to encode the whole image?
+ int mcus_x = (pjd->width + maxmcux - 1) / maxmcux;
+ int mcus_y = (pjd->height + maxmcuy - 1) / maxmcuy;
+
+ if (0)
+ printf("Image has %d x %d MCU blocks, each %d x %d pixels\n",
+ mcus_x, mcus_y, maxmcux, maxmcuy);
+
+ // allocate output storage
+ for (int i = 0; i < ns; i++) {
+ struct pjpeg_component *comp = &pjd->components[comp_idx[i]];
+ comp->width = mcus_x * comp->scalex * 8;
+ comp->height = mcus_y * comp->scaley * 8;
+ comp->stride = comp->width;
+
+ int alignment = 32;
+ if ((comp->stride % alignment) != 0)
+ comp->stride += alignment - (comp->stride % alignment);
+
+ comp->data = calloc(comp->height * comp->stride, 1);
+ }
+
+
+ // each component has its own DC prediction
+ int32_t *dcpred = calloc(ns, sizeof(int32_t));
+
+ pjd->reset_count = 0;
+
+ for (int mcu_y = 0; mcu_y < mcus_y; mcu_y++) {
+ for (int mcu_x = 0; mcu_x < mcus_x; mcu_x++) {
+
+ // the next two bytes in the input stream
+ // should be 0xff 0xdN, where N is the next
+ // reset counter.
+ //
+ // Our bit decoder may have already shifted
+ // these into the buffer. Consequently, we
+ // want to use our bit decoding functions to
+ // check for the marker. But we must first
+ // discard any fractional bits left.
+ if (pjd->reset_interval > 0 && pjd->reset_count == pjd->reset_interval) {
+
+ // RST markers are byte-aligned, so force
+ // the bit-decoder to the next byte
+ // boundary.
+ bd_discard_to_byte_boundary(&bd);
+
+ while (1) {
+ int32_t value = bd_consume_bits(&bd, 8);
+ if (bd.inpos > bd.inlen)
+ return PJPEG_ERR_EOF;
+ if (value == 0xff)
+ break;
+ printf("RST SYNC\n");
+ }
+
+ int32_t marker_32 = bd_consume_bits(&bd, 8);
+
+// printf("%04x: RESET? %02x\n", *bd.inpos, marker_32);
+ if (marker_32 != (0xd0 + pjd->reset_next))
+ return PJPEG_ERR_RESET;
+
+ pjd->reset_count = 0;
+ pjd->reset_next = (pjd->reset_next + 1) & 0x7;
+
+ memset(dcpred, 0, sizeof(*dcpred));
+ }
+
+ for (int nsidx = 0; nsidx < ns; nsidx++) {
+
+ struct pjpeg_component *comp = &pjd->components[comp_idx[nsidx]];
+
+ int32_t block[64];
+
+ int qtabidx = comp->tq; // which quant table?
+
+ for (int sby = 0; sby < comp->scaley; sby++) {
+ for (int sbx = 0; sbx < comp->scalex; sbx++) {
+ // decode block for component nsidx
+ memset(block, 0, sizeof(block));
+
+ int dc_huff_table_idx = comp->tda >> 4;
+ int ac_huff_table_idx = 2 + (comp->tda & 0x0f);
+
+ if (!pjd->huff_codes_present[dc_huff_table_idx] ||
+ !pjd->huff_codes_present[ac_huff_table_idx])
+ return PJPEG_ERR_MISSING_DHT; // probably an MJPEG.
+
+
+ if (1) {
+ // do DC coefficient
+ uint32_t next16 = bd_peek_bits(&bd, 16);
+ struct pjpeg_huffman_code *huff_code = &pjd->huff_codes[dc_huff_table_idx][next16];
+ bd_consume_bits(&bd, huff_code->nbits);
+
+ int ssss = huff_code->code & 0x0f; // ssss == number of additional bits to read
+ int32_t value = bd_consume_bits(&bd, ssss);
+
+ // if high bit is clear, it's negative
+ if ((value & (1 << (ssss-1))) == 0)
+ value += ((-1) << ssss) + 1;
+
+ dcpred[nsidx] += value;
+ block[0] = dcpred[nsidx] * pjd->qtab[qtabidx][0];
+ }
+
+ if (1) {
+ // do AC coefficients
+ for (int coeff = 1; coeff < 64; coeff++) {
+
+ uint32_t next16 = bd_peek_bits(&bd, 16);
+
+ struct pjpeg_huffman_code *huff_code = &pjd->huff_codes[ac_huff_table_idx][next16];
+ bd_consume_bits(&bd, huff_code->nbits);
+
+ if (huff_code->code == 0) {
+ break; // EOB
+ }
+
+ int rrrr = huff_code->code >> 4; // run length of zeros
+ int ssss = huff_code->code & 0x0f;
+
+ int32_t value = bd_consume_bits(&bd, ssss);
+
+ // if high bit is clear, it's negative
+ if ((value & (1 << (ssss-1))) == 0)
+ value += ((-1) << ssss) + 1;
+
+ coeff += rrrr;
+
+ block[(int) ZZ[coeff]] = value * pjd->qtab[qtabidx][coeff];
+ }
+ }
+
+ // do IDCT
+
+ // output block's upper-left
+ // coordinate (in pixels) is
+ // (comp_x, comp_y).
+ uint32_t comp_x = (mcu_x * comp->scalex + sbx) * 8;
+ uint32_t comp_y = (mcu_y * comp->scaley + sby) * 8;
+ uint32_t dataidx = comp_y * comp->stride + comp_x;
+
+// pjpeg_idct_2D_u32(block, &comp->data[dataidx], comp->stride);
+ pjpeg_idct_2D_nanojpeg(block, &comp->data[dataidx], comp->stride);
+ }
+ }
+ }
+
+ pjd->reset_count++;
+// printf("%04x: reset count %d / %d\n", pjd->inpos, pjd->reset_count, pjd->reset_interval);
+
+ }
+ }
+
+ free(dcpred);
+ free(comp_idx);
+
+ break;
+ }
+
+ case 0xd9: { // EOI End of Image
+ goto got_end_of_image;
+ }
+
+ case 0xdd: { // Define Restart Interval
+ uint16_t length = bd_consume_bits(&bd, 16);
+ if (length != 4)
+ return PJPEG_ERR_DRI;
+
+ // reset interval measured in the number of MCUs
+ pjd->reset_interval = bd_consume_bits(&bd, 16);
+
+ break;
+ }
+
+ default: {
+ printf("pjepg: Unknown marker %02x at offset %04x\n", marker, marker_offset);
+
+ // try to skip it.
+ uint16_t length = bd_consume_bits(&bd, 16);
+ bd_discard_bytes(&bd, length - 2);
+ continue;
+ }
+ } // switch (marker)
+ } // while inpos < inlen
+
+ got_end_of_image:
+
+ return PJPEG_OKAY;
+}
+
+void pjpeg_destroy(pjpeg_t *pj)
+{
+ if (!pj)
+ return;
+
+ for (int i = 0; i < pj->ncomponents; i++)
+ free(pj->components[i].data);
+ free(pj->components);
+
+ free(pj);
+}
+
+
+// just grab the first component.
+image_u8_t *pjpeg_to_u8_baseline(pjpeg_t *pj)
+{
+ assert(pj->ncomponents > 0);
+
+ pjpeg_component_t *comp = &pj->components[0];
+
+ assert(comp->width >= pj->width && comp->height >= pj->height);
+
+ image_u8_t *im = image_u8_create(pj->width, pj->height);
+ for (int y = 0; y < im->height; y++)
+ memcpy(&im->buf[y*im->stride], &comp->data[y*comp->stride], pj->width);
+
+ return im;
+}
+
+static inline uint8_t clampd(double v)
+{
+ if (v < 0)
+ return 0;
+ if (v > 255)
+ return 255;
+
+ return (uint8_t) v;
+}
+
+static inline uint8_t clamp_u8(int32_t v)
+{
+ if (v < 0)
+ return 0;
+ if (v > 255)
+ return 255;
+ return v;
+}
+
+// color conversion formulas taken from JFIF spec v 1.02
+image_u8x3_t *pjpeg_to_u8x3_baseline(pjpeg_t *pj)
+{
+ assert(pj->ncomponents == 3);
+
+ pjpeg_component_t *Y = &pj->components[0];
+ pjpeg_component_t *Cb = &pj->components[1];
+ pjpeg_component_t *Cr = &pj->components[2];
+
+ int Cb_factor_y = Y->height / Cb->height;
+ int Cb_factor_x = Y->width / Cb->width;
+
+ int Cr_factor_y = Y->height / Cr->height;
+ int Cr_factor_x = Y->width / Cr->width;
+
+ image_u8x3_t *im = image_u8x3_create(pj->width, pj->height);
+
+ if (Cr_factor_y == 1 && Cr_factor_x == 1 && Cb_factor_y == 1 && Cb_factor_x == 1) {
+
+ for (int y = 0; y < pj->height; y++) {
+ for (int x = 0; x < pj->width; x++) {
+ int32_t y_val = Y->data[y*Y->stride + x] * 65536;
+ int32_t cb_val = Cb->data[y*Cb->stride + x] - 128;
+ int32_t cr_val = Cr->data[y*Cr->stride + x] - 128;
+
+ int32_t r_val = y_val + 91881 * cr_val;
+ int32_t g_val = y_val + -22554 * cb_val - 46802 * cr_val;
+ int32_t b_val = y_val + 116130 * cb_val;
+
+ im->buf[y*im->stride + 3*x + 0 ] = clamp_u8(r_val >> 16);
+ im->buf[y*im->stride + 3*x + 1 ] = clamp_u8(g_val >> 16);
+ im->buf[y*im->stride + 3*x + 2 ] = clamp_u8(b_val >> 16);
+ }
+ }
+ } else if (Cb_factor_y == Cr_factor_y && Cb_factor_x == Cr_factor_x) {
+ for (int by = 0; by < pj->height / Cb_factor_y; by++) {
+ for (int bx = 0; bx < pj->width / Cb_factor_x; bx++) {
+
+ int32_t cb_val = Cb->data[by*Cb->stride + bx] - 128;
+ int32_t cr_val = Cr->data[by*Cr->stride + bx] - 128;
+
+ int32_t r0 = 91881 * cr_val;
+ int32_t g0 = -22554 * cb_val - 46802 * cr_val;
+ int32_t b0 = 116130 * cb_val;
+
+ for (int dy = 0; dy < Cb_factor_y; dy++) {
+ int y = by*Cb_factor_y + dy;
+
+ for (int dx = 0; dx < Cb_factor_x; dx++) {
+ int x = bx*Cb_factor_x + dx;
+
+ int32_t y_val = Y->data[y*Y->stride + x] * 65536;
+
+ int32_t r_val = r0 + y_val;
+ int32_t g_val = g0 + y_val;
+ int32_t b_val = b0 + y_val;
+
+ im->buf[y*im->stride + 3*x + 0 ] = clamp_u8(r_val >> 16);
+ im->buf[y*im->stride + 3*x + 1 ] = clamp_u8(g_val >> 16);
+ im->buf[y*im->stride + 3*x + 2 ] = clamp_u8(b_val >> 16);
+ }
+ }
+ }
+ }
+ } else {
+
+ for (int y = 0; y < pj->height; y++) {
+ for (int x = 0; x < pj->width; x++) {
+ int32_t y_val = Y->data[y*Y->stride + x];
+ int32_t cb_val = Cb->data[(y / Cb_factor_y)*Cb->stride + (x / Cb_factor_x)] - 128;
+ int32_t cr_val = Cr->data[(y / Cr_factor_y)*Cr->stride + (x / Cr_factor_x)] - 128;
+
+ uint8_t r_val = clampd(y_val + 1.402 * cr_val);
+ uint8_t g_val = clampd(y_val - 0.34414 * cb_val - 0.71414 * cr_val);
+ uint8_t b_val = clampd(y_val + 1.772 * cb_val);
+
+ im->buf[y*im->stride + 3*x + 0 ] = r_val;
+ im->buf[y*im->stride + 3*x + 1 ] = g_val;
+ im->buf[y*im->stride + 3*x + 2 ] = b_val;
+ }
+ }
+ }
+
+ return im;
+}
+
+///////////////////////////////////////////////////////////////////
+// returns NULL if file loading fails.
+pjpeg_t *pjpeg_create_from_file(const char *path, uint32_t flags, int *error)
+{
+ FILE *f = fopen(path, "rb");
+ if (f == NULL)
+ return NULL;
+
+ fseek(f, 0, SEEK_END);
+ long buflen = ftell(f);
+
+ uint8_t *buf = malloc(buflen);
+ fseek(f, 0, SEEK_SET);
+ int res = fread(buf, 1, buflen, f);
+
+ if ( ferror(f) ){
+ debug_print ("Read failed");
+ clearerr(f);
+ }
+
+ fclose(f);
+ if (res != buflen) {
+ free(buf);
+ if (error)
+ *error = PJPEG_ERR_FILE;
+ return NULL;
+ }
+
+ pjpeg_t *pj = pjpeg_create_from_buffer(buf, buflen, flags, error);
+
+ free(buf);
+ return pj;
+}
+
+pjpeg_t *pjpeg_create_from_buffer(uint8_t *buf, int buflen, uint32_t flags, int *error)
+{
+ struct pjpeg_decode_state pjd;
+ memset(&pjd, 0, sizeof(pjd));
+
+ if (flags & PJPEG_MJPEG) {
+ pjd.in = mjpeg_dht;
+ pjd.inlen = sizeof(mjpeg_dht);
+ int result = pjpeg_decode_buffer(&pjd);
+ assert(result == 0);
+ }
+
+ pjd.in = buf;
+ pjd.inlen = buflen;
+ pjd.flags = flags;
+
+ int result = pjpeg_decode_buffer(&pjd);
+ if (error)
+ *error = result;
+
+ if (result) {
+ for (int i = 0; i < pjd.ncomponents; i++)
+ free(pjd.components[i].data);
+ free(pjd.components);
+
+ return NULL;
+ }
+
+ pjpeg_t *pj = calloc(1, sizeof(pjpeg_t));
+
+ pj->width = pjd.width;
+ pj->height = pjd.height;
+ pj->ncomponents = pjd.ncomponents;
+ pj->components = pjd.components;
+
+ return pj;
+}