Blame - common/pjpeg-idct.c - RealtimeRoboticsGroup/test

blob: ddbdde5037182039414314a34a9e85bfd66ef0af [file] [log] [blame]

Austin Schuh	3333ec7	2022-12-29 16:21:06 -0800	[diff] [blame^]	1	/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
				2	All rights reserved.
				3	This software was developed in the APRIL Robotics Lab under the
				4	direction of Edwin Olson, ebolson@umich.edu. This software may be
				5	available under alternative licensing terms; contact the address above.
				6	Redistribution and use in source and binary forms, with or without
				7	modification, are permitted provided that the following conditions are met:
				8	1. Redistributions of source code must retain the above copyright notice, this
				9	list of conditions and the following disclaimer.
				10	2. Redistributions in binary form must reproduce the above copyright notice,
				11	this list of conditions and the following disclaimer in the documentation
				12	and/or other materials provided with the distribution.
				13	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
				14	ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
				15	WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
				16	DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
				17	ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
				18	(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
				19	LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
				20	ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				21	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
				22	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				23	The views and conclusions contained in the software and documentation are those
				24	of the authors and should not be interpreted as representing official policies,
				25	either expressed or implied, of the Regents of The University of Michigan.
				26	*/
				27
				28	#include <math.h>
				29	#include <stdint.h>
				30
				31	#ifndef M_PI
				32	# define M_PI 3.141592653589793238462643383279502884196
				33	#endif
				34
				// 1-D, 8-point inverse DCT in fixed point (8 fractional bits).
				//
				// Reads the 8 coefficients in[k*instride] and writes the 8 samples
				// out[x*outstride]. The weights are integer approximations of 256x
				// the cosine basis, so the output is scaled by a factor of 256.
				//
				// in and out must not overlap: out is cleared before in is read.
				//
				// This implementation has a worst-case complexity of 22 multiplies
				// and 64 adds. This makes it significantly worse (about 2x) than the
				// best-known fast inverse cosine transform methods. HOWEVER, zero
				// coefficients are skipped entirely, and since zeros are common
				// (often more than half the coefficients), the typical cost is
				// lower than the worst case.
				static inline void idct_1D_u32(const int32_t *in, int instride, int32_t *out, int outstride)
				{
				    for (int x = 0; x < 8; x++)
				        out[x*outstride] = 0;

				    int32_t c;

				    c = in[0*instride];
				    if (c) {
				        // 181 181 181 181 181 181 181 181
				        int32_t c181 = c * 181;
				        out[0*outstride] += c181;
				        out[1*outstride] += c181;
				        out[2*outstride] += c181;
				        out[3*outstride] += c181;
				        out[4*outstride] += c181;
				        out[5*outstride] += c181;
				        out[6*outstride] += c181;
				        out[7*outstride] += c181;
				    }

				    c = in[1*instride];
				    if (c) {
				        // 251 212 142 49 -49 -142 -212 -251
				        int32_t c251 = c * 251;
				        int32_t c212 = c * 212;
				        int32_t c142 = c * 142;
				        int32_t c49 = c * 49;
				        out[0*outstride] += c251;
				        out[1*outstride] += c212;
				        out[2*outstride] += c142;
				        out[3*outstride] += c49;
				        out[4*outstride] -= c49;
				        out[5*outstride] -= c142;
				        out[6*outstride] -= c212;
				        out[7*outstride] -= c251;
				    }

				    c = in[2*instride];
				    if (c) {
				        // 236 97 -97 -236 -236 -97 97 236
				        int32_t c236 = c * 236;
				        int32_t c97 = c * 97;
				        out[0*outstride] += c236;
				        out[1*outstride] += c97;
				        out[2*outstride] -= c97;
				        out[3*outstride] -= c236;
				        out[4*outstride] -= c236;
				        out[5*outstride] -= c97;
				        out[6*outstride] += c97;
				        out[7*outstride] += c236;
				    }

				    c = in[3*instride];
				    if (c) {
				        // 212 -49 -251 -142 142 251 49 -212
				        int32_t c212 = c * 212;
				        int32_t c49 = c * 49;
				        int32_t c251 = c * 251;
				        int32_t c142 = c * 142;
				        out[0*outstride] += c212;
				        out[1*outstride] -= c49;
				        out[2*outstride] -= c251;
				        out[3*outstride] -= c142;
				        out[4*outstride] += c142;
				        out[5*outstride] += c251;
				        out[6*outstride] += c49;
				        out[7*outstride] -= c212;
				    }

				    c = in[4*instride];
				    if (c) {
				        // 181 -181 -181 181 181 -181 -181 181
				        int32_t c181 = c * 181;
				        out[0*outstride] += c181;
				        out[1*outstride] -= c181;
				        out[2*outstride] -= c181;
				        out[3*outstride] += c181;
				        out[4*outstride] += c181;
				        out[5*outstride] -= c181;
				        out[6*outstride] -= c181;
				        out[7*outstride] += c181;
				    }

				    c = in[5*instride];
				    if (c) {
				        // 142 -251 49 212 -212 -49 251 -142
				        int32_t c142 = c * 142;
				        int32_t c251 = c * 251;
				        int32_t c49 = c * 49;
				        int32_t c212 = c * 212;
				        out[0*outstride] += c142;
				        out[1*outstride] -= c251;
				        out[2*outstride] += c49;
				        out[3*outstride] += c212;
				        out[4*outstride] -= c212;
				        out[5*outstride] -= c49;
				        out[6*outstride] += c251;
				        out[7*outstride] -= c142;
				    }

				    c = in[6*instride];
				    if (c) {
				        // 97 -236 236 -97 -97 236 -236 97
				        int32_t c97 = c * 97;
				        int32_t c236 = c * 236;
				        out[0*outstride] += c97;
				        out[1*outstride] -= c236;
				        out[2*outstride] += c236;
				        out[3*outstride] -= c97;
				        out[4*outstride] -= c97;
				        out[5*outstride] += c236;
				        out[6*outstride] -= c236;
				        out[7*outstride] += c97;
				    }

				    c = in[7*instride];
				    if (c) {
				        // 49 -142 212 -251 251 -212 142 -49
				        int32_t c49 = c * 49;
				        int32_t c142 = c * 142;
				        int32_t c212 = c * 212;
				        int32_t c251 = c * 251;
				        out[0*outstride] += c49;
				        out[1*outstride] -= c142;
				        out[2*outstride] += c212;
				        out[3*outstride] -= c251;
				        out[4*outstride] += c251;
				        out[5*outstride] -= c212;
				        out[6*outstride] += c142;
				        out[7*outstride] -= c49;
				    }
				}
				178
				// 2-D 8x8 inverse DCT built from the fixed-point 1-D kernel.
				//
				// in:        64 coefficients, row-major (index 8*y + x); not modified.
				// out:       sample (x, y) is stored at out[y*outstride + x].
				// outstride: distance, in bytes, between consecutive output rows.
				void pjpeg_idct_2D_u32(int32_t in[64], uint8_t *out, uint32_t outstride)
				{
				    int32_t tmp[64];
				    int32_t tmp2[64];

				    // idct on rows
				    for (int y = 0; y < 8; y++)
				        idct_1D_u32(&in[8*y], 1, &tmp[8*y], 1);

				    // idct on columns
				    for (int x = 0; x < 8; x++)
				        idct_1D_u32(&tmp[x], 8, &tmp2[x], 8);

				    // Shift of 18: the divide by 4 as part of the idct, and a shift by 16
				    // to undo the fixed-point arithmetic. (We accumulated 8 bits of
				    // fractional precision during each of the row and column IDCTs)
				    //
				    // Originally:
				    // int32_t v = (tmp2[i] >> 18) + 128;
				    //
				    // Move the add before the shift and we can do rounding at
				    // the same time: (128 << 18) is the +128 bias, (1 << 17) is one half.
				    const int32_t offset = (128 << 18) + (1 << 17);

				    // scale, adjust bias, and clamp
				    for (int y = 0; y < 8; y++) {
				        for (int x = 0; x < 8; x++) {
				            int32_t v = (tmp2[8*y + x] + offset) >> 18;

				            if (v < 0)
				                v = 0;
				            if (v > 255)
				                v = 255;

				            out[y*outstride + x] = (uint8_t) v;
				        }
				    }
				}
				219
				220	///////////////////////////////////////////////////////
				221	// Below: a "as straight-forward as I can make" implementation.
				222	static inline void idct_1D_double(double in, int instride, double out, int outstride)
				223	{
				224	for (int x = 0; x < 8; x++)
				225	out[x*outstride] = 0;
				226
				227	// iterate over IDCT coefficients
				228	double Cu = 1/sqrt(2);
				229
				230	for (int u = 0; u < 8; u++, Cu = 1) {
				231
				232	double coeff = in[u*instride];
				233	if (coeff == 0)
				234	continue;
				235
				236	for (int x = 0; x < 8; x++)
				237	out[xoutstride] += Cucos((2x+1)uM_PI/16) coeff;
				238	}
				239	}
				240
				// Reference 2-D 8x8 inverse DCT in double precision.
				//
				// in:        64 coefficients, row-major (index 8*y + x); not modified.
				// out:       sample (x, y) is stored at out[y*outstride + x].
				// outstride: distance, in bytes, between consecutive output rows.
				void pjpeg_idct_2D_double(int32_t in[64], uint8_t *out, uint32_t outstride)
				{
				    double din[64], dout[64], tmp[64];

				    for (int i = 0; i < 64; i++)
				        din[i] = in[i];

				    // idct on rows
				    for (int y = 0; y < 8; y++)
				        idct_1D_double(&din[8*y], 1, &tmp[8*y], 1);

				    // idct on columns
				    for (int x = 0; x < 8; x++)
				        idct_1D_double(&tmp[x], 8, &dout[x], 8);

				    // scale, adjust bias, and clamp
				    for (int y = 0; y < 8; y++) {
				        for (int x = 0; x < 8; x++) {
				            double v = (dout[8*y + x] / 4) + 128;

				            if (v < 0)
				                v = 0;
				            if (v > 255)
				                v = 255;

				            // The conversion truncates toward zero; no +0.5 rounding
				            // is applied here.
				            out[y*outstride + x] = (uint8_t) v;
				        }
				    }
				}
				273
				//////////////////////////////////////////////
				// Clamp x to the range [0, 255] and return it as an unsigned char.
				static inline unsigned char njClip(const int x)
				{
				    if (x < 0)
				        return 0;
				    if (x > 0xFF)
				        return 0xFF;
				    return (unsigned char) x;
				}
				278
				// Fixed-point cosine weights for the row/column IDCT below:
				// Wk = round(2048 * sqrt(2) * cos(k * pi / 16)).
				#define W1 2841
				#define W2 2676
				#define W3 2408
				#define W5 1609
				#define W6 1108
				#define W7 565
				285
				286	static inline void njRowIDCT(int* blk) {
				287	int x0, x1, x2, x3, x4, x5, x6, x7, x8;
				288	if (!((x1 = blk[4] << 11)
				289	\| (x2 = blk[6])
				290	\| (x3 = blk[2])
				291	\| (x4 = blk[1])
				292	\| (x5 = blk[7])
				293	\| (x6 = blk[5])
				294	\| (x7 = blk[3])))
				295	{
				296	blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = blk[0] << 3;
				297	return;
				298	}
				299	x0 = (blk[0] << 11) + 128;
				300	x8 = W7 * (x4 + x5);
				301	x4 = x8 + (W1 - W7) * x4;
				302	x5 = x8 - (W1 + W7) * x5;
				303	x8 = W3 * (x6 + x7);
				304	x6 = x8 - (W3 - W5) * x6;
				305	x7 = x8 - (W3 + W5) * x7;
				306	x8 = x0 + x1;
				307	x0 -= x1;
				308	x1 = W6 * (x3 + x2);
				309	x2 = x1 - (W2 + W6) * x2;
				310	x3 = x1 + (W2 - W6) * x3;
				311	x1 = x4 + x6;
				312	x4 -= x6;
				313	x6 = x5 + x7;
				314	x5 -= x7;
				315	x7 = x8 + x3;
				316	x8 -= x3;
				317	x3 = x0 + x2;
				318	x0 -= x2;
				319	x2 = (181 * (x4 + x5) + 128) >> 8;
				320	x4 = (181 * (x4 - x5) + 128) >> 8;
				321	blk[0] = (x7 + x1) >> 8;
				322	blk[1] = (x3 + x2) >> 8;
				323	blk[2] = (x0 + x4) >> 8;
				324	blk[3] = (x8 + x6) >> 8;
				325	blk[4] = (x8 - x6) >> 8;
				326	blk[5] = (x0 - x4) >> 8;
				327	blk[6] = (x3 - x2) >> 8;
				328	blk[7] = (x7 - x1) >> 8;
				329	}
				330
				331	static inline void njColIDCT(const int* blk, unsigned char *out, int stride) {
				332	int x0, x1, x2, x3, x4, x5, x6, x7, x8;
				333	if (!((x1 = blk[8*4] << 8)
				334	\| (x2 = blk[8*6])
				335	\| (x3 = blk[8*2])
				336	\| (x4 = blk[8*1])
				337	\| (x5 = blk[8*7])
				338	\| (x6 = blk[8*5])
				339	\| (x7 = blk[8*3])))
				340	{
				341	x1 = njClip(((blk[0] + 32) >> 6) + 128);
				342	for (x0 = 8; x0; --x0) {
				343	*out = (unsigned char) x1;
				344	out += stride;
				345	}
				346	return;
				347	}
				348	x0 = (blk[0] << 8) + 8192;
				349	x8 = W7 * (x4 + x5) + 4;
				350	x4 = (x8 + (W1 - W7) * x4) >> 3;
				351	x5 = (x8 - (W1 + W7) * x5) >> 3;
				352	x8 = W3 * (x6 + x7) + 4;
				353	x6 = (x8 - (W3 - W5) * x6) >> 3;
				354	x7 = (x8 - (W3 + W5) * x7) >> 3;
				355	x8 = x0 + x1;
				356	x0 -= x1;
				357	x1 = W6 * (x3 + x2) + 4;
				358	x2 = (x1 - (W2 + W6) * x2) >> 3;
				359	x3 = (x1 + (W2 - W6) * x3) >> 3;
				360	x1 = x4 + x6;
				361	x4 -= x6;
				362	x6 = x5 + x7;
				363	x5 -= x7;
				364	x7 = x8 + x3;
				365	x8 -= x3;
				366	x3 = x0 + x2;
				367	x0 -= x2;
				368	x2 = (181 * (x4 + x5) + 128) >> 8;
				369	x4 = (181 * (x4 - x5) + 128) >> 8;
				370	*out = njClip(((x7 + x1) >> 14) + 128); out += stride;
				371	*out = njClip(((x3 + x2) >> 14) + 128); out += stride;
				372	*out = njClip(((x0 + x4) >> 14) + 128); out += stride;
				373	*out = njClip(((x8 + x6) >> 14) + 128); out += stride;
				374	*out = njClip(((x8 - x6) >> 14) + 128); out += stride;
				375	*out = njClip(((x0 - x4) >> 14) + 128); out += stride;
				376	*out = njClip(((x3 - x2) >> 14) + 128); out += stride;
				377	*out = njClip(((x7 - x1) >> 14) + 128);
				378	}
				379
				// 2-D 8x8 inverse DCT using the njRowIDCT / njColIDCT pair.
				//
				// in:        64 coefficients, row-major. NOTE: the row pass writes its
				//            results back into this array, so the caller's
				//            coefficients are overwritten.
				// out:       column x is written starting at out[x], advancing by
				//            outstride for each row.
				// outstride: distance, in bytes, between consecutive output rows.
				void pjpeg_idct_2D_nanojpeg(int32_t in[64], uint8_t *out, uint32_t outstride)
				{
				    // rows first, in place
				    for (int row = 0; row < 64; row += 8)
				        njRowIDCT(&in[row]);

				    // then columns, producing the biased and clamped output samples
				    for (int col = 0; col < 8; ++col)
				        njColIDCT(&in[col], &out[col], (int) outstride);
				}