Blame - common/matd.c - RealtimeRoboticsGroup/test

blob: 54449d9f27a4dae6d1422c37c71bdeeb046c94ba [file] [log] [blame]

Austin Schuh	3333ec7	2022-12-29 16:21:06 -0800	[diff] [blame^]	1	/* Copyright (C) 2013-2016, The Regents of The University of Michigan.
				2	All rights reserved.
				3	This software was developed in the APRIL Robotics Lab under the
				4	direction of Edwin Olson, ebolson@umich.edu. This software may be
				5	available under alternative licensing terms; contact the address above.
				6	Redistribution and use in source and binary forms, with or without
				7	modification, are permitted provided that the following conditions are met:
				8	1. Redistributions of source code must retain the above copyright notice, this
				9	list of conditions and the following disclaimer.
				10	2. Redistributions in binary form must reproduce the above copyright notice,
				11	this list of conditions and the following disclaimer in the documentation
				12	and/or other materials provided with the distribution.
				13	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
				14	ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
				15	WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
				16	DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
				17	ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
				18	(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
				19	LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
				20	ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				21	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
				22	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				23	The views and conclusions contained in the software and documentation are those
				24	of the authors and should not be interpreted as representing official policies,
				25	either expressed or implied, of the Regents of The University of Michigan.
				26	*/
				27
				28	#include <stdio.h>
				29	#include <stdlib.h>
				30	#include <string.h>
				31	#include <stdarg.h>
				32	#include <assert.h>
				33	#include <math.h>
				34	#include <float.h>
				35
				36	#include "common/math_util.h"
				37	#include "common/svd22.h"
				38	#include "common/matd.h"
				39	#include "common/debug_print.h"
				40
				41	// a matd_t with rows=0 cols=0 is a SCALAR.
				42
				43	// to ease creating mati, matf, etc. in the future.
				44	#define TYPE double
				45
				46	matd_t *matd_create(int rows, int cols)
				47	{
				48	assert(rows >= 0);
				49	assert(cols >= 0);
				50
				51	if (rows == 0 \|\| cols == 0)
				52	return matd_create_scalar(0);
				53
				54	matd_t m = calloc(1, sizeof(matd_t) + (rowscols*sizeof(double)));
				55	m->nrows = rows;
				56	m->ncols = cols;
				57
				58	return m;
				59	}
				60
				61	matd_t *matd_create_scalar(TYPE v)
				62	{
				63	matd_t *m = calloc(1, sizeof(matd_t) + sizeof(double));
				64	m->nrows = 0;
				65	m->ncols = 0;
				66	m->data[0] = v;
				67
				68	return m;
				69	}
				70
				71	matd_t matd_create_data(int rows, int cols, const TYPE data)
				72	{
				73	if (rows == 0 \|\| cols == 0)
				74	return matd_create_scalar(data[0]);
				75
				76	matd_t *m = matd_create(rows, cols);
				77	for (int i = 0; i < rows * cols; i++)
				78	m->data[i] = data[i];
				79
				80	return m;
				81	}
				82
				83	matd_t matd_create_dataf(int rows, int cols, const float data)
				84	{
				85	if (rows == 0 \|\| cols == 0)
				86	return matd_create_scalar(data[0]);
				87
				88	matd_t *m = matd_create(rows, cols);
				89	for (int i = 0; i < rows * cols; i++)
				90	m->data[i] = (double)data[i];
				91
				92	return m;
				93	}
				94
				95	matd_t *matd_identity(int dim)
				96	{
				97	if (dim == 0)
				98	return matd_create_scalar(1);
				99
				100	matd_t *m = matd_create(dim, dim);
				101	for (int i = 0; i < dim; i++)
				102	MATD_EL(m, i, i) = 1;
				103
				104	return m;
				105	}
				106
				107	// row and col are zero-based
				108	TYPE matd_get(const matd_t *m, int row, int col)
				109	{
				110	assert(m != NULL);
				111	assert(!matd_is_scalar(m));
				112	assert(row >= 0);
				113	assert(row < m->nrows);
				114	assert(col >= 0);
				115	assert(col < m->ncols);
				116
				117	return MATD_EL(m, row, col);
				118	}
				119
				120	// row and col are zero-based
				121	void matd_put(matd_t *m, int row, int col, TYPE value)
				122	{
				123	assert(m != NULL);
				124
				125	if (matd_is_scalar(m)) {
				126	matd_put_scalar(m, value);
				127	return;
				128	}
				129
				130	assert(row >= 0);
				131	assert(row < m->nrows);
				132	assert(col >= 0);
				133	assert(col < m->ncols);
				134
				135	MATD_EL(m, row, col) = value;
				136	}
				137
				138	TYPE matd_get_scalar(const matd_t *m)
				139	{
				140	assert(m != NULL);
				141	assert(matd_is_scalar(m));
				142
				143	return (m->data[0]);
				144	}
				145
				146	void matd_put_scalar(matd_t *m, TYPE value)
				147	{
				148	assert(m != NULL);
				149	assert(matd_is_scalar(m));
				150
				151	m->data[0] = value;
				152	}
				153
				154	matd_t matd_copy(const matd_t m)
				155	{
				156	assert(m != NULL);
				157
				158	matd_t *x = matd_create(m->nrows, m->ncols);
				159	if (matd_is_scalar(m))
				160	x->data[0] = m->data[0];
				161	else
				162	memcpy(x->data, m->data, sizeof(TYPE)m->ncolsm->nrows);
				163
				164	return x;
				165	}
				166
				167	matd_t matd_select(const matd_t a, int r0, int r1, int c0, int c1)
				168	{
				169	assert(a != NULL);
				170
				171	assert(r0 >= 0 && r0 < a->nrows);
				172	assert(c0 >= 0 && c0 < a->ncols);
				173
				174	int nrows = r1 - r0 + 1;
				175	int ncols = c1 - c0 + 1;
				176
				177	matd_t * r = matd_create(nrows, ncols);
				178
				179	for (int row = r0; row <= r1; row++)
				180	for (int col = c0; col <= c1; col++)
				181	MATD_EL(r,row-r0,col-c0) = MATD_EL(a,row,col);
				182
				183	return r;
				184	}
				185
				186	void matd_print(const matd_t m, const char fmt)
				187	{
				188	assert(m != NULL);
				189	assert(fmt != NULL);
				190
				191	if (matd_is_scalar(m)) {
				192	printf(fmt, MATD_EL(m, 0, 0));
				193	printf("\n");
				194	} else {
				195	for (int i = 0; i < m->nrows; i++) {
				196	for (int j = 0; j < m->ncols; j++) {
				197	printf(fmt, MATD_EL(m, i, j));
				198	}
				199	printf("\n");
				200	}
				201	}
				202	}
				203
				204	void matd_print_transpose(const matd_t m, const char fmt)
				205	{
				206	assert(m != NULL);
				207	assert(fmt != NULL);
				208
				209	if (matd_is_scalar(m)) {
				210	printf(fmt, MATD_EL(m, 0, 0));
				211	printf("\n");
				212	} else {
				213	for (int j = 0; j < m->ncols; j++) {
				214	for (int i = 0; i < m->nrows; i++) {
				215	printf(fmt, MATD_EL(m, i, j));
				216	}
				217	printf("\n");
				218	}
				219	}
				220	}
				221
				222	void matd_destroy(matd_t *m)
				223	{
				224	if (!m)
				225	return;
				226
				227	assert(m != NULL);
				228	free(m);
				229	}
				230
				231	matd_t matd_multiply(const matd_t a, const matd_t *b)
				232	{
				233	assert(a != NULL);
				234	assert(b != NULL);
				235
				236	if (matd_is_scalar(a))
				237	return matd_scale(b, a->data[0]);
				238	if (matd_is_scalar(b))
				239	return matd_scale(a, b->data[0]);
				240
				241	assert(a->ncols == b->nrows);
				242	matd_t *m = matd_create(a->nrows, b->ncols);
				243
				244	for (int i = 0; i < m->nrows; i++) {
				245	for (int j = 0; j < m->ncols; j++) {
				246	TYPE acc = 0;
				247	for (int k = 0; k < a->ncols; k++) {
				248	acc += MATD_EL(a, i, k) * MATD_EL(b, k, j);
				249	}
				250	MATD_EL(m, i, j) = acc;
				251	}
				252	}
				253
				254	return m;
				255	}
				256
				257	matd_t matd_scale(const matd_t a, double s)
				258	{
				259	assert(a != NULL);
				260
				261	if (matd_is_scalar(a))
				262	return matd_create_scalar(a->data[0] * s);
				263
				264	matd_t *m = matd_create(a->nrows, a->ncols);
				265
				266	for (int i = 0; i < m->nrows; i++) {
				267	for (int j = 0; j < m->ncols; j++) {
				268	MATD_EL(m, i, j) = s * MATD_EL(a, i, j);
				269	}
				270	}
				271
				272	return m;
				273	}
				274
				275	void matd_scale_inplace(matd_t *a, double s)
				276	{
				277	assert(a != NULL);
				278
				279	if (matd_is_scalar(a)) {
				280	a->data[0] *= s;
				281	return;
				282	}
				283
				284	for (int i = 0; i < a->nrows; i++) {
				285	for (int j = 0; j < a->ncols; j++) {
				286	MATD_EL(a, i, j) *= s;
				287	}
				288	}
				289	}
				290
				291	matd_t matd_add(const matd_t a, const matd_t *b)
				292	{
				293	assert(a != NULL);
				294	assert(b != NULL);
				295	assert(a->nrows == b->nrows);
				296	assert(a->ncols == b->ncols);
				297
				298	if (matd_is_scalar(a))
				299	return matd_create_scalar(a->data[0] + b->data[0]);
				300
				301	matd_t *m = matd_create(a->nrows, a->ncols);
				302
				303	for (int i = 0; i < m->nrows; i++) {
				304	for (int j = 0; j < m->ncols; j++) {
				305	MATD_EL(m, i, j) = MATD_EL(a, i, j) + MATD_EL(b, i, j);
				306	}
				307	}
				308
				309	return m;
				310	}
				311
				312	void matd_add_inplace(matd_t a, const matd_t b)
				313	{
				314	assert(a != NULL);
				315	assert(b != NULL);
				316	assert(a->nrows == b->nrows);
				317	assert(a->ncols == b->ncols);
				318
				319	if (matd_is_scalar(a)) {
				320	a->data[0] += b->data[0];
				321	return;
				322	}
				323
				324	for (int i = 0; i < a->nrows; i++) {
				325	for (int j = 0; j < a->ncols; j++) {
				326	MATD_EL(a, i, j) += MATD_EL(b, i, j);
				327	}
				328	}
				329	}
				330
				331
				332	matd_t matd_subtract(const matd_t a, const matd_t *b)
				333	{
				334	assert(a != NULL);
				335	assert(b != NULL);
				336	assert(a->nrows == b->nrows);
				337	assert(a->ncols == b->ncols);
				338
				339	if (matd_is_scalar(a))
				340	return matd_create_scalar(a->data[0] - b->data[0]);
				341
				342	matd_t *m = matd_create(a->nrows, a->ncols);
				343
				344	for (int i = 0; i < m->nrows; i++) {
				345	for (int j = 0; j < m->ncols; j++) {
				346	MATD_EL(m, i, j) = MATD_EL(a, i, j) - MATD_EL(b, i, j);
				347	}
				348	}
				349
				350	return m;
				351	}
				352
				353	void matd_subtract_inplace(matd_t a, const matd_t b)
				354	{
				355	assert(a != NULL);
				356	assert(b != NULL);
				357	assert(a->nrows == b->nrows);
				358	assert(a->ncols == b->ncols);
				359
				360	if (matd_is_scalar(a)) {
				361	a->data[0] -= b->data[0];
				362	return;
				363	}
				364
				365	for (int i = 0; i < a->nrows; i++) {
				366	for (int j = 0; j < a->ncols; j++) {
				367	MATD_EL(a, i, j) -= MATD_EL(b, i, j);
				368	}
				369	}
				370	}
				371
				372
				373	matd_t matd_transpose(const matd_t a)
				374	{
				375	assert(a != NULL);
				376
				377	if (matd_is_scalar(a))
				378	return matd_create_scalar(a->data[0]);
				379
				380	matd_t *m = matd_create(a->ncols, a->nrows);
				381
				382	for (int i = 0; i < a->nrows; i++) {
				383	for (int j = 0; j < a->ncols; j++) {
				384	MATD_EL(m, j, i) = MATD_EL(a, i, j);
				385	}
				386	}
				387	return m;
				388	}
				389
				390	static
				391	double matd_det_general(const matd_t *a)
				392	{
				393	// Use LU decompositon to calculate the determinant
				394	matd_plu_t *mlu = matd_plu(a);
				395	matd_t *L = matd_plu_l(mlu);
				396	matd_t *U = matd_plu_u(mlu);
				397
				398	// The determinants of the L and U matrices are the products of
				399	// their respective diagonal elements
				400	double detL = 1; double detU = 1;
				401	for (int i = 0; i < a->nrows; i++) {
				402	detL *= matd_get(L, i, i);
				403	detU *= matd_get(U, i, i);
				404	}
				405
				406	// The determinant of a can be calculated as
				407	// epsilondet(L)det(U),
				408	// where epsilon is just the sign of the corresponding permutation
				409	// (which is +1 for an even number of permutations and is −1
				410	// for an uneven number of permutations).
				411	double det = mlu->pivsign * detL * detU;
				412
				413	// Cleanup
				414	matd_plu_destroy(mlu);
				415	matd_destroy(L);
				416	matd_destroy(U);
				417
				418	return det;
				419	}
				420
				421	double matd_det(const matd_t *a)
				422	{
				423	assert(a != NULL);
				424	assert(a->nrows == a->ncols);
				425
				426	switch(a->nrows) {
				427	case 0:
				428	// scalar: invalid
				429	assert(a->nrows > 0);
				430	break;
				431
				432	case 1:
				433	// 1x1 matrix
				434	return a->data[0];
				435
				436	case 2:
				437	// 2x2 matrix
				438	return a->data[0] * a->data[3] - a->data[1] * a->data[2];
				439
				440	case 3:
				441	// 3x3 matrix
				442	return a->data[0]a->data[4]a->data[8]
				443	- a->data[0]a->data[5]a->data[7]
				444	+ a->data[1]a->data[5]a->data[6]
				445	- a->data[1]a->data[3]a->data[8]
				446	+ a->data[2]a->data[3]a->data[7]
				447	- a->data[2]a->data[4]a->data[6];
				448
				449	case 4: {
				450	// 4x4 matrix
				451	double m00 = MATD_EL(a,0,0), m01 = MATD_EL(a,0,1), m02 = MATD_EL(a,0,2), m03 = MATD_EL(a,0,3);
				452	double m10 = MATD_EL(a,1,0), m11 = MATD_EL(a,1,1), m12 = MATD_EL(a,1,2), m13 = MATD_EL(a,1,3);
				453	double m20 = MATD_EL(a,2,0), m21 = MATD_EL(a,2,1), m22 = MATD_EL(a,2,2), m23 = MATD_EL(a,2,3);
				454	double m30 = MATD_EL(a,3,0), m31 = MATD_EL(a,3,1), m32 = MATD_EL(a,3,2), m33 = MATD_EL(a,3,3);
				455
				456	return m00 * m11 * m22 * m33 - m00 * m11 * m23 * m32 -
				457	m00 * m21 * m12 * m33 + m00 * m21 * m13 * m32 + m00 * m31 * m12 * m23 -
				458	m00 * m31 * m13 * m22 - m10 * m01 * m22 * m33 +
				459	m10 * m01 * m23 * m32 + m10 * m21 * m02 * m33 -
				460	m10 * m21 * m03 * m32 - m10 * m31 * m02 * m23 +
				461	m10 * m31 * m03 * m22 + m20 * m01 * m12 * m33 -
				462	m20 * m01 * m13 * m32 - m20 * m11 * m02 * m33 +
				463	m20 * m11 * m03 * m32 + m20 * m31 * m02 * m13 -
				464	m20 * m31 * m03 * m12 - m30 * m01 * m12 * m23 +
				465	m30 * m01 * m13 * m22 + m30 * m11 * m02 * m23 -
				466	m30 * m11 * m03 * m22 - m30 * m21 * m02 * m13 +
				467	m30 * m21 * m03 * m12;
				468	}
				469
				470	default:
				471	return matd_det_general(a);
				472	}
				473
				474	assert(0);
				475	return 0;
				476	}
				477
				478	// returns NULL if the matrix is (exactly) singular. Caller is
				479	// otherwise responsible for knowing how to cope with badly
				480	// conditioned matrices.
				481	matd_t matd_inverse(const matd_t x)
				482	{
				483	matd_t *m = NULL;
				484
				485	assert(x != NULL);
				486	assert(x->nrows == x->ncols);
				487
				488	if (matd_is_scalar(x)) {
				489	if (x->data[0] == 0)
				490	return NULL;
				491
				492	return matd_create_scalar(1.0 / x->data[0]);
				493	}
				494
				495	switch(x->nrows) {
				496	case 1: {
				497	double det = x->data[0];
				498	if (det == 0)
				499	return NULL;
				500
				501	double invdet = 1.0 / det;
				502
				503	m = matd_create(x->nrows, x->nrows);
				504	MATD_EL(m, 0, 0) = 1.0 * invdet;
				505	return m;
				506	}
				507
				508	case 2: {
				509	double det = x->data[0] * x->data[3] - x->data[1] * x->data[2];
				510	if (det == 0)
				511	return NULL;
				512
				513	double invdet = 1.0 / det;
				514
				515	m = matd_create(x->nrows, x->nrows);
				516	MATD_EL(m, 0, 0) = MATD_EL(x, 1, 1) * invdet;
				517	MATD_EL(m, 0, 1) = - MATD_EL(x, 0, 1) * invdet;
				518	MATD_EL(m, 1, 0) = - MATD_EL(x, 1, 0) * invdet;
				519	MATD_EL(m, 1, 1) = MATD_EL(x, 0, 0) * invdet;
				520	return m;
				521	}
				522
				523	default: {
				524	matd_plu_t *plu = matd_plu(x);
				525
				526	matd_t *inv = NULL;
				527	if (!plu->singular) {
				528	matd_t *ident = matd_identity(x->nrows);
				529	inv = matd_plu_solve(plu, ident);
				530	matd_destroy(ident);
				531	}
				532
				533	matd_plu_destroy(plu);
				534
				535	return inv;
				536	}
				537	}
				538
				539	return NULL; // unreachable
				540	}
				541
				542
				543
				544	// TODO Optimization: Some operations we could perform in-place,
				545	// saving some memory allocation work. E.g., ADD, SUBTRACT. Just need
				546	// to make sure that we don't do an in-place modification on a matrix
				547	// that was an input argument!
				548
				549	// handle right-associative operators, greedily consuming them. These
				550	// include transpose and inverse. This is called by the main recursion
				551	// method.
				552	static inline matd_t matd_op_gobble_right(const char expr, int pos, matd_t acc, matd_t *garb, int garbpos)
				553	{
				554	while (expr[*pos] != 0) {
				555
				556	switch (expr[*pos]) {
				557
				558	case '\'': {
				559	assert(acc != NULL); // either a syntax error or a math op failed, producing null
				560	matd_t *res = matd_transpose(acc);
				561	garb[*garbpos] = res;
				562	(*garbpos)++;
				563	acc = res;
				564
				565	(*pos)++;
				566	break;
				567	}
				568
				569	// handle inverse ^-1. No other exponents are allowed.
				570	case '^': {
				571	assert(acc != NULL);
				572	assert(expr[*pos+1] == '-');
				573	assert(expr[*pos+2] == '1');
				574
				575	matd_t *res = matd_inverse(acc);
				576	garb[*garbpos] = res;
				577	(*garbpos)++;
				578	acc = res;
				579
				580	(*pos)+=3;
				581	break;
				582	}
				583
				584	default:
				585	return acc;
				586	}
				587	}
				588
				589	return acc;
				590	}
				591
				592	// @garb, garbpos A list of every matrix allocated during evaluation... used to assist cleanup.
				593	// @oneterm: we should return at the end of this term (i.e., stop at a PLUS, MINUS, LPAREN).
				594	static matd_t matd_op_recurse(const char expr, int pos, matd_t acc, matd_t *args, int argpos,
				595	matd_t *garb, int garbpos, int oneterm)
				596	{
				597	while (expr[*pos] != 0) {
				598
				599	switch (expr[*pos]) {
				600
				601	case '(': {
				602	if (oneterm && acc != NULL)
				603	return acc;
				604	(*pos)++;
				605	matd_t *rhs = matd_op_recurse(expr, pos, NULL, args, argpos, garb, garbpos, 0);
				606	rhs = matd_op_gobble_right(expr, pos, rhs, garb, garbpos);
				607
				608	if (acc == NULL) {
				609	acc = rhs;
				610	} else {
				611	matd_t *res = matd_multiply(acc, rhs);
				612	garb[*garbpos] = res;
				613	(*garbpos)++;
				614	acc = res;
				615	}
				616
				617	break;
				618	}
				619
				620	case ')': {
				621	if (oneterm)
				622	return acc;
				623
				624	(*pos)++;
				625	return acc;
				626	}
				627
				628	case '*': {
				629	(*pos)++;
				630
				631	matd_t *rhs = matd_op_recurse(expr, pos, NULL, args, argpos, garb, garbpos, 1);
				632	rhs = matd_op_gobble_right(expr, pos, rhs, garb, garbpos);
				633
				634	if (acc == NULL) {
				635	acc = rhs;
				636	} else {
				637	matd_t *res = matd_multiply(acc, rhs);
				638	garb[*garbpos] = res;
				639	(*garbpos)++;
				640	acc = res;
				641	}
				642
				643	break;
				644	}
				645
				646	case 'F': {
				647	matd_t rhs = args[argpos];
				648	garb[*garbpos] = rhs;
				649	(*garbpos)++;
				650
				651	(*pos)++;
				652	(*argpos)++;
				653
				654	rhs = matd_op_gobble_right(expr, pos, rhs, garb, garbpos);
				655
				656	if (acc == NULL) {
				657	acc = rhs;
				658	} else {
				659	matd_t *res = matd_multiply(acc, rhs);
				660	garb[*garbpos] = res;
				661	(*garbpos)++;
				662	acc = res;
				663	}
				664
				665	break;
				666	}
				667
				668	case 'M': {
				669	matd_t rhs = args[argpos];
				670
				671	(*pos)++;
				672	(*argpos)++;
				673
				674	rhs = matd_op_gobble_right(expr, pos, rhs, garb, garbpos);
				675
				676	if (acc == NULL) {
				677	acc = rhs;
				678	} else {
				679	matd_t *res = matd_multiply(acc, rhs);
				680	garb[*garbpos] = res;
				681	(*garbpos)++;
				682	acc = res;
				683	}
				684
				685	break;
				686	}
				687
				688	/*
				689	case 'D': {
				690	int rows = expr[*pos+1]-'0';
				691	int cols = expr[*pos+2]-'0';
				692
				693	matd_t *rhs = matd_create(rows, cols);
				694
				695	break;
				696	}
				697	*/
				698	// a constant (SCALAR) defined inline. Treat just like M, creating a matd_t on the fly.
				699	case '0':
				700	case '1':
				701	case '2':
				702	case '3':
				703	case '4':
				704	case '5':
				705	case '6':
				706	case '7':
				707	case '8':
				708	case '9':
				709	case '.': {
				710	const char start = &expr[pos];
				711	char *end;
				712	double s = strtod(start, &end);
				713	(*pos) += (end - start);
				714	matd_t *rhs = matd_create_scalar(s);
				715	garb[*garbpos] = rhs;
				716	(*garbpos)++;
				717
				718	rhs = matd_op_gobble_right(expr, pos, rhs, garb, garbpos);
				719
				720	if (acc == NULL) {
				721	acc = rhs;
				722	} else {
				723	matd_t *res = matd_multiply(acc, rhs);
				724	garb[*garbpos] = res;
				725	(*garbpos)++;
				726	acc = res;
				727	}
				728
				729	break;
				730	}
				731
				732	case '+': {
				733	if (oneterm && acc != NULL)
				734	return acc;
				735
				736	// don't support unary plus
				737	assert(acc != NULL);
				738	(*pos)++;
				739	matd_t *rhs = matd_op_recurse(expr, pos, NULL, args, argpos, garb, garbpos, 1);
				740	rhs = matd_op_gobble_right(expr, pos, rhs, garb, garbpos);
				741
				742	matd_t *res = matd_add(acc, rhs);
				743
				744	garb[*garbpos] = res;
				745	(*garbpos)++;
				746	acc = res;
				747	break;
				748	}
				749
				750	case '-': {
				751	if (oneterm && acc != NULL)
				752	return acc;
				753
				754	if (acc == NULL) {
				755	// unary minus
				756	(*pos)++;
				757	matd_t *rhs = matd_op_recurse(expr, pos, NULL, args, argpos, garb, garbpos, 1);
				758	rhs = matd_op_gobble_right(expr, pos, rhs, garb, garbpos);
				759
				760	matd_t *res = matd_scale(rhs, -1);
				761	garb[*garbpos] = res;
				762	(*garbpos)++;
				763	acc = res;
				764	} else {
				765	// subtract
				766	(*pos)++;
				767	matd_t *rhs = matd_op_recurse(expr, pos, NULL, args, argpos, garb, garbpos, 1);
				768	rhs = matd_op_gobble_right(expr, pos, rhs, garb, garbpos);
				769
				770	matd_t *res = matd_subtract(acc, rhs);
				771	garb[*garbpos] = res;
				772	(*garbpos)++;
				773	acc = res;
				774	}
				775	break;
				776	}
				777
				778	case ' ': {
				779	// nothing to do. spaces are meaningless.
				780	(*pos)++;
				781	break;
				782	}
				783
				784	default: {
				785	debug_print("Unknown character: '%c'\n", expr[*pos]);
				786	assert(expr[pos] != expr[pos]);
				787	}
				788	}
				789	}
				790	return acc;
				791	}
				792
				793	// always returns a new matrix.
				794	matd_t matd_op(const char expr, ...)
				795	{
				796	int nargs = 0;
				797	int exprlen = 0;
				798
				799	assert(expr != NULL);
				800
				801	for (const char p = expr; p != 0; p++) {
				802	if (p == 'M' \|\| p == 'F')
				803	nargs++;
				804	exprlen++;
				805	}
				806
				807	assert(nargs > 0);
				808
				809	if (!exprlen) // expr = ""
				810	return NULL;
				811
				812	va_list ap;
				813	va_start(ap, expr);
				814
				815	matd_t *args = malloc(sizeof(matd_t)*nargs);
				816	for (int i = 0; i < nargs; i++) {
				817	args[i] = va_arg(ap, matd_t*);
				818	// XXX: sanity check argument; emit warning/error if args[i]
				819	// doesn't look like a matd_t*.
				820	}
				821
				822	va_end(ap);
				823
				824	int pos = 0;
				825	int argpos = 0;
				826	int garbpos = 0;
				827
				828	// can't create more than 2 new result per character
				829	// one result, and possibly one argument to free
				830	matd_t *garb = malloc(sizeof(matd_t)2exprlen);
				831
				832	matd_t *res = matd_op_recurse(expr, &pos, NULL, args, &argpos, garb, &garbpos, 0);
				833	free(args);
				834
				835	// 'res' may need to be freed as part of garbage collection (i.e. expr = "F")
				836	matd_t *res_copy = (res ? matd_copy(res) : NULL);
				837
				838	for (int i = 0; i < garbpos; i++) {
				839	matd_destroy(garb[i]);
				840	}
				841	free(garb);
				842
				843	return res_copy;
				844	}
				845
				846	double matd_vec_mag(const matd_t *a)
				847	{
				848	assert(a != NULL);
				849	assert(matd_is_vector(a));
				850
				851	double mag = 0.0;
				852	int len = a->nrows*a->ncols;
				853	for (int i = 0; i < len; i++)
				854	mag += sq(a->data[i]);
				855	return sqrt(mag);
				856	}
				857
				858	double matd_vec_dist(const matd_t a, const matd_t b)
				859	{
				860	assert(a != NULL);
				861	assert(b != NULL);
				862	assert(matd_is_vector(a) && matd_is_vector(b));
				863	assert(a->nrowsa->ncols == b->nrowsb->ncols);
				864
				865	int lena = a->nrows*a->ncols;
				866	return matd_vec_dist_n(a, b, lena);
				867	}
				868
				869	double matd_vec_dist_n(const matd_t a, const matd_t b, int n)
				870	{
				871	assert(a != NULL);
				872	assert(b != NULL);
				873	assert(matd_is_vector(a) && matd_is_vector(b));
				874
				875	int lena = a->nrows*a->ncols;
				876	int lenb = b->nrows*b->ncols;
				877
				878	assert(n <= lena && n <= lenb);
				879
				880	double mag = 0.0;
				881	for (int i = 0; i < n; i++)
				882	mag += sq(a->data[i] - b->data[i]);
				883	return sqrt(mag);
				884	}
				885
				886	// find the index of the off-diagonal element with the largest mag
				887	static inline int max_idx(const matd_t *A, int row, int maxcol)
				888	{
				889	int maxi = 0;
				890	double maxv = -1;
				891
				892	for (int i = 0; i < maxcol; i++) {
				893	if (i == row)
				894	continue;
				895	double v = fabs(MATD_EL(A, row, i));
				896	if (v > maxv) {
				897	maxi = i;
				898	maxv = v;
				899	}
				900	}
				901
				902	return maxi;
				903	}
				904
				905	double matd_vec_dot_product(const matd_t a, const matd_t b)
				906	{
				907	assert(a != NULL);
				908	assert(b != NULL);
				909	assert(matd_is_vector(a) && matd_is_vector(b));
				910	int adim = a->ncols*a->nrows;
				911	int bdim = b->ncols*b->nrows;
				912	assert(adim == bdim);
				913
				914	double acc = 0;
				915	for (int i = 0; i < adim; i++) {
				916	acc += a->data[i] * b->data[i];
				917	}
				918	return acc;
				919	}
				920
				921
				922	matd_t matd_vec_normalize(const matd_t a)
				923	{
				924	assert(a != NULL);
				925	assert(matd_is_vector(a));
				926
				927	double mag = matd_vec_mag(a);
				928	assert(mag > 0);
				929
				930	matd_t *b = matd_create(a->nrows, a->ncols);
				931
				932	int len = a->nrows*a->ncols;
				933	for(int i = 0; i < len; i++)
				934	b->data[i] = a->data[i] / mag;
				935
				936	return b;
				937	}
				938
				939	matd_t matd_crossproduct(const matd_t a, const matd_t *b)
				940	{ // only defined for vecs (col or row) of length 3
				941	assert(a != NULL);
				942	assert(b != NULL);
				943	assert(matd_is_vector_len(a, 3) && matd_is_vector_len(b, 3));
				944
				945	matd_t * r = matd_create(a->nrows, a->ncols);
				946
				947	r->data[0] = a->data[1] * b->data[2] - a->data[2] * b->data[1];
				948	r->data[1] = a->data[2] * b->data[0] - a->data[0] * b->data[2];
				949	r->data[2] = a->data[0] * b->data[1] - a->data[1] * b->data[0];
				950
				951	return r;
				952	}
				953
				954	TYPE matd_err_inf(const matd_t a, const matd_t b)
				955	{
				956	assert(a->nrows == b->nrows);
				957	assert(a->ncols == b->ncols);
				958
				959	TYPE maxf = 0;
				960
				961	for (int i = 0; i < a->nrows; i++) {
				962	for (int j = 0; j < a->ncols; j++) {
				963	TYPE av = MATD_EL(a, i, j);
				964	TYPE bv = MATD_EL(b, i, j);
				965
				966	TYPE err = fabs(av - bv);
				967	maxf = fmax(maxf, err);
				968	}
				969	}
				970
				971	return maxf;
				972	}
				973
				974	// Computes an SVD for square or tall matrices. This code doesn't work
				975	// for wide matrices, because the bidiagonalization results in one
				976	// non-zero element too far to the right for us to rotate away.
				977	//
				978	// Caller is responsible for destroying U, S, and V.
				979	static matd_svd_t matd_svd_tall(matd_t *A, int flags)
				980	{
				981	matd_t *B = matd_copy(A);
				982
				983	// Apply householder reflections on each side to reduce A to
				984	// bidiagonal form. Specifically:
				985	//
				986	// A = LSBRS'
				987	//
				988	// Where B is bidiagonal, and LS/RS are unitary.
				989	//
				990	// Why are we doing this? Some sort of transformation is necessary
				991	// to reduce the matrix's nz elements to a square region. QR could
				992	// work too. We need nzs confined to a square region so that the
				993	// subsequent iterative process, which is based on rotations, can
				994	// work. (To zero out a term at (i,j), our rotations will also
				995	// affect (j,i).
				996	//
				997	// We prefer bidiagonalization over QR because it gets us "closer"
				998	// to the SVD, which should mean fewer iterations.
				999
				1000	// LS: cumulative left-handed transformations
				1001	matd_t *LS = matd_identity(A->nrows);
				1002
				1003	// RS: cumulative right-handed transformations.
				1004	matd_t *RS = matd_identity(A->ncols);
				1005
				1006	for (int hhidx = 0; hhidx < A->nrows; hhidx++) {
				1007
				1008	if (hhidx < A->ncols) {
				1009	// We construct the normal of the reflection plane: let u
				1010	// be the vector to reflect, x =[ M 0 0 0 ] the target
				1011	// location for u (u') after reflection (with M = \|\|u\|\|).
				1012	//
				1013	// The normal vector is then n = (u - x), but since we
				1014	// could equally have the target location be x = [-M 0 0 0
				1015	// ], we could use n = (u + x).
				1016	//
				1017	// We then normalize n. To ensure a reasonable magnitude,
				1018	// we select the sign of M so as to maximize the magnitude
				1019	// of the first element of (x +/- M). (Otherwise, we could
				1020	// end up with a divide-by-zero if u[0] and M cancel.)
				1021	//
				1022	// The householder reflection matrix is then H=(I - nn'), and
				1023	// u' = Hu.
				1024	//
				1025	//
				1026	int vlen = A->nrows - hhidx;
				1027
				1028	double v = malloc(sizeof(double)vlen);
				1029
				1030	double mag2 = 0;
				1031	for (int i = 0; i < vlen; i++) {
				1032	v[i] = MATD_EL(B, hhidx+i, hhidx);
				1033	mag2 += v[i]*v[i];
				1034	}
				1035
				1036	double oldv0 = v[0];
				1037	if (oldv0 < 0)
				1038	v[0] -= sqrt(mag2);
				1039	else
				1040	v[0] += sqrt(mag2);
				1041
				1042	mag2 += -oldv0oldv0 + v[0]v[0];
				1043
				1044	// normalize v
				1045	double mag = sqrt(mag2);
				1046
				1047	// this case arises with matrices of all zeros, for example.
				1048	if (mag == 0) {
				1049	free(v);
				1050	continue;
				1051	}
				1052
				1053	for (int i = 0; i < vlen; i++)
				1054	v[i] /= mag;
				1055
				1056	// Q = I - 2vv'
				1057	//matd_t *Q = matd_identity(A->nrows);
				1058	//for (int i = 0; i < vlen; i++)
				1059	// for (int j = 0; j < vlen; j++)
				1060	// MATD_EL(Q, i+hhidx, j+hhidx) -= 2v[i]v[j];
				1061
				1062
				1063	// LS = matd_op("F*M", LS, Q);
				1064	// Implementation: take each row of LS, compute dot product with n,
				1065	// subtract n (scaled by dot product) from it.
				1066	for (int i = 0; i < LS->nrows; i++) {
				1067	double dot = 0;
				1068	for (int j = 0; j < vlen; j++)
				1069	dot += MATD_EL(LS, i, hhidx+j) * v[j];
				1070	for (int j = 0; j < vlen; j++)
				1071	MATD_EL(LS, i, hhidx+j) -= 2dotv[j];
				1072	}
				1073
				1074	// B = matd_op("M*F", Q, B); // should be Q', but Q is symmetric.
				1075	for (int i = 0; i < B->ncols; i++) {
				1076	double dot = 0;
				1077	for (int j = 0; j < vlen; j++)
				1078	dot += MATD_EL(B, hhidx+j, i) * v[j];
				1079	for (int j = 0; j < vlen; j++)
				1080	MATD_EL(B, hhidx+j, i) -= 2dotv[j];
				1081	}
				1082
				1083	free(v);
				1084	}
				1085
				1086	if (hhidx+2 < A->ncols) {
				1087	int vlen = A->ncols - hhidx - 1;
				1088
				1089	double v = malloc(sizeof(double)vlen);
				1090
				1091	double mag2 = 0;
				1092	for (int i = 0; i < vlen; i++) {
				1093	v[i] = MATD_EL(B, hhidx, hhidx+i+1);
				1094	mag2 += v[i]*v[i];
				1095	}
				1096
				1097	double oldv0 = v[0];
				1098	if (oldv0 < 0)
				1099	v[0] -= sqrt(mag2);
				1100	else
				1101	v[0] += sqrt(mag2);
				1102
				1103	mag2 += -oldv0oldv0 + v[0]v[0];
				1104
				1105	// compute magnitude of ([1 0 0..]+v)
				1106	double mag = sqrt(mag2);
				1107
				1108	// this case can occur when the vectors are already perpendicular
				1109	if (mag == 0) {
				1110	free(v);
				1111	continue;
				1112	}
				1113
				1114	for (int i = 0; i < vlen; i++)
				1115	v[i] /= mag;
				1116
				1117	// TODO: optimize these multiplications
				1118	// matd_t *Q = matd_identity(A->ncols);
				1119	// for (int i = 0; i < vlen; i++)
				1120	// for (int j = 0; j < vlen; j++)
				1121	// MATD_EL(Q, i+1+hhidx, j+1+hhidx) -= 2v[i]v[j];
				1122
				1123	// RS = matd_op("F*M", RS, Q);
				1124	for (int i = 0; i < RS->nrows; i++) {
				1125	double dot = 0;
				1126	for (int j = 0; j < vlen; j++)
				1127	dot += MATD_EL(RS, i, hhidx+1+j) * v[j];
				1128	for (int j = 0; j < vlen; j++)
				1129	MATD_EL(RS, i, hhidx+1+j) -= 2dotv[j];
				1130	}
				1131
				1132	// B = matd_op("F*M", B, Q); // should be Q', but Q is symmetric.
				1133	for (int i = 0; i < B->nrows; i++) {
				1134	double dot = 0;
				1135	for (int j = 0; j < vlen; j++)
				1136	dot += MATD_EL(B, i, hhidx+1+j) * v[j];
				1137	for (int j = 0; j < vlen; j++)
				1138	MATD_EL(B, i, hhidx+1+j) -= 2dotv[j];
				1139	}
				1140
				1141	free(v);
				1142	}
				1143	}
				1144
				1145	// empirically, we find a roughly linear worst-case number of iterations
				1146	// as a function of rowscols. maxiters ~= 1.5nrows*ncols
				1147	// we're a bit conservative below.
				1148	int maxiters = 200 + 2 * A->nrows * A->ncols;
				1149	assert(maxiters > 0); // reassure clang
				1150	int iter;
				1151
				1152	double maxv; // maximum non-zero value being reduced this iteration
				1153
				1154	double tol = 1E-10;
				1155
				1156	// which method will we use to find the largest off-diagonal
				1157	// element of B?
				1158	const int find_max_method = 1; //(B->ncols < 6) ? 2 : 1;
				1159
				1160	// for each of the first B->ncols rows, which index has the
				1161	// maximum absolute value? (used by method 1)
				1162	int maxrowidx = malloc(sizeof(int)B->ncols);
				1163	int lastmaxi, lastmaxj;
				1164
				1165	if (find_max_method == 1) {
				1166	for (int i = 2; i < B->ncols; i++)
				1167	maxrowidx[i] = max_idx(B, i, B->ncols);
				1168
				1169	// note that we started the array at 2. That's because by setting
				1170	// these values below, we'll recompute first two entries on the
				1171	// first iteration!
				1172	lastmaxi = 0, lastmaxj = 1;
				1173	}
				1174
				1175	for (iter = 0; iter < maxiters; iter++) {
				1176
				1177	// No diagonalization required for 0x0 and 1x1 matrices.
				1178	if (B->ncols < 2)
				1179	break;
				1180
				1181	// find the largest off-diagonal element of B, and put its
				1182	// coordinates in maxi, maxj.
				1183	int maxi, maxj;
				1184
				1185	if (find_max_method == 1) {
				1186	// method 1 is the "smarter" method which does at least
				1187	// 4*ncols work. More work might be needed (up to
				1188	// ncols*ncols), depending on data. Thus, this might be a
				1189	// bit slower than the default method for very small
				1190	// matrices.
				1191	maxi = -1;
				1192	maxv = -1;
				1193
				1194	// every iteration, we must deal with the fact that rows
				1195	// and columns lastmaxi and lastmaxj have been
				1196	// modified. Update maxrowidx accordingly.
				1197
				1198	// now, EVERY row also had columns lastmaxi and lastmaxj modified.
				1199	for (int rowi = 0; rowi < B->ncols; rowi++) {
				1200
				1201	// the magnitude of the largest off-diagonal element
				1202	// in this row.
				1203	double thismaxv;
				1204
				1205	// row 'lastmaxi' and 'lastmaxj' have been completely
				1206	// changed. compute from scratch.
				1207	if (rowi == lastmaxi \|\| rowi == lastmaxj) {
				1208	maxrowidx[rowi] = max_idx(B, rowi, B->ncols);
				1209	thismaxv = fabs(MATD_EL(B, rowi, maxrowidx[rowi]));
				1210	goto endrowi;
				1211	}
				1212
				1213	// our maximum entry was just modified. We don't know
				1214	// if it went up or down, and so we don't know if it
				1215	// is still the maximum. We have to update from
				1216	// scratch.
				1217	if (maxrowidx[rowi] == lastmaxi \|\| maxrowidx[rowi] == lastmaxj) {
				1218	maxrowidx[rowi] = max_idx(B, rowi, B->ncols);
				1219	thismaxv = fabs(MATD_EL(B, rowi, maxrowidx[rowi]));
				1220	goto endrowi;
				1221	}
				1222
				1223	// This row is unchanged, except for columns
				1224	// 'lastmaxi' and 'lastmaxj', and those columns were
				1225	// not previously the largest entry... just check to
				1226	// see if they are now the maximum entry in their
				1227	// row. (Remembering to consider off-diagonal entries
				1228	// only!)
				1229	thismaxv = fabs(MATD_EL(B, rowi, maxrowidx[rowi]));
				1230
				1231	// check column lastmaxi. Is it now the maximum?
				1232	if (lastmaxi != rowi) {
				1233	double v = fabs(MATD_EL(B, rowi, lastmaxi));
				1234	if (v > thismaxv) {
				1235	thismaxv = v;
				1236	maxrowidx[rowi] = lastmaxi;
				1237	}
				1238	}
				1239
				1240	// check column lastmaxj
				1241	if (lastmaxj != rowi) {
				1242	double v = fabs(MATD_EL(B, rowi, lastmaxj));
				1243	if (v > thismaxv) {
				1244	thismaxv = v;
				1245	maxrowidx[rowi] = lastmaxj;
				1246	}
				1247	}
				1248
				1249	// does this row have the largest value we've seen so far?
				1250	endrowi:
				1251	if (thismaxv > maxv) {
				1252	maxv = thismaxv;
				1253	maxi = rowi;
				1254	}
				1255	}
				1256
				1257	assert(maxi >= 0);
				1258	maxj = maxrowidx[maxi];
				1259
				1260	// save these for the next iteration.
				1261	lastmaxi = maxi;
				1262	lastmaxj = maxj;
				1263
				1264	if (maxv < tol)
				1265	break;
				1266
				1267	} else if (find_max_method == 2) {
				1268	// brute-force (reference) version.
				1269	maxv = -1;
				1270
				1271	// only search top "square" portion
				1272	for (int i = 0; i < B->ncols; i++) {
				1273	for (int j = 0; j < B->ncols; j++) {
				1274	if (i == j)
				1275	continue;
				1276
				1277	double v = fabs(MATD_EL(B, i, j));
				1278
				1279	if (v > maxv) {
				1280	maxi = i;
				1281	maxj = j;
				1282	maxv = v;
				1283	}
				1284	}
				1285	}
				1286
				1287	// termination condition.
				1288	if (maxv < tol)
				1289	break;
				1290	} else {
				1291	assert(0);
				1292	}
				1293
				1294	// printf(">>> %5d %3d, %3d %15g\n", maxi, maxj, iter, maxv);
				1295
				1296	// Now, solve the 2x2 SVD problem for the matrix
				1297	// [ A0 A1 ]
				1298	// [ A2 A3 ]
				1299	double A0 = MATD_EL(B, maxi, maxi);
				1300	double A1 = MATD_EL(B, maxi, maxj);
				1301	double A2 = MATD_EL(B, maxj, maxi);
				1302	double A3 = MATD_EL(B, maxj, maxj);
				1303
				1304	if (1) {
				1305	double AQ[4];
				1306	AQ[0] = A0;
				1307	AQ[1] = A1;
				1308	AQ[2] = A2;
				1309	AQ[3] = A3;
				1310
				1311	double U[4], S[2], V[4];
				1312	svd22(AQ, U, S, V);
				1313
				1314	/* Reference (slow) implementation...
				1315
				1316	// LS = LS * ROT(theta) = LS * QL
				1317	matd_t *QL = matd_identity(A->nrows);
				1318	MATD_EL(QL, maxi, maxi) = U[0];
				1319	MATD_EL(QL, maxi, maxj) = U[1];
				1320	MATD_EL(QL, maxj, maxi) = U[2];
				1321	MATD_EL(QL, maxj, maxj) = U[3];
				1322
				1323	matd_t *QR = matd_identity(A->ncols);
				1324	MATD_EL(QR, maxi, maxi) = V[0];
				1325	MATD_EL(QR, maxi, maxj) = V[1];
				1326	MATD_EL(QR, maxj, maxi) = V[2];
				1327	MATD_EL(QR, maxj, maxj) = V[3];
				1328
				1329	LS = matd_op("F*M", LS, QL);
				1330	RS = matd_op("F*M", RS, QR); // remember we'll transpose RS.
				1331	B = matd_op("M'FM", QL, B, QR);
				1332
				1333	matd_destroy(QL);
				1334	matd_destroy(QR);
				1335	*/
				1336
				1337	// LS = matd_op("F*M", LS, QL);
				1338	for (int i = 0; i < LS->nrows; i++) {
				1339	double vi = MATD_EL(LS, i, maxi);
				1340	double vj = MATD_EL(LS, i, maxj);
				1341
				1342	MATD_EL(LS, i, maxi) = U[0]vi + U[2]vj;
				1343	MATD_EL(LS, i, maxj) = U[1]vi + U[3]vj;
				1344	}
				1345
				1346	// RS = matd_op("F*M", RS, QR); // remember we'll transpose RS.
				1347	for (int i = 0; i < RS->nrows; i++) {
				1348	double vi = MATD_EL(RS, i, maxi);
				1349	double vj = MATD_EL(RS, i, maxj);
				1350
				1351	MATD_EL(RS, i, maxi) = V[0]vi + V[2]vj;
				1352	MATD_EL(RS, i, maxj) = V[1]vi + V[3]vj;
				1353	}
				1354
				1355	// B = matd_op("M'FM", QL, B, QR);
				1356	// The QL matrix mixes rows of B.
				1357	for (int i = 0; i < B->ncols; i++) {
				1358	double vi = MATD_EL(B, maxi, i);
				1359	double vj = MATD_EL(B, maxj, i);
				1360
				1361	MATD_EL(B, maxi, i) = U[0]vi + U[2]vj;
				1362	MATD_EL(B, maxj, i) = U[1]vi + U[3]vj;
				1363	}
				1364
				1365	// The QR matrix mixes columns of B.
				1366	for (int i = 0; i < B->nrows; i++) {
				1367	double vi = MATD_EL(B, i, maxi);
				1368	double vj = MATD_EL(B, i, maxj);
				1369
				1370	MATD_EL(B, i, maxi) = V[0]vi + V[2]vj;
				1371	MATD_EL(B, i, maxj) = V[1]vi + V[3]vj;
				1372	}
				1373	}
				1374	}
				1375
				1376	free(maxrowidx);
				1377
				1378	if (!(flags & MATD_SVD_NO_WARNINGS) && iter == maxiters) {
				1379	debug_print("WARNING: maximum iters (maximum = %d, matrix %d x %d, max=%.15f)\n",
				1380	iter, A->nrows, A->ncols, maxv);
				1381
				1382	// matd_print(A, "%15f");
				1383	}
				1384
				1385	// them all positive by flipping the corresponding columns of
				1386	// U/LS.
				1387	int idxs = malloc(sizeof(int)A->ncols);
				1388	double vals = malloc(sizeof(double)A->ncols);
				1389	for (int i = 0; i < A->ncols; i++) {
				1390	idxs[i] = i;
				1391	vals[i] = MATD_EL(B, i, i);
				1392	}
				1393
				1394	// A bubble sort. Seriously.
				1395	int changed;
				1396	do {
				1397	changed = 0;
				1398
				1399	for (int i = 0; i + 1 < A->ncols; i++) {
				1400	if (fabs(vals[i+1]) > fabs(vals[i])) {
				1401	int tmpi = idxs[i];
				1402	idxs[i] = idxs[i+1];
				1403	idxs[i+1] = tmpi;
				1404
				1405	double tmpv = vals[i];
				1406	vals[i] = vals[i+1];
				1407	vals[i+1] = tmpv;
				1408
				1409	changed = 1;
				1410	}
				1411	}
				1412	} while (changed);
				1413
				1414	matd_t *LP = matd_identity(A->nrows);
				1415	matd_t *RP = matd_identity(A->ncols);
				1416
				1417	for (int i = 0; i < A->ncols; i++) {
				1418	MATD_EL(LP, idxs[i], idxs[i]) = 0; // undo the identity above
				1419	MATD_EL(RP, idxs[i], idxs[i]) = 0;
				1420
				1421	MATD_EL(LP, idxs[i], i) = vals[i] < 0 ? -1 : 1;
				1422	MATD_EL(RP, idxs[i], i) = 1; //vals[i] < 0 ? -1 : 1;
				1423	}
				1424	free(idxs);
				1425	free(vals);
				1426
				1427	// we've factored:
				1428	// LP(something)RP'
				1429
				1430	// solve for (something)
				1431	B = matd_op("M'FM", LP, B, RP);
				1432
				1433	// update LS and RS, remembering that RS will be transposed.
				1434	LS = matd_op("F*M", LS, LP);
				1435	RS = matd_op("F*M", RS, RP);
				1436
				1437	matd_destroy(LP);
				1438	matd_destroy(RP);
				1439
				1440	matd_svd_t res;
				1441	memset(&res, 0, sizeof(res));
				1442
				1443	// make B exactly diagonal
				1444
				1445	for (int i = 0; i < B->nrows; i++) {
				1446	for (int j = 0; j < B->ncols; j++) {
				1447	if (i != j)
				1448	MATD_EL(B, i, j) = 0;
				1449	}
				1450	}
				1451
				1452	res.U = LS;
				1453	res.S = B;
				1454	res.V = RS;
				1455
				1456	return res;
				1457	}
				1458
				1459	matd_svd_t matd_svd(matd_t *A)
				1460	{
				1461	return matd_svd_flags(A, 0);
				1462	}
				1463
				1464	matd_svd_t matd_svd_flags(matd_t *A, int flags)
				1465	{
				1466	matd_svd_t res;
				1467
				1468	if (A->ncols <= A->nrows) {
				1469	res = matd_svd_tall(A, flags);
				1470	} else {
				1471	matd_t *At = matd_transpose(A);
				1472
				1473	// A =U S V'
				1474	// A'=V S' U'
				1475
				1476	matd_svd_t tmp = matd_svd_tall(At, flags);
				1477
				1478	memset(&res, 0, sizeof(res));
				1479	res.U = tmp.V; //matd_transpose(tmp.V);
				1480	res.S = matd_transpose(tmp.S);
				1481	res.V = tmp.U; //matd_transpose(tmp.U);
				1482
				1483	matd_destroy(tmp.S);
				1484	matd_destroy(At);
				1485	}
				1486
				1487	/*
				1488	matd_t check = matd_op("MM*M'-M", res.U, res.S, res.V, A);
				1489	double maxerr = 0;
				1490
				1491	for (int i = 0; i < check->nrows; i++)
				1492	for (int j = 0; j < check->ncols; j++)
				1493	maxerr = fmax(maxerr, fabs(MATD_EL(check, i, j)));
				1494
				1495	matd_destroy(check);
				1496
				1497	if (maxerr > 1e-7) {
				1498	printf("bad maxerr: %15f\n", maxerr);
				1499	}
				1500
				1501	if (maxerr > 1e-5) {
				1502	printf("bad maxerr: %15f\n", maxerr);
				1503	matd_print(A, "%15f");
				1504	assert(0);
				1505	}
				1506
				1507	*/
				1508	return res;
				1509	}
				1510
				1511
				1512	matd_plu_t matd_plu(const matd_t a)
				1513	{
				1514	unsigned int *piv = calloc(a->nrows, sizeof(unsigned int));
				1515	int pivsign = 1;
				1516	matd_t *lu = matd_copy(a);
				1517
				1518	// only for square matrices.
				1519	assert(a->nrows == a->ncols);
				1520
				1521	matd_plu_t *mlu = calloc(1, sizeof(matd_plu_t));
				1522
				1523	for (int i = 0; i < a->nrows; i++)
				1524	piv[i] = i;
				1525
				1526	for (int j = 0; j < a->ncols; j++) {
				1527	for (int i = 0; i < a->nrows; i++) {
				1528	int kmax = i < j ? i : j; // min(i,j)
				1529
				1530	// compute dot product of row i with column j (up through element kmax)
				1531	double acc = 0;
				1532	for (int k = 0; k < kmax; k++)
				1533	acc += MATD_EL(lu, i, k) * MATD_EL(lu, k, j);
				1534
				1535	MATD_EL(lu, i, j) -= acc;
				1536	}
				1537
				1538	// find pivot and exchange if necessary.
				1539	int p = j;
				1540	if (1) {
				1541	for (int i = j+1; i < lu->nrows; i++) {
				1542	if (fabs(MATD_EL(lu,i,j)) > fabs(MATD_EL(lu, p, j))) {
				1543	p = i;
				1544	}
				1545	}
				1546	}
				1547
				1548	// swap rows p and j?
				1549	if (p != j) {
				1550	TYPE tmp = malloc(sizeof(TYPE)lu->ncols);
				1551	memcpy(tmp, &MATD_EL(lu, p, 0), sizeof(TYPE) * lu->ncols);
				1552	memcpy(&MATD_EL(lu, p, 0), &MATD_EL(lu, j, 0), sizeof(TYPE) * lu->ncols);
				1553	memcpy(&MATD_EL(lu, j, 0), tmp, sizeof(TYPE) * lu->ncols);
				1554	int k = piv[p];
				1555	piv[p] = piv[j];
				1556	piv[j] = k;
				1557	pivsign = -pivsign;
				1558	free(tmp);
				1559	}
				1560
				1561	double LUjj = MATD_EL(lu, j, j);
				1562
				1563	// If our pivot is very small (which means the matrix is
				1564	// singular or nearly singular), replace with a new pivot of the
				1565	// right sign.
				1566	if (fabs(LUjj) < MATD_EPS) {
				1567	/*
				1568	if (LUjj < 0)
				1569	LUjj = -MATD_EPS;
				1570	else
				1571	LUjj = MATD_EPS;
				1572
				1573	MATD_EL(lu, j, j) = LUjj;
				1574	*/
				1575	mlu->singular = 1;
				1576	}
				1577
				1578	if (j < lu->ncols && j < lu->nrows && LUjj != 0) {
				1579	LUjj = 1.0 / LUjj;
				1580	for (int i = j+1; i < lu->nrows; i++)
				1581	MATD_EL(lu, i, j) *= LUjj;
				1582	}
				1583	}
				1584
				1585	mlu->lu = lu;
				1586	mlu->piv = piv;
				1587	mlu->pivsign = pivsign;
				1588
				1589	return mlu;
				1590	}
				1591
				1592	void matd_plu_destroy(matd_plu_t *mlu)
				1593	{
				1594	matd_destroy(mlu->lu);
				1595	free(mlu->piv);
				1596	memset(mlu, 0, sizeof(matd_plu_t));
				1597	free(mlu);
				1598	}
				1599
				1600	double matd_plu_det(const matd_plu_t *mlu)
				1601	{
				1602	matd_t *lu = mlu->lu;
				1603	double det = mlu->pivsign;
				1604
				1605	if (lu->nrows == lu->ncols) {
				1606	for (int i = 0; i < lu->ncols; i++)
				1607	det *= MATD_EL(lu, i, i);
				1608	}
				1609
				1610	return det;
				1611	}
				1612
				1613	matd_t matd_plu_p(const matd_plu_t mlu)
				1614	{
				1615	matd_t *lu = mlu->lu;
				1616	matd_t *P = matd_create(lu->nrows, lu->nrows);
				1617
				1618	for (int i = 0; i < lu->nrows; i++) {
				1619	MATD_EL(P, mlu->piv[i], i) = 1;
				1620	}
				1621
				1622	return P;
				1623	}
				1624
				1625	matd_t matd_plu_l(const matd_plu_t mlu)
				1626	{
				1627	matd_t *lu = mlu->lu;
				1628
				1629	matd_t *L = matd_create(lu->nrows, lu->ncols);
				1630	for (int i = 0; i < lu->nrows; i++) {
				1631	MATD_EL(L, i, i) = 1;
				1632
				1633	for (int j = 0; j < i; j++) {
				1634	MATD_EL(L, i, j) = MATD_EL(lu, i, j);
				1635	}
				1636	}
				1637
				1638	return L;
				1639	}
				1640
				1641	matd_t matd_plu_u(const matd_plu_t mlu)
				1642	{
				1643	matd_t *lu = mlu->lu;
				1644
				1645	matd_t *U = matd_create(lu->ncols, lu->ncols);
				1646	for (int i = 0; i < lu->ncols; i++) {
				1647	for (int j = 0; j < lu->ncols; j++) {
				1648	if (i <= j)
				1649	MATD_EL(U, i, j) = MATD_EL(lu, i, j);
				1650	}
				1651	}
				1652
				1653	return U;
				1654	}
				1655
				1656	// PLU = A
				1657	// Ax = B
				1658	// PLUx = B
				1659	// LUx = P'B
				1660	matd_t matd_plu_solve(const matd_plu_t mlu, const matd_t *b)
				1661	{
				1662	matd_t *x = matd_copy(b);
				1663
				1664	// permute right hand side
				1665	for (int i = 0; i < mlu->lu->nrows; i++)
				1666	memcpy(&MATD_EL(x, i, 0), &MATD_EL(b, mlu->piv[i], 0), sizeof(TYPE) * b->ncols);
				1667
				1668	// solve Ly = b
				1669	for (int k = 0; k < mlu->lu->nrows; k++) {
				1670	for (int i = k+1; i < mlu->lu->nrows; i++) {
				1671	double LUik = -MATD_EL(mlu->lu, i, k);
				1672	for (int t = 0; t < b->ncols; t++)
				1673	MATD_EL(x, i, t) += MATD_EL(x, k, t) * LUik;
				1674	}
				1675	}
				1676
				1677	// solve Ux = y
				1678	for (int k = mlu->lu->ncols-1; k >= 0; k--) {
				1679	double LUkk = 1.0 / MATD_EL(mlu->lu, k, k);
				1680	for (int t = 0; t < b->ncols; t++)
				1681	MATD_EL(x, k, t) *= LUkk;
				1682
				1683	for (int i = 0; i < k; i++) {
				1684	double LUik = -MATD_EL(mlu->lu, i, k);
				1685	for (int t = 0; t < b->ncols; t++)
				1686	MATD_EL(x, i, t) += MATD_EL(x, k, t) *LUik;
				1687	}
				1688	}
				1689
				1690	return x;
				1691	}
				1692
				1693	matd_t matd_solve(matd_t A, matd_t *b)
				1694	{
				1695	matd_plu_t *mlu = matd_plu(A);
				1696	matd_t *x = matd_plu_solve(mlu, b);
				1697
				1698	matd_plu_destroy(mlu);
				1699	return x;
				1700	}
				1701
				1702	#if 0
				1703
				1704	static int randi()
				1705	{
				1706	int v = random()&31;
				1707	v -= 15;
				1708	return v;
				1709	}
				1710
				1711	static double randf()
				1712	{
				1713	double v = 1.0 *random() / RAND_MAX;
				1714	return 2*v - 1;
				1715	}
				1716
				1717	int main(int argc, char *argv[])
				1718	{
				1719	if (1) {
				1720	int maxdim = 16;
				1721	matd_t *A = matd_create(maxdim, maxdim);
				1722
				1723	for (int iter = 0; 1; iter++) {
				1724	srand(iter);
				1725
				1726	if (iter % 1000 == 0)
				1727	printf("%d\n", iter);
				1728
				1729	int m = 1 + (random()%(maxdim-1));
				1730	int n = 1 + (random()%(maxdim-1));
				1731
				1732	for (int i = 0; i < m*n; i++)
				1733	A->data[i] = randi();
				1734
				1735	A->nrows = m;
				1736	A->ncols = n;
				1737
				1738	// printf("%d %d ", m, n);
				1739	matd_svd_t svd = matd_svd(A);
				1740	matd_destroy(svd.U);
				1741	matd_destroy(svd.S);
				1742	matd_destroy(svd.V);
				1743
				1744	}
				1745
				1746	/* matd_t *A = matd_create_data(2, 5, (double[]) { 1, 5, 2, 6,
				1747	3, 3, 0, 7,
				1748	1, 1, 0, -2,
				1749	4, 0, 9, 9, 2, 6, 1, 3, 2, 5, 5, 4, -1, 2, 5, 9, 8, 2 });
				1750
				1751	matd_svd(A);
				1752	*/
				1753	return 0;
				1754	}
				1755
				1756
				1757	struct svd22 s;
				1758
				1759	srand(0);
				1760
				1761	matd_t *A = matd_create(2, 2);
				1762	MATD_EL(A,0,0) = 4;
				1763	MATD_EL(A,0,1) = 7;
				1764	MATD_EL(A,1,0) = 2;
				1765	MATD_EL(A,1,1) = 6;
				1766
				1767	matd_t *U = matd_create(2, 2);
				1768	matd_t *V = matd_create(2, 2);
				1769	matd_t *S = matd_create(2, 2);
				1770
				1771	for (int iter = 0; 1; iter++) {
				1772	if (iter % 100000 == 0)
				1773	printf("%d\n", iter);
				1774
				1775	MATD_EL(A,0,0) = randf();
				1776	MATD_EL(A,0,1) = randf();
				1777	MATD_EL(A,1,0) = randf();
				1778	MATD_EL(A,1,1) = randf();
				1779
				1780	matd_svd22_impl(A->data, &s);
				1781
				1782	memcpy(U->data, s.U, 4*sizeof(double));
				1783	memcpy(V->data, s.V, 4*sizeof(double));
				1784	MATD_EL(S,0,0) = s.S[0];
				1785	MATD_EL(S,1,1) = s.S[1];
				1786
				1787	assert(s.S[0] >= s.S[1]);
				1788	assert(s.S[0] >= 0);
				1789	assert(s.S[1] >= 0);
				1790	if (s.S[0] == 0) {
				1791	// printf("*"); fflush(NULL);
				1792	// printf("%15f %15f %15f %15f\n", MATD_EL(A,0,0), MATD_EL(A,0,1), MATD_EL(A,1,0), MATD_EL(A,1,1));
				1793	}
				1794	if (s.S[1] == 0) {
				1795	// printf("#"); fflush(NULL);
				1796	}
				1797
				1798	matd_t USV = matd_op("MM*M'", U, S, V);
				1799
				1800	double maxerr = 0;
				1801	for (int i = 0; i < 4; i++)
				1802	maxerr = fmax(maxerr, fabs(USV->data[i] - A->data[i]));
				1803
				1804	if (0) {
				1805	printf("------------------------------------\n");
				1806	printf("A:\n");
				1807	matd_print(A, "%15f");
				1808	printf("\nUSV':\n");
				1809	matd_print(USV, "%15f");
				1810	printf("maxerr: %.15f\n", maxerr);
				1811	printf("\n\n");
				1812	}
				1813
				1814	matd_destroy(USV);
				1815
				1816	assert(maxerr < 0.00001);
				1817	}
				1818	}
				1819
				1820	#endif
				1821
				1822	// XXX NGV Cholesky
				1823	/static double matd_cholesky_raw(double *A, int n)
				1824	{
				1825	double L = (double)calloc(n * n, sizeof(double));
				1826
				1827	for (int i = 0; i < n; i++) {
				1828	for (int j = 0; j < (i+1); j++) {
				1829	double s = 0;
				1830	for (int k = 0; k < j; k++)
				1831	s += L[i * n + k] * L[j * n + k];
				1832	L[i * n + j] = (i == j) ?
				1833	sqrt(A[i * n + i] - s) :
				1834	(1.0 / L[j * n + j] * (A[i * n + j] - s));
				1835	}
				1836	}
				1837
				1838	return L;
				1839	}
				1840
				1841	matd_t matd_cholesky(const matd_t A)
				1842	{
				1843	assert(A->nrows == A->ncols);
				1844	double *L_data = matd_cholesky_raw(A->data, A->nrows);
				1845	matd_t *L = matd_create_data(A->nrows, A->ncols, L_data);
				1846	free(L_data);
				1847	return L;
				1848	}*/
				1849
				1850	// NOTE: The below implementation of Cholesky is different from the one
				1851	// used in NGV.
				1852	matd_chol_t matd_chol(matd_t A)
				1853	{
				1854	assert(A->nrows == A->ncols);
				1855	int N = A->nrows;
				1856
				1857	// make upper right
				1858	matd_t *U = matd_copy(A);
				1859
				1860	// don't actually need to clear lower-left... we won't touch it.
				1861	/* for (int i = 0; i < U->nrows; i++) {
				1862	for (int j = 0; j < i; j++) {
				1863	// assert(MATD_EL(U, i, j) == MATD_EL(U, j, i));
				1864	MATD_EL(U, i, j) = 0;
				1865	}
				1866	}
				1867	*/
				1868	int is_spd = 1; // (A->nrows == A->ncols);
				1869
				1870	for (int i = 0; i < N; i++) {
				1871	double d = MATD_EL(U, i, i);
				1872	is_spd &= (d > 0);
				1873
				1874	if (d < MATD_EPS)
				1875	d = MATD_EPS;
				1876	d = 1.0 / sqrt(d);
				1877
				1878	for (int j = i; j < N; j++)
				1879	MATD_EL(U, i, j) *= d;
				1880
				1881	for (int j = i+1; j < N; j++) {
				1882	double s = MATD_EL(U, i, j);
				1883
				1884	if (s == 0)
				1885	continue;
				1886
				1887	for (int k = j; k < N; k++) {
				1888	MATD_EL(U, j, k) -= MATD_EL(U, i, k)*s;
				1889	}
				1890	}
				1891	}
				1892
				1893	matd_chol_t *chol = calloc(1, sizeof(matd_chol_t));
				1894	chol->is_spd = is_spd;
				1895	chol->u = U;
				1896	return chol;
				1897	}
				1898
				1899	void matd_chol_destroy(matd_chol_t *chol)
				1900	{
				1901	matd_destroy(chol->u);
				1902	free(chol);
				1903	}
				1904
				1905	// Solve: (U')x = b, U is upper triangular
				1906	void matd_ltransposetriangle_solve(matd_t u, const TYPE b, TYPE *x)
				1907	{
				1908	int n = u->ncols;
				1909	memcpy(x, b, n*sizeof(TYPE));
				1910	for (int i = 0; i < n; i++) {
				1911	x[i] /= MATD_EL(u, i, i);
				1912
				1913	for (int j = i+1; j < u->ncols; j++) {
				1914	x[j] -= x[i] * MATD_EL(u, i, j);
				1915	}
				1916	}
				1917	}
				1918
				1919	// Solve: Lx = b, L is lower triangular
				1920	void matd_ltriangle_solve(matd_t L, const TYPE b, TYPE *x)
				1921	{
				1922	int n = L->ncols;
				1923
				1924	for (int i = 0; i < n; i++) {
				1925	double acc = b[i];
				1926
				1927	for (int j = 0; j < i; j++) {
				1928	acc -= MATD_EL(L, i, j)*x[j];
				1929	}
				1930
				1931	x[i] = acc / MATD_EL(L, i, i);
				1932	}
				1933	}
				1934
				1935	// solve Ux = b, U is upper triangular
				1936	void matd_utriangle_solve(matd_t u, const TYPE b, TYPE *x)
				1937	{
				1938	for (int i = u->ncols-1; i >= 0; i--) {
				1939	double bi = b[i];
				1940
				1941	double diag = MATD_EL(u, i, i);
				1942
				1943	for (int j = i+1; j < u->ncols; j++)
				1944	bi -= MATD_EL(u, i, j)*x[j];
				1945
				1946	x[i] = bi / diag;
				1947	}
				1948	}
				1949
				1950	matd_t matd_chol_solve(const matd_chol_t chol, const matd_t *b)
				1951	{
				1952	matd_t *u = chol->u;
				1953
				1954	matd_t *x = matd_copy(b);
				1955
				1956	// LUx = b
				1957
				1958	// solve Ly = b ==> (U')y = b
				1959
				1960	for (int i = 0; i < u->nrows; i++) {
				1961	for (int j = 0; j < i; j++) {
				1962	// b[i] -= L[i,j]*x[j]... replicated across columns of b
				1963	// ==> i.e., ==>
				1964	// b[i,k] -= L[i,j]*x[j,k]
				1965	for (int k = 0; k < b->ncols; k++) {
				1966	MATD_EL(x, i, k) -= MATD_EL(u, j, i)*MATD_EL(x, j, k);
				1967	}
				1968	}
				1969	// x[i] = b[i] / L[i,i]
				1970	for (int k = 0; k < b->ncols; k++) {
				1971	MATD_EL(x, i, k) /= MATD_EL(u, i, i);
				1972	}
				1973	}
				1974
				1975	// solve Ux = y
				1976	for (int k = u->ncols-1; k >= 0; k--) {
				1977	double LUkk = 1.0 / MATD_EL(u, k, k);
				1978	for (int t = 0; t < b->ncols; t++)
				1979	MATD_EL(x, k, t) *= LUkk;
				1980
				1981	for (int i = 0; i < k; i++) {
				1982	double LUik = -MATD_EL(u, i, k);
				1983	for (int t = 0; t < b->ncols; t++)
				1984	MATD_EL(x, i, t) += MATD_EL(x, k, t) *LUik;
				1985	}
				1986	}
				1987
				1988	return x;
				1989	}
				1990
				1991	/void matd_chol_solve(matd_chol_t chol, const TYPE b, TYPE x)
				1992	{
				1993	matd_t *u = chol->u;
				1994
				1995	TYPE y[u->ncols];
				1996	matd_ltransposetriangle_solve(u, b, y);
				1997	matd_utriangle_solve(u, y, x);
				1998	}
				1999	*/
				2000	// only sensible on PSD matrices. had expected it to be faster than
				2001	// inverse via LU... for now, doesn't seem to be.
				2002	matd_t matd_chol_inverse(matd_t a)
				2003	{
				2004	assert(a->nrows == a->ncols);
				2005
				2006	matd_chol_t *chol = matd_chol(a);
				2007
				2008	matd_t *eye = matd_identity(a->nrows);
				2009	matd_t *inv = matd_chol_solve(chol, eye);
				2010	matd_destroy(eye);
				2011	matd_chol_destroy(chol);
				2012
				2013	return inv;
				2014	}
				2015
				2016	double matd_max(matd_t *m)
				2017	{
				2018	double d = -DBL_MAX;
				2019	for(int x=0; x<m->nrows; x++) {
				2020	for(int y=0; y<m->ncols; y++) {
				2021	if(MATD_EL(m, x, y) > d)
				2022	d = MATD_EL(m, x, y);
				2023	}
				2024	}
				2025
				2026	return d;
				2027	}