Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

sba_levmar.c @ 37

Last change on this file since 37 was 37, checked in by (none), 14 years ago
Added original make3d
File size: 74.8 KB

Line
1	/////////////////////////////////////////////////////////////////////////////////
2	////
3	//// Expert drivers for sparse bundle adjustment based on the
4	//// Levenberg - Marquardt minimization algorithm
5	//// Copyright (C) 2004 Manolis Lourakis (lourakis@ics.forth.gr)
6	//// Institute of Computer Science, Foundation for Research & Technology - Hellas
7	//// Heraklion, Crete, Greece.
8	////
9	//// This program is free software; you can redistribute it and/or modify
10	//// it under the terms of the GNU General Public License as published by
11	//// the Free Software Foundation; either version 2 of the License, or
12	//// (at your option) any later version.
13	////
14	//// This program is distributed in the hope that it will be useful,
15	//// but WITHOUT ANY WARRANTY; without even the implied warranty of
16	//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	//// GNU General Public License for more details.
18	////
19	///////////////////////////////////////////////////////////////////////////////////
20
21	#include <stdio.h>
22	#include <stdlib.h>
23	#include <string.h>
24	#include <math.h>
25	#include <float.h>
26
27	#include "sba.h"
28	#include "sba_chkjac.h"
29
30	#define SBA_EPSILON 1E-12
31	#define SBA_EPSILON_SQ ( (SBA_EPSILON)*(SBA_EPSILON) )
32
33	#define SBA_ONE_THIRD 0.3333333334 /* 1.0/3.0 */
34
35
36	#define emalloc(sz) emalloc_(__FILE__, __LINE__, sz)
37
38	#define FABS(x) (((x)>=0)? (x) : -(x))
39
40	#define ROW_MAJOR 0
41	#define COLUMN_MAJOR 1
42	#define MAT_STORAGE COLUMN_MAJOR
43
44
45	/* inline */
46	#ifdef _MSC_VER
47	#define inline __inline //MSVC
48	#elif !defined(__GNUC__)
49	#define inline //other than MSVC, GCC: define empty
50	#endif
51
52	/* contains information necessary for computing a finite difference approximation to a jacobian,
53	* e.g. function to differentiate, problem dimensions and pointers to working memory buffers
54	*/
55	struct fdj_data_x_ {
56	void (func)(double p, struct sba_crsm idxij, int rcidxs, int rcsubs, double hx, void adata); / function to differentiate */
57	int cnp, pnp, mnp; /* parameter numbers */
58	int *func_rcidxs,
59	func_rcsubs; / working memory for func invocations.
60	* Notice that this has to be different
61	* than the working memory used for
62	* evaluating the jacobian!
63	*/
64	double hx, hxx; /* memory to save results in */
65	void *adata;
66	};
67
68	/* auxiliary memory allocation routine with error checking */
69	inline static void emalloc_(char file, int line, size_t sz)
70	{
71	void *ptr;
72
73	ptr=(void *)malloc(sz);
74	if(ptr==NULL){
75	fprintf(stderr, "memory allocation request for %u bytes failed in file %s, line %d, exiting", sz, file, line);
76	exit(1);
77	}
78
79	return ptr;
80	}
81
82	/* auxiliary routine for clearing an array of doubles */
83	inline static void _dblzero(register double *arr, register int count)
84	{
85	while(--count>=0)
86	*arr++=0.0;
87	}
88
89	/* auxiliary routine for computing the mean reprojection error; used for debugging */
90	static double sba_mean_repr_error(int n, int mnp, double x, double hx, struct sba_crsm idxij, int rcidxs, int *rcsubs)
91	{
92	register int i, j;
93	int nnz, nprojs;
94	double ptr1, ptr2;
95	double err;
96
97	for(i=nprojs=0, err=0.0; i<n; ++i){
98	nnz=sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */
99	nprojs+=nnz;
100	for(j=0; j<nnz; ++j){ /* point i projecting on camera rcsubs[j] */
101	ptr1=x + idxij->val[rcidxs[j]]*mnp;
102	ptr2=hx + idxij->val[rcidxs[j]]*mnp;
103
104	err+=sqrt((ptr1[0]-ptr2[0])(ptr1[0]-ptr2[0]) + (ptr1[1]-ptr2[1])(ptr1[1]-ptr2[1]));
105	}
106	}
107
108	return err/((double)(nprojs));
109	}
110
111	/* print the solution in p using sba's text format. If cnp/pnp==0 only points/cameras are printed */
112	static void sba_print_sol(int n, int m, double p, int cnp, int pnp, double x, int mnp, struct sba_crsm idxij, int rcidxs, int *rcsubs)
113	{
114	register int i, j, ii;
115	int nnz;
116	double *ptr;
117
118	if(cnp){
119	/* print camera parameters */
120	for(j=0; j<m; ++j){
121	ptr=p+cnp*j;
122	for(ii=0; ii<cnp; ++ii)
123	printf("%g ", ptr[ii]);
124	printf("\n");
125	}
126	}
127
128	if(pnp){
129	/* 3D & 2D point parameters */
130	printf("\n\n\n# X Y Z nframes frame0 x0 y0 frame1 x1 y1 ...\n");
131	for(i=0; i<n; ++i){
132	ptr=p+cnpm+ipnp;
133	for(ii=0; ii<pnp; ++ii) // print 3D coordinates
134	printf("%g ", ptr[ii]);
135
136	nnz=sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */
137	printf("%d ", nnz);
138
139	for(j=0; j<nnz; ++j){ /* point i projecting on camera rcsubs[j] */
140	ptr=x + idxij->val[rcidxs[j]]*mnp;
141
142	printf("%d ", rcsubs[j]);
143	for(ii=0; ii<mnp; ++ii) // print 2D coordinates
144	printf("%g ", ptr[ii]);
145	}
146	printf("\n");
147	}
148	}
149	}
150
151
152	/* Given a parameter vector p made up of the 3D coordinates of n points and the parameters of m cameras, compute in
153	* jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images.
154	* The jacobian is approximated with the aid of finite differences and is returned in the order
155	* (A_11, ..., A_1m, ..., A_n1, ..., A_nm, B_11, ..., B_1m, ..., B_n1, ..., B_nm),
156	* where A_ij=dx_ij/da_j and B_ij=dx_ij/db_i (see HZ).
157	* Notice that depending on idxij, some of the A_ij, B_ij might be missing
158	*
159	* Problem-specific information is assumed to be stored in a structure pointed to by "dat".
160	*
161	* NOTE: The jacobian (for n=4, m=3) in matrix form has the following structure:
162	* A_11 0 0 B_11 0 0 0
163	* 0 A_12 0 B_12 0 0 0
164	* 0 0 A_13 B_13 0 0 0
165	* A_21 0 0 0 B_21 0 0
166	* 0 A_22 0 0 B_22 0 0
167	* 0 0 A_23 0 B_23 0 0
168	* A_31 0 0 0 0 B_31 0
169	* 0 A_32 0 0 0 B_32 0
170	* 0 0 A_33 0 0 B_33 0
171	* A_41 0 0 0 0 0 B_41
172	* 0 A_42 0 0 0 0 B_42
173	* 0 0 A_43 0 0 0 B_43
174	* To reduce the total number of objective function evaluations, this structure can be
175	* exploited as follows: A certain d is added to the j-th parameters of all cameras and
176	* the objective function is evaluated at the resulting point. This evaluation suffices
177	* to compute the corresponding columns of all A_ij through finite differences. A similar
178	* strategy allows the computation of the B_ij. Overall, only cnp+pnp+1 objective function
179	* evaluations are needed to compute the jacobian, much fewer compared to the mcnp+npnp+1
180	* that would be required by the naive strategy of computing one column of the jacobian
181	* per function evaluation. See Nocedal-Wright, ch. 7, pp. 169. Although this approach is
182	* much faster compared to the naive strategy, it is not preferable to analytic jacobians,
183	* since the latter are considerably faster to compute and result in fewer LM iterations.
184	*/
185	static void sba_fdjac_x(
186	double p, / I: current parameter estimate, (mcnp+npnp)x1 */
187	struct sba_crsm idxij, / I: sparse matrix containing the location of x_ij in hx */
188	int rcidxs, / work array for the indexes of nonzero elements of a single sparse matrix row/column */
189	int rcsubs, / work array for the subscripts of nonzero elements in a single sparse matrix row/column */
190	double jac, / O: array for storing the approximated jacobian */
191	void dat) / I: points to a "fdj_data_x_" structure */
192	{
193	register int i, j, ii, jj;
194	double pa, pb, pqr, ppt, jaca, jacb;
195	register double pAB, phx, *phxx;
196	int n, m, nm, nnz, Asz, Bsz, idx;
197
198	double *tmpd;
199	register double d;
200
201	struct fdj_data_x_ *fdjd;
202	void (func)(double p, struct sba_crsm idxij, int rcidxs, int rcsubs, double hx, void *adata);
203	double hx, hxx;
204	int cnp, pnp, mnp;
205	void *adata;
206
207
208	/* retrieve problem-specific information passed in dat /
209	fdjd=(struct fdj_data_x_ *)dat;
210	func=fdjd->func;
211	cnp=fdjd->cnp; pnp=fdjd->pnp; mnp=fdjd->mnp;
212	hx=fdjd->hx;
213	hxx=fdjd->hxx;
214	adata=fdjd->adata;
215
216	n=idxij->nr; m=idxij->nc;
217	pa=p; pb=p+m*cnp;
218	Asz=mnpcnp; Bsz=mnppnp;
219	jaca=jac; jacb=jac+idxij->nnz*Asz;
220
221	nm=(n>=m)? n : m; // max(n, m);
222	tmpd=(double )emalloc(nmsizeof(double));
223
224	(*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hx, adata); // evaluate supplied function on current solution
225
226	if(cnp){ // is motion varying?
227	/* compute A_ij */
228	for(jj=0; jj<cnp; ++jj){
229	for(j=0; j<m; ++j){
230	pqr=pa+j*cnp; // j-th camera parameters
231	/* determine d=max(SBA_DELTA_SCALE\|pqr[jj]\|, SBA_MIN_DELTA), see HZ /
232	d=(double)(SBA_DELTA_SCALE)*pqr[jj]; // force evaluation
233	d=FABS(d);
234	if(d<SBA_MIN_DELTA) d=SBA_MIN_DELTA;
235
236	tmpd[j]=d;
237	pqr[jj]+=d;
238	}
239
240	(*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hxx, adata);
241
242	for(j=0; j<m; ++j){
243	pqr=pa+j*cnp; // j-th camera parameters
244	pqr[jj]-=tmpd[j]; /* restore */
245	d=1.0/tmpd[j]; /* invert so that divisions can be carried out faster as multiplications */
246
247	nnz=sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */
248	for(i=0; i<nnz; ++i){
249	idx=idxij->val[rcidxs[i]];
250	phx=hx + idx*mnp; // set phx to point to hx_ij
251	phxx=hxx + idx*mnp; // set phxx to point to hxx_ij
252	pAB=jaca + idx*Asz; // set pAB to point to A_ij
253
254	for(ii=0; ii<mnp; ++ii)
255	pAB[iicnp+jj]=(phxx[ii]-phx[ii])d;
256	}
257	}
258	}
259	}
260
261	if(pnp){ // is structure varying?
262	/* compute B_ij */
263	for(jj=0; jj<pnp; ++jj){
264	for(i=0; i<n; ++i){
265	ppt=pb+i*pnp; // i-th point parameters
266	/* determine d=max(SBA_DELTA_SCALE\|ppt[jj]\|, SBA_MIN_DELTA), see HZ /
267	d=(double)(SBA_DELTA_SCALE)*ppt[jj]; // force evaluation
268	d=FABS(d);
269	if(d<SBA_MIN_DELTA) d=SBA_MIN_DELTA;
270
271	tmpd[i]=d;
272	ppt[jj]+=d;
273	}
274
275	(*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hxx, adata);
276
277	for(i=0; i<n; ++i){
278	ppt=pb+i*pnp; // i-th point parameters
279	ppt[jj]-=tmpd[i]; /* restore */
280	d=1.0/tmpd[i]; /* invert so that divisions can be carried out faster as multiplications */
281
282	nnz=sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */
283	for(j=0; j<nnz; ++j){
284	idx=idxij->val[rcidxs[j]];
285	phx=hx + idx*mnp; // set phx to point to hx_ij
286	phxx=hxx + idx*mnp; // set phxx to point to hxx_ij
287	pAB=jacb + idx*Bsz; // set pAB to point to B_ij
288
289	for(ii=0; ii<mnp; ++ii)
290	pAB[iipnp+jj]=(phxx[ii]-phx[ii])d;
291	}
292	}
293	}
294	}
295
296	free(tmpd);
297	}
298
299	/* Bundle adjustment on camera and structure parameters
300	* using the sparse Levenberg-Marquardt as described in HZ p. 568
301	*/
302
303	int sba_motstr_levmar_x(
304	const int n, /* number of points */
305	const int m, /* number of images */
306	const int mcon,/* number of images (starting from the 1st) whose parameters should not be modified.
307	* All A_ij (see below) with j<mcon are assumed to be zero
308	*/
309	char vmask, / visibility mask: vmask[i][j]=1 if point i visible in image j, 0 otherwise. nxm */
310	double p, / initial parameter vector p0: (a1, ..., am, b1, ..., bn).
311	* aj are the image j parameters, bi are the i-th point parameters,
312	* size mcnp + npnp
313	*/
314	const int cnp,/* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */
315	const int pnp,/* number of parameters for ONE point; e.g. 3 for Euclidean points */
316	double x, / measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where
317	* x_ij is the projection of the i-th point on the j-th image.
318	* NOTE: some of the x_ij might be missing, if point i is not visible in image j;
319	* see vmask[i][j], max. size nmmnp
320	*/
321	const int mnp,/* number of parameters for EACH measurement; usually 2 */
322	void (func)(double p, struct sba_crsm idxij, int rcidxs, int rcsubs, double hx, void *adata),
323	/* functional relation describing measurements. Given a parameter vector p,
324	* computes a prediction of the measurements \hat{x}. p is (mcnp + npnp)x1,
325	* \hat{x} is (nmmnp)x1, maximum
326	* rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
327	* as working memory
328	*/
329	void (fjac)(double p, struct sba_crsm idxij, int rcidxs, int rcsubs, double jac, void *adata),
330	/* function to evaluate the sparse jacobian dX/dp.
331	* The Jacobian is returned in jac as
332	* (dx_11/da_1, ..., dx_1m/da_m, ..., dx_n1/da_1, ..., dx_nm/da_m,
333	* dx_11/db_1, ..., dx_1m/db_1, ..., dx_n1/db_n, ..., dx_nm/db_n), or (using HZ's notation),
334	* jac=(A_11, ..., A_1m, ..., A_n1, ..., A_nm, B_11, ..., B_1m, ..., B_n1, ..., B_nm)
335	* Notice that depending on idxij, some of the A_ij and B_ij might be missing.
336	* Note also that A_ij and B_ij are mnp x cnp and mnp x pnp matrices resp. and they
337	* should be stored in jac in row-major order.
338	* rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
339	* as working memory
340	*
341	* If NULL, the jacobian is approximated by repetitive func calls and finite
342	* differences. This is computationally inefficient and thus NOT recommended.
343	*/
344	void adata, / pointer to possibly additional data, passed uninterpreted to func, fjac */
345
346	int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate return */
347	int verbose, /* I: verbosity */
348	double opts[SBA_OPTSSZ],
349	/* I: minim. options [\mu, \epsilon1, \epsilon2]. Respectively the scale factor for initial \mu,
350	* stopping thresholds for \|\|J^T e\|\|_inf, \|\|dp\|\|_2 and \|\|e\|\|_2
351	*/
352	double info[SBA_INFOSZ]
353	/* O: information regarding the minimization. Set to NULL if don't care
354	* info[0]=\|\|e\|\|_2 at initial p.
355	* info[1-4]=[ \|\|e\|\|_2, \|\|J^T e\|\|_inf, \|\|dp\|\|_2, mu/max[J^T J]_ii ], all computed at estimated p.
356	* info[5]= # iterations,
357	* info[6]=reason for terminating: 1 - stopped by small gradient J^T e
358	* 2 - stopped by small dp
359	* 3 - stopped by itmax
360	* 4 - singular matrix. Restart from current p with increased mu
361	* 5 - stopped by small \|\|e\|\|_2
362	* 6 - too many attempts to increase damping. Restart with increased mu
363	* info[7]= # function evaluations
364	* info[8]= # jacobian evaluations
365	* info[9]= # number of linear systems solved, i.e. number of attempts for reducing error
366	*/
367	)
368	{
369	register int i, j, ii, jj, k, l;
370	int nvis, nnz, retval;
371
372	/* The following are work arrays that are dynamically allocated by sba_motstr_levmar_x() */
373	double jac; / work array for storing the jacobian, max. size nm(mnpcnp + mnppnp) */
374	double U; / work array for storing the U_j in the order U_1, ..., U_m, size mcnpcnp */
375	double V; / work array for storing the V_i in the order V_1, ..., V_n, size npnppnp */
376	double V_1; / work array for storing the (V_i)^-1 in the order (V_1)^-1, ..., (V_n)^-1, size npnppnp /
377
378	double e; / work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm,
379	max. size nmmnp */
380	double eab; / work array for storing the ea_j & eb_i in the order ea_1, .. ea_m eb_1, .. eb_n size mcnp + npnp */
381
382	double E; / work array for storing the e_j in the order e_1, .. e_m, size mcnp /
383
384	/* Notice that the blocks W_ij, Y_ij are zero iff A_ij (equivalently B_ij) is zero. This means
385	* that the matrices consisting of blocks W_ij and Y_ij are themselves sparse, similarly to the
386	* block matrices made up of the A_ij and B_ij (i.e. jaca and jacb)
387	*/
388	double W; / work array for storing the W_ij in the order W_11, ..., W_1m, ..., W_n1, ..., W_nm,
389	max. size nmcnppnp /
390	double Y; / work array for storing the Y_ij in the order Y_11, ..., Y_1m, ..., Y_n1, ..., Y_nm,
391	max. size nmcnppnp /
392	double YWt; / work array for storing \sum_i Y_ij W_ik^T, size cnpcnp /
393	double S; / work array for storing the block array S_jk, size mmcnpcnp /
394	double dp; / work array for storing the parameter vector updates da_1, ..., da_m, db_1, ..., db_n, size mcnp + npnp */
395	double Wtda; / work array for storing \sum_j W_ij^T da_j, size pnp */
396
397	/* Of the above arrays, jac, e, W, Y are sparse and
398	* U, V, V_1, eab, E, S, dp are dense. Sparse arrays are indexed through
399	* idxij (see below), that is with the same mechanism as the input
400	* measurements vector x
401	*/
402
403	double pa, pb, jaca, jacb, ea, eb, dpa, dpb; /* pointers into p, jac, eab and dp respectively */
404
405	/* submatrices sizes */
406	int Asz, Bsz, Usz, Vsz,
407	Wsz, Ysz, esz, easz, ebsz,
408	YWtsz, Wtdasz, Sblsz;
409
410	int Sdim; /* S matrix actual dimension */
411
412	register double ptr1, ptr2, ptr3, ptr4, sum;
413	struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x. This is also the location of A_ij
414	* in jaca, B_ij in jacb, etc.
415	* This matrix can be thought as a map from a sparse set of pairs (i, j) to a continuous
416	* index k and it is used to efficiently lookup the memory locations where the non-zero
417	* blocks of a sparse matrix/vector are stored
418	*/
419	int maxnm=(n>=m)? n:m, /* max. of (n, m) */
420	rcidxs, / work array for the indexes corresponding to the nonzero elements of a single row or
421	column in a sparse matrix, size max(n, m) */
422	rcsubs; / work array for the subscripts of nonzero elements in a single row or column of a
423	sparse matrix, size max(n, m) */
424
425	/* The following variables are needed by the LM algorithm */
426	register int itno; /* iteration counter */
427	int issolved;
428	/* temporary work arrays that are dynamically allocated */
429	double hx, / \hat{x}_i, max. size mnmnp */
430	diagUV, / diagonals of U_j, V_i, size mcnp + npnp */
431	pdp; / p + dp, size mcnp + npnp */
432
433	double diagU, diagV; /* pointers into diagUV */
434
435	register double mu, /* damping constant */
436	tmp; /* mainly used in matrix & vector multiplications */
437	double p_eL2, eab_inf, pdp_eL2; /* \|\|e(p)\|\|_2, \|\|J^T e\|\|_inf, \|\|e(p+dp)\|\|_2 */
438	double p_L2, dp_L2=DBL_MAX, dF, dL;
439	double tau=FABS(opts[0]), eps1=FABS(opts[1]), eps2=FABS(opts[2]), eps2_sq=opts[2]opts[2], eps3_sq=opts[3]opts[3];
440	double init_p_eL2;
441	int nu=2, nu2, stop, nfev, njev=0, nlss=0;
442	int nobs, nvars;
443	const int mmcon=m-mcon;
444
445	struct fdj_data_x_ fdj_data;
446	void *jac_adata;
447
448	/* Initialization */
449
450	/* block sizes */
451	Asz=mnp * cnp; Bsz=mnp * pnp;
452	Usz=cnp * cnp; Vsz=pnp * pnp;
453	Wsz=cnp * pnp; Ysz=cnp * pnp;
454	esz=mnp;
455	easz=cnp; ebsz=pnp;
456	YWtsz=cnp * cnp;
457	Wtdasz=pnp;
458	Sblsz=cnp * cnp;
459	Sdim=mmcon * cnp;
460
461	/* count total number of visible image points */
462	for(i=nvis=0, jj=n*m; i<jj; ++i)
463	nvis+=vmask[i];
464
465	nobs=nvis*mnp;
466	nvars=mcnp + npnp;
467	if(nobs<nvars){
468	fprintf(stderr, "sba_motstr_levmar_x(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n", nobs, nvars);
469	exit(1);
470	}
471
472	/* allocate work arrays */
473	jac=(double )emalloc(nvis(Asz+Bsz)*sizeof(double));
474	U=(double )emalloc(mUsz*sizeof(double));
475	V=(double )emalloc(nVsz*sizeof(double));
476	V_1=(double )emalloc(nVsz*sizeof(double));
477	e=(double )emalloc(nobssizeof(double));
478	eab=(double )emalloc(nvarssizeof(double));
479	E=(double )emalloc(mcnp*sizeof(double));
480	W=(double )emalloc(nvisWsz*sizeof(double));
481	Y=(double )emalloc(nvisYsz*sizeof(double));
482	YWt=(double )emalloc(YWtszsizeof(double));
483	S=(double )emalloc(mmSblszsizeof(double));
484	dp=(double )emalloc(nvarssizeof(double));
485	Wtda=(double )emalloc(pnpsizeof(double));
486	rcidxs=(int )emalloc(maxnmsizeof(int));
487	rcsubs=(int )emalloc(maxnmsizeof(int));
488
489
490	hx=(double )emalloc(nobssizeof(double));
491	diagUV=(double )emalloc(nvarssizeof(double));
492	pdp=(double )emalloc(nvarssizeof(double));
493
494
495	/* set up auxiliary pointers */
496	pa=p; pb=p+m*cnp;
497	jaca=jac; jacb=jac+nvis*Asz;
498	ea=eab; eb=eab+m*cnp;
499	dpa=dp; dpb=dp+m*cnp;
500
501	diagU=diagUV; diagV=diagUV + m*cnp;
502
503
504	/* allocate & fill up the idxij structure */
505	sba_crsm_alloc(&idxij, n, m, nvis);
506	for(i=k=0; i<n; ++i){
507	idxij.rowptr[i]=k;
508	ii=i*m;
509	for(j=0; j<m; ++j)
510	if(vmask[ii+j]){
511	idxij.val[k]=k;
512	idxij.colidx[k++]=j;
513	}
514	}
515	idxij.rowptr[n]=nvis;
516
517	/* if no jacobian function is supplied, prepare to compute jacobian with finite difference */
518	if(!fjac){
519	fdj_data.func=func;
520	fdj_data.cnp=cnp;
521	fdj_data.pnp=pnp;
522	fdj_data.mnp=mnp;
523	fdj_data.hx=hx;
524	fdj_data.hxx=(double )emalloc(nobssizeof(double));
525	fdj_data.func_rcidxs=(int )emalloc(2maxnm*sizeof(int));
526	fdj_data.func_rcsubs=fdj_data.func_rcidxs+maxnm;
527	fdj_data.adata=adata;
528
529	fjac=sba_fdjac_x;
530	jac_adata=(void *)&fdj_data;
531	}
532	else{
533	fdj_data.hxx=NULL;
534	jac_adata=adata;
535	}
536
537	if(itmax==0){ /* verify jacobian */
538	sba_motstr_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, mcon, cnp, pnp, mnp, adata, jac_adata);
539	retval=0;
540	goto freemem_and_return;
541	}
542
543	/* compute the error vectors e_ij in hx */
544	(*func)(p, &idxij, rcidxs, rcsubs, hx, adata); nfev=1;
545	/* compute e=x - f(p) and its L2 norm */
546	for(i=0, p_eL2=0.0; i<nobs; ++i){
547	e[i]=tmp=x[i]-hx[i];
548	p_eL2+=tmp*tmp;
549	}
550
551	if(verbose) printf("initial motstr-SBA error %g [%g]\n", p_eL2, p_eL2/nvis);
552	init_p_eL2=p_eL2;
553
554	for(itno=stop=0; itno<itmax && !stop; ++itno){
555	/* Note that p, e and \|\|e\|\|_2 have been updated at the previous iteration */
556
557	/* compute derivative submatrices A_ij, B_ij */
558	(*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); ++njev;
559
560	/* compute U_j = \sum_i A_ij^T A_ij */ // \Sigma here!
561	/* U_j is symmetric, therefore its computation can be speeded up by
562	* computing only the upper part and then reusing it for the lower one.
563	* Recall that A_ij is mnp x cnp
564	*/
565	/* Also compute ea_j = \sum_i A_ij^T e_ij */ // \Sigma here!
566	/* Recall that e_ij is mnp x 1
567	*/
568	_dblzero(U, mUsz); / clear all U_j */
569	_dblzero(ea, measz); / clear all ea_j */
570	for(j=mcon; j<m; ++j){
571	ptr1=U + j*Usz; // set ptr1 to point to U_j
572	ptr2=ea + j*easz; // set ptr2 to point to ea_j
573
574	nnz=sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */
575	for(i=0; i<nnz; ++i){
576	/* set ptr3 to point to A_ij, actual row number in rcsubs[i] */
577	ptr3=jaca + idxij.val[rcidxs[i]]*Asz;
578
579	/* compute the UPPER TRIANGULAR PART of A_ij^T A_ij and add it to U_j */
580	for(ii=0; ii<cnp; ++ii){
581	for(jj=ii; jj<cnp; ++jj){
582	for(k=0, sum=0.0; k<mnp; ++k)
583	sum+=ptr3[kcnp+ii]ptr3[k*cnp+jj];
584	ptr1[ii*cnp+jj]+=sum;
585	}
586
587	/* copy the LOWER TRIANGULAR PART of U_j from the upper one */
588	for(jj=0; jj<ii; ++jj)
589	ptr1[iicnp+jj]=ptr1[jjcnp+ii];
590	}
591
592	ptr4=e + idxij.val[rcidxs[i]]esz; / set ptr4 to point to e_ij */
593	/* compute A_ij^T e_ij and add it to ea_j */
594	for(ii=0; ii<cnp; ++ii){
595	for(jj=0, sum=0.0; jj<mnp; ++jj)
596	sum+=ptr3[jjcnp+ii]ptr4[jj];
597	ptr2[ii]+=sum;
598	}
599	}
600	}
601
602	/* compute V_i = \sum_j B_ij^T B_ij */ // \Sigma here!
603	/* V_i is symmetric, therefore its computation can be speeded up by
604	* computing only the upper part and then reusing it for the lower one.
605	* Recall that B_ij is mnp x pnp
606	*/
607	/* Also compute eb_i = \sum_j B_ij^T e_ij */ // \Sigma here!
608	/* Recall that e_ij is mnp x 1
609	*/
610	_dblzero(V, nVsz); / clear all V_i */
611	_dblzero(eb, nebsz); / clear all eb_i */
612	for(i=0; i<n; ++i){
613	ptr1=V + i*Vsz; // set ptr1 to point to V_i
614	ptr2=eb + i*ebsz; // set ptr2 to point to eb_i
615
616	nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */
617	for(j=0; j<nnz; ++j){
618	/* set ptr3 to point to B_ij, actual column number in rcsubs[j] */
619	ptr3=jacb + idxij.val[rcidxs[j]]*Bsz;
620
621	/* compute the UPPER TRIANGULAR PART of B_ij^T B_ij and add it to V_i */
622	for(ii=0; ii<pnp; ++ii){
623	for(jj=ii; jj<pnp; ++jj){
624	for(k=0, sum=0.0; k<mnp; ++k)
625	sum+=ptr3[kpnp+ii]ptr3[k*pnp+jj];
626	ptr1[ii*pnp+jj]+=sum;
627	}
628
629	/* copy the LOWER TRIANGULAR PART of V_i from the upper one */
630	for(jj=0; jj<ii; ++jj)
631	ptr1[iipnp+jj]=ptr1[jjpnp+ii];
632	}
633
634	ptr4=e + idxij.val[rcidxs[j]]esz; / set ptr4 to point to e_ij */
635	/* compute B_ij^T e_ij and add it to eb_i */
636	for(ii=0; ii<pnp; ++ii){
637	for(jj=0, sum=0.0; jj<mnp; ++jj)
638	sum+=ptr3[jjpnp+ii]ptr4[jj];
639	ptr2[ii]+=sum;
640	}
641	}
642	}
643
644	/* compute W_ij = A_ij^T B_ij */ // \Sigma here!
645	/* Recall that A_ij is mnp x cnp and B_ij is mnp x pnp
646	*/
647	_dblzero(W, nvisWsz); / clear all W_ij */
648	for(i=0; i<n; ++i){
649	nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */
650	for(j=0; j<nnz; ++j){
651	/* set ptr1 to point to W_ij, actual column number in rcsubs[j] */
652	if(rcsubs[j]<mcon) continue; /* A_ij is zero */
653
654	ptr1=W + idxij.val[rcidxs[j]]*Wsz;
655	/* set ptr2 & ptr3 to point to A_ij & B_ij resp. */
656	ptr2=jaca + idxij.val[rcidxs[j]]*Asz;
657	ptr3=jacb + idxij.val[rcidxs[j]]*Bsz;
658	/* compute A_ij^T B_ij and store it in W_ij */
659	for(ii=0; ii<cnp; ++ii)
660	for(jj=0; jj<pnp; ++jj){
661	for(k=0, sum=0.0; k<mnp; ++k)
662	sum+=ptr2[kcnp+ii]ptr3[k*pnp+jj];
663	ptr1[ii*pnp+jj]=sum;
664	}
665	}
666	}
667
668	/* Compute \|\|J^T e\|\|_inf and \|\|p\|\|^2 */
669	for(i=0, p_L2=eab_inf=0.0; i<nvars; ++i){
670	if(eab_inf < (tmp=FABS(eab[i]))) eab_inf=tmp;
671	p_L2+=p[i]*p[i];
672	}
673	//p_L2=sqrt(p_L2);
674
675	/* save diagonal entries so that augmentation can be later canceled.
676	* Diagonal entries are in U_j and V_i
677	*/
678	for(j=mcon; j<m; ++j){
679	ptr1=U + j*Usz; // set ptr1 to point to U_j
680	ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
681	for(i=0; i<cnp; ++i)
682	ptr2[i]=ptr1[i*cnp+i];
683	}
684	for(i=0; i<n; ++i){
685	ptr1=V + i*Vsz; // set ptr1 to point to V_i
686	ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
687	for(j=0; j<pnp; ++j)
688	ptr2[j]=ptr1[j*pnp+j];
689	}
690
691	/*
692	if(!(itno%100)){
693	printf("Current estimate: ");
694	for(i=0; i<nvars; ++i)
695	printf("%.9g ", p[i]);
696	printf("-- errors %.9g %0.9g\n", eab_inf, p_eL2);
697	}
698	*/
699
700	/* check for convergence */
701	if((eab_inf <= eps1)){
702	dp_L2=0.0; /* no increment for p in this case */
703	stop=1;
704	break;
705	}
706
707	/* compute initial damping factor */
708	if(itno==0){
709	for(i=mcon*cnp, tmp=DBL_MIN; i<nvars; ++i)
710	if(diagUV[i]>tmp) tmp=diagUV[i]; /* find max diagonal element */
711	mu=tau*tmp;
712	}
713
714	/* determine increment using adaptive damping */
715	while(1){
716	/* augment U, V */
717	for(j=mcon; j<m; ++j){
718	ptr1=U + j*Usz; // set ptr1 to point to U_j
719	for(i=0; i<cnp; ++i)
720	ptr1[i*cnp+i]+=mu;
721	}
722	for(i=0; i<n; ++i){
723	ptr1=V + i*Vsz; // set ptr1 to point to V_i
724	for(j=0; j<pnp; ++j)
725	ptr1[j*pnp+j]+=mu;
726
727	/* compute V_i^-1 /
728	ptr2=V_1 + iVsz; // set ptr2 to point to (V_i)^-1
729	/* inverting V_i with LU seems to be faster than Cholesky /
730	j=sba_mat_invert_LU(ptr1, ptr2, pnp); //invert the pnp x pnp matrix pointed to by ptr1 and save it in ptr2
731	//j=sba_mat_invert_Chol(ptr1, ptr2, pnp); //invert the pnp x pnp matrix pointed to by ptr1 and save it in ptr2
732	if(!j){
733	fprintf(stderr, "Singular matrix V*_i (i=%d) in sba_motstr_levmar_x()\n", i);
734	exit(1);
735	}
736	}
737
738	/* compute Y_ij = W_ij (V*_i)^-1
739	* Recall that W_ij is cnp x pnp and (V*_i) is pnp x pnp
740	*/
741	_dblzero(Y, nvisYsz); / clear all Y_ij */
742	for(i=0; i<n; ++i){
743	/* set ptr3 to point to (V_i)^-1 /
744	ptr3=V_1 + i*Vsz;
745	nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */
746	for(j=0; j<nnz; ++j){
747	/* set ptr1 to point to Y_ij, actual column number in rcsubs[j] */
748	if(rcsubs[j]<mcon) continue; /* W_ij is zero */
749
750	ptr1=Y + idxij.val[rcidxs[j]]*Ysz;
751	/* set ptr2 to point to W_ij resp. */
752	ptr2=W + idxij.val[rcidxs[j]]*Wsz;
753	/* compute W_ij (V_i)^-1 and store it in Y_ij /
754	for(ii=0; ii<cnp; ++ii)
755	for(jj=0; jj<pnp; ++jj){
756	for(k=0, sum=0.0; k<pnp; ++k)
757	sum+=ptr2[iipnp+k]ptr3[k*pnp+jj];
758	ptr1[ii*pnp+jj]=sum;
759	}
760	}
761	}
762
763	_dblzero(E, measz); / clear all e_j */
764	/* compute the mmcon x mmcon block matrix S and e_j */
765
766	/* Note that S is symmetric, therefore its computation can be
767	* speeded up by computing only the upper part and then reusing
768	* it for the lower one.
769	*/
770	for(j=mcon; j<m; ++j){
771	nnz=sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero Y_ij, i=0...n-1 */
772
773	/* compute the UPPER TRIANGULAR PART of S */
774	for(k=j; k<m; ++k){ // j>=mcon
775	/* compute \sum_i Y_ij W_ik^T in YWt */
776	/* Recall that Y_ij is cnp x pnp and W_ik is cnp x pnp */
777	_dblzero(YWt, YWtsz); /* clear YWt */
778
779	for(i=0; i<nnz; ++i){
780	register double *pYWt;
781
782	/* find the min and max column indices of the elements in row i (actually rcsubs[i])
783	* and make sure that k falls within them. This test handles W_ik's which are
784	* certain to be zero without bothering to call sba_crsm_elmidx()
785	*/
786	ii=idxij.colidx[idxij.rowptr[rcsubs[i]]];
787	jj=idxij.colidx[idxij.rowptr[rcsubs[i]+1]-1];
788	if(k<ii \|\| k>jj) continue; /* W_ik == 0 */
789
790	/* set ptr2 to point to W_ik */
791	l=sba_crsm_elmidx(&idxij, rcsubs[i], k);
792	if(l==-1) continue; /* W_ik == 0 */
793
794	ptr2=W + idxij.val[l]*Wsz;
795	/* set ptr1 to point to Y_ij, actual row number in rcsubs[i] */
796	ptr1=Y + idxij.val[rcidxs[i]]*Ysz;
797	for(ii=0; ii<cnp; ++ii){
798	ptr3=ptr1+ii*pnp;
799	pYWt=YWt+ii*cnp;
800
801	for(jj=0; jj<cnp; ++jj){
802	ptr4=ptr2+jj*pnp;
803	for(l=0, sum=0.0; l<pnp; ++l)
804	sum+=ptr3[l]ptr4[l]; //ptr1[iipnp+l]ptr2[jjpnp+l];
805	pYWt[jj]+=sum; //YWt[ii*cnp+jj]+=sum;
806	}
807	}
808	}
809
810	ptr1=U + j*Usz; // set ptr1 to point to U_j
811
812	/* since the linear system involving S is solved with lapack,
813	* it is preferable to store S in column major (i.e. fortran)
814	* order, so as to avoid unecessary transposing/copying.
815	*/
816	#if MAT_STORAGE==COLUMN_MAJOR
817	ptr2=S + (k-mcon)mmconUsz + (j-mcon)*cnp; // set ptr2 to point to the beginning of block j,k in S
818	#else
819	ptr2=S + (j-mcon)mmconUsz + (k-mcon)*cnp; // set ptr2 to point to the beginning of block j,k in S
820	#endif
821
822	if(j!=k){ /* Kronecker */
823	for(ii=0; ii<cnp; ++ii, ptr2+=Sdim)
824	for(jj=0; jj<cnp; ++jj)
825	ptr2[jj]=
826	#if MAT_STORAGE==COLUMN_MAJOR
827	-YWt[jj*cnp+ii];
828	#else
829	-YWt[ii*cnp+jj];
830	#endif
831	}
832	else{
833	for(ii=0; ii<cnp; ++ii, ptr2+=Sdim)
834	for(jj=0; jj<cnp; ++jj)
835	ptr2[jj]=
836	#if MAT_STORAGE==COLUMN_MAJOR
837	ptr1[jjcnp+ii] - YWt[jjcnp+ii];
838	#else
839	ptr1[iicnp+jj] - YWt[iicnp+jj];
840	#endif
841	}
842	}
843
844	/* copy the LOWER TRIANGULAR PART of S from the upper one */
845	for(k=mcon; k<j; ++k){
846	#if MAT_STORAGE==COLUMN_MAJOR
847	ptr1=S + (k-mcon)mmconUsz + (j-mcon)*cnp; // set ptr1 to point to the beginning of block j,k in S
848	ptr2=S + (j-mcon)mmconUsz + (k-mcon)*cnp; // set ptr2 to point to the beginning of block k,j in S
849	#else
850	ptr1=S + (j-mcon)mmconUsz + (k-mcon)*cnp; // set ptr1 to point to the beginning of block j,k in S
851	ptr2=S + (k-mcon)mmconUsz + (j-mcon)*cnp; // set ptr2 to point to the beginning of block k,j in S
852	#endif
853	for(ii=0; ii<cnp; ++ii, ptr1+=Sdim)
854	for(jj=0, ptr3=ptr2+ii; jj<cnp; ++jj, ptr3+=Sdim)
855	ptr1[jj]=*ptr3;
856	}
857
858	/* compute e_j=ea_j - \sum_i Y_ij eb_i */
859	/* Recall that Y_ij is cnp x pnp and eb_i is pnp x 1 */
860	ptr1=E + j*easz; // set ptr1 to point to e_j
861
862	for(i=0; i<nnz; ++i){
863	/* set ptr2 to point to Y_ij, actual row number in rcsubs[i] */
864	ptr2=Y + idxij.val[rcidxs[i]]*Ysz;
865
866	/* set ptr3 to point to eb_i */
867	ptr3=eb + rcsubs[i]*ebsz;
868	for(ii=0; ii<cnp; ++ii){
869	ptr4=ptr2+ii*pnp;
870	for(jj=0, sum=0.0; jj<pnp; ++jj)
871	sum+=ptr4[jj]ptr3[jj]; //ptr2[iipnp+jj]*ptr3[jj];
872	ptr1[ii]+=sum;
873	}
874	}
875
876	ptr2=ea + j*easz; // set ptr2 to point to ea_j
877	for(i=0; i<easz; ++i)
878	ptr1[i]=ptr2[i] - ptr1[i];
879	}
880
881	#if 0
882	if(verbose>1){ /* count the nonzeros in S */
883	for(i=ii=0; i<Sdim*Sdim; ++i)
884	if(S[i]!=0.0) ++ii;
885	printf("\nS density %10g\n", ((double)ii)/(Sdim*Sdim));
886
887	}
888	#endif
889
890	/* solve the linear system S dpa = E to compute the da_j.
891	*
892	* Note that if MAT_STORAGE==1 S is modified in the following call;
893	* this is OK since S is recomputed for each iteration
894	*/
895	//issolved=sba_Axb_LU(S, E+mconcnp, dpa+mconcnp, Sdim, MAT_STORAGE); ++nlss;
896	issolved=sba_Axb_Chol(S, E+mconcnp, dpa+mconcnp, Sdim, MAT_STORAGE); ++nlss;
897	//issolved=sba_Axb_QRnoQ(S, E+mconcnp, dpa+mconcnp, Sdim, MAT_STORAGE); ++nlss;
898	//issolved=sba_Axb_QR(S, E+mconcnp, dpa+mconcnp, Sdim, MAT_STORAGE); ++nlss;
899	//issolved=sba_Axb_SVD(S, E+mconcnp, dpa+mconcnp, Sdim, MAT_STORAGE); ++nlss;
900	//issolved=sba_Axb_CG(S, E+mconcnp, dpa+mconcnp, Sdim, (3*Sdim)/2, 1E-10, SBA_CG_JACOBI, MAT_STORAGE); ++nlss;
901
902	_dblzero(dpa, mconcnp); / no change for the first mcon camera params */
903
904	if(issolved){
905
906	/* compute the db_i */
907	for(i=0; i<n; ++i){
908	ptr1=dpb + i*ebsz; // set ptr1 to point to db_i
909
910	/* compute \sum_j W_ij^T da_j */
911	/* Recall that W_ij is cnp x pnp and da_j is cnp x 1 */
912	_dblzero(Wtda, Wtdasz); /* clear Wtda */
913	nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */
914	for(j=0; j<nnz; ++j){
915	/* set ptr2 to point to W_ij, actual column number in rcsubs[j] */
916	if(rcsubs[j]<mcon) continue; /* W_ij is zero */
917
918	ptr2=W + idxij.val[rcidxs[j]]*Wsz;
919
920	/* set ptr3 to point to da_j */
921	ptr3=dpa + rcsubs[j]*cnp;
922
923	for(ii=0; ii<pnp; ++ii){
924	ptr4=ptr2+ii;
925	for(jj=0, sum=0.0; jj<cnp; ++jj)
926	sum+=ptr4[jjpnp]ptr3[jj]; //ptr2[jjpnp+ii]ptr3[jj];
927	Wtda[ii]+=sum;
928	}
929	}
930
931	/* compute eb_i - \sum_j W_ij^T da_j = eb_i - Wtda in Wtda */
932	ptr2=eb + i*ebsz; // set ptr2 to point to eb_i
933	for(ii=0; ii<pnp; ++ii)
934	Wtda[ii]=ptr2[ii] - Wtda[ii];
935
936	/* compute the product (V_i)^-1 Wtda = (V_i)^-1 (eb_i - \sum_j W_ij^T da_j) */
937	ptr2=V_1 + iVsz; // set ptr2 to point to (V_i)^-1
938	for(ii=0; ii<pnp; ++ii){
939	for(jj=0, sum=0.0; jj<pnp; ++jj)
940	sum+=ptr2[iipnp+jj]Wtda[jj];
941	ptr1[ii]=sum;
942	}
943	}
944
945	/* parameter vector updates are now in dpa, dpb */
946
947	/* compute p's new estimate and \|\|dp\|\|^2 */
948	for(i=0, dp_L2=0.0; i<nvars; ++i){
949	pdp[i]=p[i] + (tmp=dp[i]);
950	dp_L2+=tmp*tmp;
951	}
952	//dp_L2=sqrt(dp_L2);
953
954	if(dp_L2<=eps2_sqp_L2){ / relative change in p is small, stop */
955	//if(dp_L2<=eps2(p_L2 + eps2)){ / relative change in p is small, stop */
956	stop=2;
957	break;
958	}
959
960	if(dp_L2>=(p_L2+eps2)/SBA_EPSILON_SQ){ /* almost singular */
961	//if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */
962	stop=4;
963	break;
964	}
965
966	(func)(pdp, &idxij, rcidxs, rcsubs, hx, adata); ++nfev; / evaluate function at p + dp */
967	if(verbose>1)
968	printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs));
969	for(i=0, pdp_eL2=0.0; i<nobs; ++i){ /* compute \|\|e(pdp)\|\|_2 */
970	hx[i]=tmp=x[i]-hx[i];
971	pdp_eL2+=tmp*tmp;
972	}
973
974	for(i=0, dL=0.0; i<nvars; ++i)
975	dL+=dp[i](mudp[i]+eab[i]);
976
977	dF=p_eL2-pdp_eL2;
978
979	if(verbose>1)
980	printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu, dL!=0.0? dF/dL : dF/DBL_EPSILON, p_eL2/nvis, pdp_eL2/nvis, p_eL2/pdp_eL2);
981
982	if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */
983	tmp=(2.0*dF/dL-1.0);
984	tmp=1.0-tmptmptmp;
985	mu=mu*( (tmp>=SBA_ONE_THIRD)? tmp : SBA_ONE_THIRD );
986	nu=2;
987
988	for(i=0; i<nvars; ++i) /* update p's estimate */
989	p[i]=pdp[i];
990
991	for(i=0; i<nobs; ++i) /* update e and \|\|e\|\|_2 */
992	e[i]=hx[i];
993	p_eL2=pdp_eL2;
994	break;
995	}
996	} /* issolved */
997
998	/* if this point is reached, either the linear system could not be solved or
999	* the error did not reduce; in any case, the increment must be rejected
1000	*/
1001
1002	mu*=nu;
1003	nu2=nu<<1; // 2*nu;
1004	if(nu2<=nu){ /* nu has wrapped around (overflown) */
1005	fprintf(stderr, "Too many failed attempts to increase the damping factor in sba_motstr_levmar_x()! Singular hessian matrix?\n");
1006	//exit(1);
1007	stop=6;
1008	break;
1009	}
1010	nu=nu2;
1011
1012	/* restore U, V diagonal entries */
1013	for(j=mcon; j<m; ++j){
1014	ptr1=U + j*Usz; // set ptr1 to point to U_j
1015	ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
1016	for(i=0; i<cnp; ++i)
1017	ptr1[i*cnp+i]=ptr2[i];
1018	}
1019	for(i=0; i<n; ++i){
1020	ptr1=V + i*Vsz; // set ptr1 to point to V_i
1021	ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
1022	for(j=0; j<pnp; ++j)
1023	ptr1[j*pnp+j]=ptr2[j];
1024	}
1025	} /* inner loop */
1026
1027	if(p_eL2<=eps3_sq) stop=5; // error is small, force termination of outer loop
1028	}
1029
1030	if(itno>=itmax) stop=3;
1031
1032	/* restore U, V diagonal entries */
1033	for(j=mcon; j<m; ++j){
1034	ptr1=U + j*Usz; // set ptr1 to point to U_j
1035	ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
1036	for(i=0; i<cnp; ++i)
1037	ptr1[i*cnp+i]=ptr2[i];
1038	}
1039	for(i=0; i<n; ++i){
1040	ptr1=V + i*Vsz; // set ptr1 to point to V_i
1041	ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
1042	for(j=0; j<pnp; ++j)
1043	ptr1[j*pnp+j]=ptr2[j];
1044	}
1045
1046	if(info){
1047	info[0]=init_p_eL2;
1048	info[1]=p_eL2;
1049	info[2]=eab_inf;
1050	info[3]=dp_L2;
1051	for(j=mcon, tmp=DBL_MIN; j<m; ++j){
1052	ptr1=U + j*Usz; // set ptr1 to point to U_j
1053	for(i=0; i<cnp; ++i)
1054	if(tmp<ptr1[icnp+i]) tmp=ptr1[icnp+i];
1055	}
1056	for(i=0; i<n; ++i){
1057	ptr1=V + i*Vsz; // set ptr1 to point to V_i
1058	for(j=0; j<pnp; ++j)
1059	if(tmp<ptr1[jpnp+j]) tmp=ptr1[jpnp+j];
1060	}
1061	info[4]=mu/tmp;
1062	info[5]=itno;
1063	info[6]=stop;
1064	info[7]=nfev;
1065	info[8]=njev;
1066	info[9]=nlss;
1067	}
1068
1069	//sba_print_sol(n, m, p, cnp, pnp, x, mnp, &idxij, rcidxs, rcsubs);
1070	retval=(stop!=4)? itno : -1;
1071
1072	freemem_and_return: /* NOTE: this point is also reached via a goto! */
1073
1074	/* free whatever was allocated */
1075	free(jac); free(U); free(V);
1076	free(V_1); free(e); free(eab);
1077	free(E); free(W); free(Y);
1078	free(YWt); free(S); free(dp);
1079	free(Wtda);
1080	free(rcidxs); free(rcsubs);
1081
1082	free(hx); free(diagUV); free(pdp);
1083	if(fdj_data.hxx){ // cleanup
1084	free(fdj_data.hxx);
1085	free(fdj_data.func_rcidxs);
1086	}
1087
1088	sba_crsm_free(&idxij);
1089
1090	return retval;
1091	}
1092
1093
1094	/* Bundle adjustment on camera parameters only
1095	* using the sparse Levenberg-Marquardt as described in HZ p. 568
1096	*/
1097
1098	int sba_mot_levmar_x(
1099	const int n, /* number of points */
1100	const int m, /* number of images */
1101	const int mcon,/* number of images (starting from the 1st) whose parameters should not be modified.
1102	* All A_ij (see below) with j<mcon are assumed to be zero
1103	*/
1104	char vmask, / visibility mask: vmask[i][j]=1 if point i visible in image j, 0 otherwise. nxm */
1105	double p, / initial parameter vector p0: (a1, ..., am).
1106	* aj are the image j parameters, size mcnp /
1107	const int cnp,/* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */
1108	double x, / measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where
1109	* x_ij is the projection of the i-th point on the j-th image.
1110	* NOTE: some of the x_ij might be missing, if point i is not visible in image j;
1111	* see vmask[i][j], max. size nmmnp
1112	*/
1113	const int mnp,/* number of parameters for EACH measurement; usually 2 */
1114	void (func)(double p, struct sba_crsm idxij, int rcidxs, int rcsubs, double hx, void *adata),
1115	/* functional relation describing measurements. Given a parameter vector p,
1116	* computes a prediction of the measurements \hat{x}. p is (m*cnp)x1,
1117	* \hat{x} is (nmmnp)x1, maximum
1118	* rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
1119	* as working memory
1120	*/
1121	void (fjac)(double p, struct sba_crsm idxij, int rcidxs, int rcsubs, double jac, void *adata),
1122	/* function to evaluate the sparse jacobian dX/dp.
1123	* The Jacobian is returned in jac as
1124	* (dx_11/da_1, ..., dx_1m/da_m, ..., dx_n1/da_1, ..., dx_nm/da_m), or (using HZ's notation),
1125	* jac=(A_11, ..., A_1m, ..., A_n1, ..., A_nm)
1126	* Notice that depending on idxij, some of the A_ij might be missing.
1127	* Note also that the A_ij are mnp x cnp matrices and they
1128	* should be stored in jac in row-major order.
1129	* rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
1130	* as working memory
1131	*
1132	* If NULL, the jacobian is approximated by repetitive func calls and finite
1133	* differences. This is computationally inefficient and thus NOT recommended.
1134	*/
1135	void adata, / pointer to possibly additional data, passed uninterpreted to func, fjac */
1136
1137	int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate return */
1138	int verbose, /* I: verbosity */
1139	double opts[SBA_OPTSSZ],
1140	/* I: minim. options [\mu, \epsilon1, \epsilon2]. Respectively the scale factor for initial \mu,
1141	* stopping thresholds for \|\|J^T e\|\|_inf, \|\|dp\|\|_2 and \|\|e\|\|_2
1142	*/
1143	double info[SBA_INFOSZ]
1144	/* O: information regarding the minimization. Set to NULL if don't care
1145	* info[0]=\|\|e\|\|_2 at initial p.
1146	* info[1-4]=[ \|\|e\|\|_2, \|\|J^T e\|\|_inf, \|\|dp\|\|_2, mu/max[J^T J]_ii ], all computed at estimated p.
1147	* info[5]= # iterations,
1148	* info[6]=reason for terminating: 1 - stopped by small gradient J^T e
1149	* 2 - stopped by small dp
1150	* 3 - stopped by itmax
1151	* 4 - singular matrix. Restart from current p with increased mu
1152	* 5 - stopped by small \|\|e\|\|_2
1153	* 6 - too many attempts to increase damping. Restart with increased mu
1154	* info[7]= # function evaluations
1155	* info[8]= # jacobian evaluations
1156	* info[9]= # number of linear systems solved, i.e. number of attempts for reducing error
1157	*/
1158	)
1159	{
1160	register int i, j, ii, jj, k;
1161	int nvis, nnz, retval;
1162
1163	/* The following are work arrays that are dynamically allocated by sba_mot_levmar_x() */
1164	double jac; / work array for storing the jacobian, max. size nmmnpcnp /
1165	double U; / work array for storing the U_j in the order U_1, ..., U_m, size mcnpcnp */
1166
1167	double e; / work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm,
1168	max. size nmmnp */
1169	double ea; / work array for storing the ea_j in the order ea_1, .. ea_m, size mcnp /
1170
1171	double dp; / work array for storing the parameter vector updates da_1, ..., da_m, size mcnp /
1172
1173	/* Of the above arrays, jac, e are sparse and
1174	* U, ea, dp are dense. Sparse arrays are indexed through
1175	* idxij (see below), that is with the same mechanism as the input
1176	* measurements vector x
1177	*/
1178
1179	/* submatrices sizes */
1180	int Asz, Usz,
1181	esz, easz;
1182
1183	register double ptr1, ptr2, ptr3, ptr4, sum;
1184	struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x. This is also the location of A_ij
1185	* in jac, e_ij in e, etc.
1186	* This matrix can be thought as a map from a sparse set of pairs (i, j) to a continuous
1187	* index k and it is used to efficiently lookup the memory locations where the non-zero
1188	* blocks of a sparse matrix/vector are stored
1189	*/
1190	int maxnm=(n>=m)? n:m, /* max. of (n, m) */
1191	rcidxs, / work array for the indexes corresponding to the nonzero elements of a single row or
1192	column in a sparse matrix, size max(n, m) */
1193	rcsubs; / work array for the subscripts of nonzero elements in a single row or column of a
1194	sparse matrix, size max(n, m) */
1195
1196	/* The following variables are needed by the LM algorithm */
1197	register int itno; /* iteration counter */
1198	int nsolved;
1199	/* temporary work arrays that are dynamically allocated */
1200	double hx, / \hat{x}_i, max. size mnmnp */
1201	diagU, / diagonals of U_j, size mcnp /
1202	pdp; / p + dp, size mcnp /
1203
1204	register double mu, /* damping constant */
1205	tmp; /* mainly used in matrix & vector multiplications */
1206	double p_eL2, ea_inf, pdp_eL2; /* \|\|e(p)\|\|_2, \|\|J^T e\|\|_inf, \|\|e(p+dp)\|\|_2 */
1207	double p_L2, dp_L2=DBL_MAX, dF, dL;
1208	double tau=FABS(opts[0]), eps1=FABS(opts[1]), eps2=FABS(opts[2]), eps2_sq=opts[2]opts[2], eps3_sq=opts[3]opts[3];
1209	double init_p_eL2;
1210	int nu=2, nu2, stop, nfev, njev=0, nlss=0;
1211	int nobs, nvars;
1212
1213	struct fdj_data_x_ fdj_data;
1214	void *jac_adata;
1215
1216	/* Initialization */
1217
1218	/* block sizes */
1219	Asz=mnp * cnp; Usz=cnp * cnp;
1220	esz=mnp; easz=cnp;
1221
1222	/* count total number of visible image points */
1223	for(i=nvis=0, jj=n*m; i<jj; ++i)
1224	nvis+=vmask[i];
1225
1226	nobs=nvis*mnp;
1227	nvars=m*cnp;
1228	if(nobs<nvars){
1229	fprintf(stderr, "sba_mot_levmar_x(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n", nobs, nvars);
1230	exit(1);
1231	}
1232
1233	/* allocate work arrays */
1234	jac=(double )emalloc(nvisAsz*sizeof(double));
1235	U=(double )emalloc(mUsz*sizeof(double));
1236	e=(double )emalloc(nobssizeof(double));
1237	ea=(double )emalloc(nvarssizeof(double));
1238	dp=(double )emalloc(nvarssizeof(double));
1239	rcidxs=(int )emalloc(maxnmsizeof(int));
1240	rcsubs=(int )emalloc(maxnmsizeof(int));
1241
1242
1243	hx=(double )emalloc(nobssizeof(double));
1244	diagU=(double )emalloc(nvarssizeof(double));
1245	pdp=(double )emalloc(nvarssizeof(double));
1246
1247	/* allocate & fill up the idxij structure */
1248	sba_crsm_alloc(&idxij, n, m, nvis);
1249	for(i=k=0; i<n; ++i){
1250	idxij.rowptr[i]=k;
1251	ii=i*m;
1252	for(j=0; j<m; ++j)
1253	if(vmask[ii+j]){
1254	idxij.val[k]=k;
1255	idxij.colidx[k++]=j;
1256	}
1257	}
1258	idxij.rowptr[n]=nvis;
1259
1260	/* if no jacobian function is supplied, prepare to compute jacobian with finite difference */
1261	if(!fjac){
1262	fdj_data.func=func;
1263	fdj_data.cnp=cnp;
1264	fdj_data.pnp=0;
1265	fdj_data.mnp=mnp;
1266	fdj_data.hx=hx;
1267	fdj_data.hxx=(double )emalloc(nobssizeof(double));
1268	fdj_data.func_rcidxs=(int )emalloc(2maxnm*sizeof(int));
1269	fdj_data.func_rcsubs=fdj_data.func_rcidxs+maxnm;
1270	fdj_data.adata=adata;
1271
1272	fjac=sba_fdjac_x;
1273	jac_adata=(void *)&fdj_data;
1274	}
1275	else{
1276	fdj_data.hxx=NULL;
1277	jac_adata=adata;
1278	}
1279
1280	if(itmax==0){ /* verify jacobian */
1281	sba_mot_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, mcon, cnp, mnp, adata, jac_adata);
1282	retval=0;
1283	goto freemem_and_return;
1284	}
1285
1286	/* compute the error vectors e_ij in hx */
1287	(*func)(p, &idxij, rcidxs, rcsubs, hx, adata); nfev=1;
1288	/* compute e=x - f(p) and its L2 norm */
1289	for(i=0, p_eL2=0.0; i<nobs; ++i){
1290	e[i]=tmp=x[i]-hx[i];
1291	p_eL2+=tmp*tmp;
1292	}
1293
1294	if(verbose) printf("initial mot-SBA error %g [%g]\n", p_eL2, p_eL2/nvis);
1295	init_p_eL2=p_eL2;
1296
1297	for(itno=stop=0; itno<itmax && !stop; ++itno){
1298	/* Note that p, e and \|\|e\|\|_2 have been updated at the previous iteration */
1299
1300	/* compute derivative submatrices A_ij */
1301	(*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); ++njev;
1302
1303	/* compute U_j = \sum_i A_ij^T A_ij */ // \Sigma here!
1304	/* U_j is symmetric, therefore its computation can be speeded up by
1305	* computing only the upper part and then reusing it for the lower one.
1306	* Recall that A_ij is mnp x cnp
1307	*/
1308	/* Also compute ea_j = \sum_i A_ij^T e_ij */ // \Sigma here!
1309	/* Recall that e_ij is mnp x 1
1310	*/
1311	_dblzero(U, mUsz); / clear all U_j */
1312	_dblzero(ea, measz); / clear all ea_j */
1313	for(j=mcon; j<m; ++j){
1314	ptr1=U + j*Usz; // set ptr1 to point to U_j
1315	ptr2=ea + j*easz; // set ptr2 to point to ea_j
1316
1317	nnz=sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */
1318	for(i=0; i<nnz; ++i){
1319	/* set ptr3 to point to A_ij, actual row number in rcsubs[i] */
1320	ptr3=jac + idxij.val[rcidxs[i]]*Asz;
1321
1322	/* compute the UPPER TRIANGULAR PART of A_ij^T A_ij and add it to U_j */
1323	for(ii=0; ii<cnp; ++ii){
1324	for(jj=ii; jj<cnp; ++jj){
1325	for(k=0, sum=0.0; k<mnp; ++k)
1326	sum+=ptr3[kcnp+ii]ptr3[k*cnp+jj];
1327	ptr1[ii*cnp+jj]+=sum;
1328	}
1329
1330	/* copy the LOWER TRIANGULAR PART of U_j from the upper one */
1331	for(jj=0; jj<ii; ++jj)
1332	ptr1[iicnp+jj]=ptr1[jjcnp+ii];
1333	}
1334
1335	ptr4=e + idxij.val[rcidxs[i]]esz; / set ptr4 to point to e_ij */
1336	/* compute A_ij^T e_ij and add it to ea_j */
1337	for(ii=0; ii<cnp; ++ii){
1338	for(jj=0, sum=0.0; jj<mnp; ++jj)
1339	sum+=ptr3[jjcnp+ii]ptr4[jj];
1340	ptr2[ii]+=sum;
1341	}
1342	}
1343	}
1344
1345	/* Compute \|\|J^T e\|\|_inf and \|\|p\|\|^2 */
1346	for(i=0, p_L2=ea_inf=0.0; i<nvars; ++i){
1347	if(ea_inf < (tmp=FABS(ea[i]))) ea_inf=tmp;
1348	p_L2+=p[i]*p[i];
1349	}
1350	//p_L2=sqrt(p_L2);
1351
1352	/* save diagonal entries so that augmentation can be later canceled.
1353	* Diagonal entries are in U_j
1354	*/
1355	for(j=mcon; j<m; ++j){
1356	ptr1=U + j*Usz; // set ptr1 to point to U_j
1357	ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
1358	for(i=0; i<cnp; ++i)
1359	ptr2[i]=ptr1[i*cnp+i];
1360	}
1361
1362	/*
1363	if(!(itno%100)){
1364	printf("Current estimate: ");
1365	for(i=0; i<nvars; ++i)
1366	printf("%.9g ", p[i]);
1367	printf("-- errors %.9g %0.9g\n", ea_inf, p_eL2);
1368	}
1369	*/
1370
1371	/* check for convergence */
1372	if((ea_inf <= eps1)){
1373	dp_L2=0.0; /* no increment for p in this case */
1374	stop=1;
1375	break;
1376	}
1377
1378	/* compute initial damping factor */
1379	if(itno==0){
1380	for(i=mcon*cnp, tmp=DBL_MIN; i<nvars; ++i)
1381	if(diagU[i]>tmp) tmp=diagU[i]; /* find max diagonal element */
1382	mu=tau*tmp;
1383	}
1384
1385	/* determine increment using adaptive damping */
1386	while(1){
1387	/* augment U */
1388	for(j=mcon; j<m; ++j){
1389	ptr1=U + j*Usz; // set ptr1 to point to U_j
1390	for(i=0; i<cnp; ++i)
1391	ptr1[i*cnp+i]+=mu;
1392	}
1393
1394	/* solve the linear systems U_j da_j = ea_j to compute the da_j */
1395	_dblzero(dp, mconcnp); / no change for the first mcon camera params */
1396	for(j=nsolved=mcon; j<m; ++j){
1397	ptr1=U + j*Usz; // set ptr1 to point to U_j
1398	ptr2=ea + j*easz; // set ptr2 to point to ea_j
1399	ptr3=dp + j*cnp; // set ptr3 to point to da_j
1400
1401	//nsolved+=sba_Axb_LU(ptr1, ptr2, ptr3, cnp, 0); ++nlss;
1402	nsolved+=sba_Axb_Chol(ptr1, ptr2, ptr3, cnp, 0); ++nlss;
1403	//nsolved+=sba_Axb_BK(ptr1, ptr2, ptr3, cnp, 0); ++nlss;
1404	//nsolved+=sba_Axb_QRnoQ(ptr1, ptr2, ptr3, cnp, 0); ++nlss;
1405	//nsolved+=sba_Axb_QR(ptr1, ptr2, ptr3, cnp, 0); ++nlss;
1406	//nsolved+=sba_Axb_SVD(ptr1, ptr2, ptr3, cnp, 0); ++nlss;
1407	//nsolved+=(sba_Axb_CG(ptr1, ptr2, ptr3, cnp, (3*cnp)/2, 1E-10, SBA_CG_JACOBI, 0) > 0); ++nlss;
1408	}
1409
1410	if(nsolved==m){
1411
1412	/* parameter vector updates are now in dp */
1413
1414	/* compute p's new estimate and \|\|dp\|\|^2 */
1415	for(i=0, dp_L2=0.0; i<nvars; ++i){
1416	pdp[i]=p[i] + (tmp=dp[i]);
1417	dp_L2+=tmp*tmp;
1418	}
1419	//dp_L2=sqrt(dp_L2);
1420
1421	if(dp_L2<=eps2_sqp_L2){ / relative change in p is small, stop */
1422	//if(dp_L2<=eps2(p_L2 + eps2)){ / relative change in p is small, stop */
1423	stop=2;
1424	break;
1425	}
1426
1427	if(dp_L2>=(p_L2+eps2)/SBA_EPSILON_SQ){ /* almost singular */
1428	//if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */
1429	stop=4;
1430	break;
1431	}
1432
1433	(func)(pdp, &idxij, rcidxs, rcsubs, hx, adata); ++nfev; / evaluate function at p + dp */
1434	if(verbose>1)
1435	printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs));
1436	for(i=0, pdp_eL2=0.0; i<nobs; ++i){ /* compute \|\|e(pdp)\|\|_2 */
1437	hx[i]=tmp=x[i]-hx[i];
1438	pdp_eL2+=tmp*tmp;
1439	}
1440
1441	for(i=0, dL=0.0; i<nvars; ++i)
1442	dL+=dp[i](mudp[i]+ea[i]);
1443
1444	dF=p_eL2-pdp_eL2;
1445
1446	if(verbose>1)
1447	printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu, dL!=0.0? dF/dL : dF/DBL_EPSILON, p_eL2/nvis, pdp_eL2/nvis, p_eL2/pdp_eL2);
1448
1449	if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */
1450	tmp=(2.0*dF/dL-1.0);
1451	tmp=1.0-tmptmptmp;
1452	mu=mu*( (tmp>=SBA_ONE_THIRD)? tmp : SBA_ONE_THIRD );
1453	nu=2;
1454
1455	for(i=0; i<nvars; ++i) /* update p's estimate */
1456	p[i]=pdp[i];
1457
1458	for(i=0; i<nobs; ++i) /* update e and \|\|e\|\|_2 */
1459	e[i]=hx[i];
1460	p_eL2=pdp_eL2;
1461	break;
1462	}
1463	} /* nsolved==m */
1464
1465	/* if this point is reached, either at least one linear system could not be solved or
1466	* the error did not reduce; in any case, the increment must be rejected
1467	*/
1468
1469	mu*=nu;
1470	nu2=nu<<1; // 2*nu;
1471	if(nu2<=nu){ /* nu has wrapped around (overflown) */
1472	fprintf(stderr, "Too many failed attempts to increase the damping factor in sba_mot_levmar_x()! Singular hessian matrix?\n");
1473	//exit(1);
1474	stop=6;
1475	break;
1476	}
1477	nu=nu2;
1478
1479	/* restore U diagonal entries */
1480	for(j=mcon; j<m; ++j){
1481	ptr1=U + j*Usz; // set ptr1 to point to U_j
1482	ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
1483	for(i=0; i<cnp; ++i)
1484	ptr1[i*cnp+i]=ptr2[i];
1485	}
1486	} /* inner loop */
1487
1488	if(p_eL2<=eps3_sq) stop=5; // error is small, force termination of outer loop
1489	}
1490
1491	if(itno>=itmax) stop=3;
1492
1493	/* restore U diagonal entries */
1494	for(j=mcon; j<m; ++j){
1495	ptr1=U + j*Usz; // set ptr1 to point to U_j
1496	ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
1497	for(i=0; i<cnp; ++i)
1498	ptr1[i*cnp+i]=ptr2[i];
1499	}
1500
1501	if(info){
1502	info[0]=init_p_eL2;
1503	info[1]=p_eL2;
1504	info[2]=ea_inf;
1505	info[3]=dp_L2;
1506	for(j=mcon, tmp=DBL_MIN; j<m; ++j){
1507	ptr1=U + j*Usz; // set ptr1 to point to U_j
1508	for(i=0; i<cnp; ++i)
1509	if(tmp<ptr1[icnp+i]) tmp=ptr1[icnp+i];
1510	}
1511	info[4]=mu/tmp;
1512	info[5]=itno;
1513	info[6]=stop;
1514	info[7]=nfev;
1515	info[8]=njev;
1516	info[9]=nlss;
1517	}
1518	//sba_print_sol(n, m, p, cnp, 0, x, mnp, &idxij, rcidxs, rcsubs);
1519	retval=(stop!=4)? itno : -1;
1520
1521	freemem_and_return: /* NOTE: this point is also reached via a goto! */
1522
1523	/* free whatever was allocated */
1524	free(jac); free(U);
1525	free(e); free(ea);
1526	free(dp);
1527	free(rcidxs); free(rcsubs);
1528
1529	free(hx); free(diagU); free(pdp);
1530	if(fdj_data.hxx){ // cleanup
1531	free(fdj_data.hxx);
1532	free(fdj_data.func_rcidxs);
1533	}
1534
1535	sba_crsm_free(&idxij);
1536
1537	return retval;
1538	}
1539
1540
1541	/* Bundle adjustment on structure parameters only
1542	* using the sparse Levenberg-Marquardt as described in HZ p. 568
1543	*/
1544
1545	int sba_str_levmar_x(
1546	const int n, /* number of points */
1547	const int m, /* number of images */
1548	char vmask, / visibility mask: vmask[i][j]=1 if point i visible in image j, 0 otherwise. nxm */
1549	double p, / initial parameter vector p0: (b1, ..., bn).
1550	* bi are the i-th point parameters, * size npnp /
1551	const int pnp,/* number of parameters for ONE point; e.g. 3 for Euclidean points */
1552	double x, / measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where
1553	* x_ij is the projection of the i-th point on the j-th image.
1554	* NOTE: some of the x_ij might be missing, if point i is not visible in image j;
1555	* see vmask[i][j], max. size nmmnp
1556	*/
1557	const int mnp,/* number of parameters for EACH measurement; usually 2 */
1558	void (func)(double p, struct sba_crsm idxij, int rcidxs, int rcsubs, double hx, void *adata),
1559	/* functional relation describing measurements. Given a parameter vector p,
1560	* computes a prediction of the measurements \hat{x}. p is (n*pnp)x1,
1561	* \hat{x} is (nmmnp)x1, maximum
1562	* rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
1563	* as working memory
1564	*/
1565	void (fjac)(double p, struct sba_crsm idxij, int rcidxs, int rcsubs, double jac, void *adata),
1566	/* function to evaluate the sparse jacobian dX/dp.
1567	* The Jacobian is returned in jac as
1568	* (dx_11/db_1, ..., dx_1m/db_1, ..., dx_n1/db_n, ..., dx_nm/db_n), or (using HZ's notation),
1569	* jac=(B_11, ..., B_1m, ..., B_n1, ..., B_nm)
1570	* Notice that depending on idxij, some of the B_ij might be missing.
1571	* Note also that B_ij are mnp x pnp matrices and they
1572	* should be stored in jac in row-major order.
1573	* rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
1574	* as working memory
1575	*
1576	* If NULL, the jacobian is approximated by repetitive func calls and finite
1577	* differences. This is computationally inefficient and thus NOT recommended.
1578	*/
1579	void adata, / pointer to possibly additional data, passed uninterpreted to func, fjac */
1580
1581	int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate return */
1582	int verbose, /* I: verbosity */
1583	double opts[SBA_OPTSSZ],
1584	/* I: minim. options [\mu, \epsilon1, \epsilon2]. Respectively the scale factor for initial \mu,
1585	* stopping thresholds for \|\|J^T e\|\|_inf, \|\|dp\|\|_2 and \|\|e\|\|_2
1586	*/
1587	double info[SBA_INFOSZ]
1588	/* O: information regarding the minimization. Set to NULL if don't care
1589	* info[0]=\|\|e\|\|_2 at initial p.
1590	* info[1-4]=[ \|\|e\|\|_2, \|\|J^T e\|\|_inf, \|\|dp\|\|_2, mu/max[J^T J]_ii ], all computed at estimated p.
1591	* info[5]= # iterations,
1592	* info[6]=reason for terminating: 1 - stopped by small gradient J^T e
1593	* 2 - stopped by small dp
1594	* 3 - stopped by itmax
1595	* 4 - singular matrix. Restart from current p with increased mu
1596	* 5 - stopped by small \|\|e\|\|_2
1597	* 6 - too many attempts to increase damping. Restart with increased mu
1598	* info[7]= # function evaluations
1599	* info[8]= # jacobian evaluations
1600	* info[9]= # number of linear systems solved, i.e. number of attempts for reducing error
1601	*/
1602	)
1603	{
1604	register int i, j, ii, jj, k;
1605	int nvis, nnz, retval;
1606
1607	/* The following are work arrays that are dynamically allocated by sba_str_levmar_x() */
1608	double jac; / work array for storing the jacobian, max. size nmmnppnp /
1609	double V; / work array for storing the V_i in the order V_1, ..., V_n, size npnppnp */
1610
1611	double e; / work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm,
1612	max. size nmmnp */
1613	double eb; / work array for storing the eb_i in the order eb_1, .. eb_n size npnp /
1614
1615	double dp; / work array for storing the parameter vector updates db_1, ..., db_n, size npnp /
1616
1617	/* Of the above arrays, jac, e, are sparse and
1618	* V, eb, dp are dense. Sparse arrays are indexed through
1619	* idxij (see below), that is with the same mechanism as the input
1620	* measurements vector x
1621	*/
1622
1623	/* submatrices sizes */
1624	int Bsz, Vsz,
1625	esz, ebsz;
1626
1627	register double ptr1, ptr2, ptr3, ptr4, sum;
1628	struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x. This is also the location
1629	* of B_ij in jac, etc.
1630	* This matrix can be thought as a map from a sparse set of pairs (i, j) to a continuous
1631	* index k and it is used to efficiently lookup the memory locations where the non-zero
1632	* blocks of a sparse matrix/vector are stored
1633	*/
1634	int maxnm=(n>=m)? n:m, /* max. of (n, m) */
1635	rcidxs, / work array for the indexes corresponding to the nonzero elements of a single row or
1636	column in a sparse matrix, size max(n, m) */
1637	rcsubs; / work array for the subscripts of nonzero elements in a single row or column of a
1638	sparse matrix, size max(n, m) */
1639
1640	/* The following variables are needed by the LM algorithm */
1641	register int itno; /* iteration counter */
1642	int nsolved;
1643	/* temporary work arrays that are dynamically allocated */
1644	double hx, / \hat{x}_i, max. size mnmnp */
1645	diagV, / diagonals of V_i, size npnp /
1646	pdp; / p + dp, size npnp /
1647
1648	register double mu, /* damping constant */
1649	tmp; /* mainly used in matrix & vector multiplications */
1650	double p_eL2, eb_inf, pdp_eL2; /* \|\|e(p)\|\|_2, \|\|J^T e\|\|_inf, \|\|e(p+dp)\|\|_2 */
1651	double p_L2, dp_L2=DBL_MAX, dF, dL;
1652	double tau=FABS(opts[0]), eps1=FABS(opts[1]), eps2=FABS(opts[2]), eps2_sq=opts[2]opts[2], eps3_sq=opts[3]opts[3];
1653	double init_p_eL2;
1654	int nu=2, nu2, stop, nfev, njev=0, nlss=0;
1655	int nobs, nvars;
1656
1657	struct fdj_data_x_ fdj_data;
1658	void *jac_adata;
1659
1660	/* Initialization */
1661
1662	/* block sizes */
1663	Bsz=mnp * pnp; Vsz=pnp * pnp;
1664	esz=mnp; ebsz=pnp;
1665
1666	/* count total number of visible image points */
1667	for(i=nvis=0, jj=n*m; i<jj; ++i)
1668	nvis+=vmask[i];
1669
1670	nobs=nvis*mnp;
1671	nvars=n*pnp;
1672	if(nobs<nvars){
1673	fprintf(stderr, "sba_str_levmar_x(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n", nobs, nvars);
1674	exit(1);
1675	}
1676
1677	/* allocate work arrays */
1678	jac=(double )emalloc(nvisBsz*sizeof(double));
1679	V=(double )emalloc(nVsz*sizeof(double));
1680	e=(double )emalloc(nobssizeof(double));
1681	eb=(double )emalloc(nvarssizeof(double));
1682	dp=(double )emalloc(nvarssizeof(double));
1683	rcidxs=(int )emalloc(maxnmsizeof(int));
1684	rcsubs=(int )emalloc(maxnmsizeof(int));
1685
1686
1687	hx=(double )emalloc(nobssizeof(double));
1688	diagV=(double )emalloc(nvarssizeof(double));
1689	pdp=(double )emalloc(nvarssizeof(double));
1690
1691	/* allocate & fill up the idxij structure */
1692	sba_crsm_alloc(&idxij, n, m, nvis);
1693	for(i=k=0; i<n; ++i){
1694	idxij.rowptr[i]=k;
1695	ii=i*m;
1696	for(j=0; j<m; ++j)
1697	if(vmask[ii+j]){
1698	idxij.val[k]=k;
1699	idxij.colidx[k++]=j;
1700	}
1701	}
1702	idxij.rowptr[n]=nvis;
1703
1704	/* if no jacobian function is supplied, prepare to compute jacobian with finite difference */
1705	if(!fjac){
1706	fdj_data.func=func;
1707	fdj_data.cnp=0;
1708	fdj_data.pnp=pnp;
1709	fdj_data.mnp=mnp;
1710	fdj_data.hx=hx;
1711	fdj_data.hxx=(double )emalloc(nobssizeof(double));
1712	fdj_data.func_rcidxs=(int )emalloc(2maxnm*sizeof(int));
1713	fdj_data.func_rcsubs=fdj_data.func_rcidxs+maxnm;
1714	fdj_data.adata=adata;
1715
1716	fjac=sba_fdjac_x;
1717	jac_adata=(void *)&fdj_data;
1718	}
1719	else{
1720	fdj_data.hxx=NULL;
1721	jac_adata=adata;
1722	}
1723
1724	if(itmax==0){ /* verify jacobian */
1725	sba_str_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, pnp, mnp, adata, jac_adata);
1726	retval=0;
1727	goto freemem_and_return;
1728	}
1729
1730	/* compute the error vectors e_ij in hx */
1731	(*func)(p, &idxij, rcidxs, rcsubs, hx, adata); nfev=1;
1732	/* compute e=x - f(p) and its L2 norm */
1733	for(i=0, p_eL2=0.0; i<nobs; ++i){
1734	e[i]=tmp=x[i]-hx[i];
1735	p_eL2+=tmp*tmp;
1736	}
1737
1738	if(verbose) printf("initial str-SBA error %g [%g]\n", p_eL2, p_eL2/nvis);
1739	init_p_eL2=p_eL2;
1740
1741	for(itno=stop=0; itno<itmax && !stop; ++itno){
1742	/* Note that p, e and \|\|e\|\|_2 have been updated at the previous iteration */
1743
1744	/* compute derivative submatrices B_ij */
1745	(*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); ++njev;
1746
1747	/* compute V_i = \sum_j B_ij^T B_ij */ // \Sigma here!
1748	/* V_i is symmetric, therefore its computation can be speeded up by
1749	* computing only the upper part and then reusing it for the lower one.
1750	* Recall that B_ij is mnp x pnp
1751	*/
1752	/* Also compute eb_i = \sum_j B_ij^T e_ij */ // \Sigma here!
1753	/* Recall that e_ij is mnp x 1
1754	*/
1755	_dblzero(V, nVsz); / clear all V_i */
1756	_dblzero(eb, nebsz); / clear all eb_i */
1757	for(i=0; i<n; ++i){
1758	ptr1=V + i*Vsz; // set ptr1 to point to V_i
1759	ptr2=eb + i*ebsz; // set ptr2 to point to eb_i
1760
1761	nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */
1762	for(j=0; j<nnz; ++j){
1763	/* set ptr3 to point to B_ij, actual column number in rcsubs[j] */
1764	ptr3=jac + idxij.val[rcidxs[j]]*Bsz;
1765
1766	/* compute the UPPER TRIANGULAR PART of B_ij^T B_ij and add it to V_i */
1767	for(ii=0; ii<pnp; ++ii){
1768	for(jj=ii; jj<pnp; ++jj){
1769	for(k=0, sum=0.0; k<mnp; ++k)
1770	sum+=ptr3[kpnp+ii]ptr3[k*pnp+jj];
1771	ptr1[ii*pnp+jj]+=sum;
1772	}
1773
1774	/* copy the LOWER TRIANGULAR PART of V_i from the upper one */
1775	for(jj=0; jj<ii; ++jj)
1776	ptr1[iipnp+jj]=ptr1[jjpnp+ii];
1777	}
1778
1779	ptr4=e + idxij.val[rcidxs[j]]esz; / set ptr4 to point to e_ij */
1780	/* compute B_ij^T e_ij and add it to eb_i */
1781	for(ii=0; ii<pnp; ++ii){
1782	for(jj=0, sum=0.0; jj<mnp; ++jj)
1783	sum+=ptr3[jjpnp+ii]ptr4[jj];
1784	ptr2[ii]+=sum;
1785	}
1786	}
1787	}
1788
1789	/* Compute \|\|J^T e\|\|_inf and \|\|p\|\|^2 */
1790	for(i=0, p_L2=eb_inf=0.0; i<nvars; ++i){
1791	if(eb_inf < (tmp=FABS(eb[i]))) eb_inf=tmp;
1792	p_L2+=p[i]*p[i];
1793	}
1794	//p_L2=sqrt(p_L2);
1795
1796	/* save diagonal entries so that augmentation can be later canceled.
1797	* Diagonal entries are in V_i
1798	*/
1799	for(i=0; i<n; ++i){
1800	ptr1=V + i*Vsz; // set ptr1 to point to V_i
1801	ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
1802	for(j=0; j<pnp; ++j)
1803	ptr2[j]=ptr1[j*pnp+j];
1804	}
1805
1806	/*
1807	if(!(itno%100)){
1808	printf("Current estimate: ");
1809	for(i=0; i<nvars; ++i)
1810	printf("%.9g ", p[i]);
1811	printf("-- errors %.9g %0.9g\n", eb_inf, p_eL2);
1812	}
1813	*/
1814
1815	/* check for convergence */
1816	if((eb_inf <= eps1)){
1817	dp_L2=0.0; /* no increment for p in this case */
1818	stop=1;
1819	break;
1820	}
1821
1822	/* compute initial damping factor */
1823	if(itno==0){
1824	for(i=0, tmp=DBL_MIN; i<nvars; ++i)
1825	if(diagV[i]>tmp) tmp=diagV[i]; /* find max diagonal element */
1826	mu=tau*tmp;
1827	}
1828
1829	/* determine increment using adaptive damping */
1830	while(1){
1831	/* augment V */
1832	for(i=0; i<n; ++i){
1833	ptr1=V + i*Vsz; // set ptr1 to point to V_i
1834	for(j=0; j<pnp; ++j)
1835	ptr1[j*pnp+j]+=mu;
1836	}
1837
1838	/* solve the linear systems V_i db_i = eb_i to compute the db_i /
1839	for(i=nsolved=0; i<n; ++i){
1840	ptr1=V + i*Vsz; // set ptr1 to point to V_i
1841	ptr2=eb + i*ebsz; // set ptr2 to point to eb_i
1842	ptr3=dp + i*pnp; // set ptr3 to point to db_i
1843
1844	//nsolved+=sba_Axb_LU(ptr1, ptr2, ptr3, pnp, 0); ++nlss;
1845	nsolved+=sba_Axb_Chol(ptr1, ptr2, ptr3, pnp, 0); ++nlss;
1846	//nsolved+=sba_Axb_BK(ptr1, ptr2, ptr3, pnp, 0); ++nlss;
1847	//nsolved+=sba_Axb_QRnoQ(ptr1, ptr2, ptr3, pnp, 0); ++nlss;
1848	//nsolved+=sba_Axb_QR(ptr1, ptr2, ptr3, pnp, 0); ++nlss;
1849	//nsolved+=sba_Axb_SVD(ptr1, ptr2, ptr3, pnp, 0); ++nlss;
1850	//nsolved+=(sba_Axb_CG(ptr1, ptr2, ptr3, pnp, (3*pnp)/2, 1E-10, SBA_CG_JACOBI, 0) > 0); ++nlss;
1851	}
1852
1853	if(nsolved==n){
1854
1855	/* parameter vector updates are now in dp */
1856
1857	/* compute p's new estimate and \|\|dp\|\|^2 */
1858	for(i=0, dp_L2=0.0; i<nvars; ++i){
1859	pdp[i]=p[i] + (tmp=dp[i]);
1860	dp_L2+=tmp*tmp;
1861	}
1862	//dp_L2=sqrt(dp_L2);
1863
1864	if(dp_L2<=eps2_sqp_L2){ / relative change in p is small, stop */
1865	//if(dp_L2<=eps2(p_L2 + eps2)){ / relative change in p is small, stop */
1866	stop=2;
1867	break;
1868	}
1869
1870	if(dp_L2>=(p_L2+eps2)/SBA_EPSILON_SQ){ /* almost singular */
1871	//if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */
1872	stop=4;
1873	break;
1874	}
1875
1876	(func)(pdp, &idxij, rcidxs, rcsubs, hx, adata); ++nfev; / evaluate function at p + dp */
1877	if(verbose>1)
1878	printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs));
1879	for(i=0, pdp_eL2=0.0; i<nobs; ++i){ /* compute \|\|e(pdp)\|\|_2 */
1880	hx[i]=tmp=x[i]-hx[i];
1881	pdp_eL2+=tmp*tmp;
1882	}
1883
1884	for(i=0, dL=0.0; i<nvars; ++i)
1885	dL+=dp[i](mudp[i]+eb[i]);
1886
1887	dF=p_eL2-pdp_eL2;
1888
1889	if(verbose>1)
1890	printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu, dL!=0.0? dF/dL : dF/DBL_EPSILON, p_eL2/nvis, pdp_eL2/nvis, p_eL2/pdp_eL2);
1891
1892	if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */
1893	tmp=(2.0*dF/dL-1.0);
1894	tmp=1.0-tmptmptmp;
1895	mu=mu*( (tmp>=SBA_ONE_THIRD)? tmp : SBA_ONE_THIRD );
1896	nu=2;
1897
1898	for(i=0; i<nvars; ++i) /* update p's estimate */
1899	p[i]=pdp[i];
1900
1901	for(i=0; i<nobs; ++i) /* update e and \|\|e\|\|_2 */
1902	e[i]=hx[i];
1903	p_eL2=pdp_eL2;
1904	break;
1905	}
1906	} /* nsolved==n */
1907
1908	/* if this point is reached, either at least one linear system could not be solved or
1909	* the error did not reduce; in any case, the increment must be rejected
1910	*/
1911
1912	mu*=nu;
1913	nu2=nu<<1; // 2*nu;
1914	if(nu2<=nu){ /* nu has wrapped around (overflown) */
1915	fprintf(stderr, "Too many failed attempts to increase the damping factor in sba_str_levmar_x()! Singular hessian matrix?\n");
1916	//exit(1);
1917	stop=6;
1918	break;
1919	}
1920	nu=nu2;
1921
1922	/* restore V diagonal entries */
1923	for(i=0; i<n; ++i){
1924	ptr1=V + i*Vsz; // set ptr1 to point to V_i
1925	ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
1926	for(j=0; j<pnp; ++j)
1927	ptr1[j*pnp+j]=ptr2[j];
1928	}
1929	} /* inner loop */
1930
1931	if(p_eL2<=eps3_sq) stop=5; // error is small, force termination of outer loop
1932	}
1933
1934	if(itno>=itmax) stop=3;
1935
1936	/* restore V diagonal entries */
1937	for(i=0; i<n; ++i){
1938	ptr1=V + i*Vsz; // set ptr1 to point to V_i
1939	ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
1940	for(j=0; j<pnp; ++j)
1941	ptr1[j*pnp+j]=ptr2[j];
1942	}
1943
1944	if(info){
1945	info[0]=init_p_eL2;
1946	info[1]=p_eL2;
1947	info[2]=eb_inf;
1948	info[3]=dp_L2;
1949	for(i=0; i<n; ++i){
1950	ptr1=V + i*Vsz; // set ptr1 to point to V_i
1951	for(j=0; j<pnp; ++j)
1952	if(tmp<ptr1[jpnp+j]) tmp=ptr1[jpnp+j];
1953	}
1954	info[4]=mu/tmp;
1955	info[5]=itno;
1956	info[6]=stop;
1957	info[7]=nfev;
1958	info[8]=njev;
1959	info[9]=nlss;
1960	}
1961	//sba_print_sol(n, m, p, 0, pnp, x, mnp, &idxij, rcidxs, rcsubs);
1962	retval=(stop!=4)? itno : -1;
1963
1964	freemem_and_return: /* NOTE: this point is also reached via a goto! */
1965
1966	/* free whatever was allocated */
1967	free(jac); free(V);
1968	free(e); free(eb);
1969	free(dp);
1970	free(rcidxs); free(rcsubs);
1971
1972	free(hx); free(diagV); free(pdp);
1973	if(fdj_data.hxx){ // cleanup
1974	free(fdj_data.hxx);
1975	free(fdj_data.func_rcidxs);
1976	}
1977
1978	sba_crsm_free(&idxij);
1979
1980	return retval;
1981	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: proiecte/pmake3d/make3d_original/Make3dSingleImageStanford_version0.1/third_party/sba-1.3/sba_levmar.c @ 37

Download in other formats: