source: proiecte/pmake3d/make3d_original/Make3dSingleImageStanford_version0.1/third_party/sba-1.3/sba_levmar.c @ 37

Last change on this file since 37 was 37, checked in by (none), 14 years ago

Added original make3d

File size: 74.8 KB
Line 
1/////////////////////////////////////////////////////////////////////////////////
2////
3////  Expert drivers for sparse bundle adjustment based on the
4////  Levenberg - Marquardt minimization algorithm
5////  Copyright (C) 2004  Manolis Lourakis (lourakis@ics.forth.gr)
6////  Institute of Computer Science, Foundation for Research & Technology - Hellas
7////  Heraklion, Crete, Greece.
8////
9////  This program is free software; you can redistribute it and/or modify
10////  it under the terms of the GNU General Public License as published by
11////  the Free Software Foundation; either version 2 of the License, or
12////  (at your option) any later version.
13////
14////  This program is distributed in the hope that it will be useful,
15////  but WITHOUT ANY WARRANTY; without even the implied warranty of
16////  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17////  GNU General Public License for more details.
18////
19///////////////////////////////////////////////////////////////////////////////////
20
21#include <stdio.h>
22#include <stdlib.h>
23#include <string.h>
24#include <math.h>
25#include <float.h>
26
27#include "sba.h"
28#include "sba_chkjac.h"
29
30#define SBA_EPSILON       1E-12
31#define SBA_EPSILON_SQ    ( (SBA_EPSILON)*(SBA_EPSILON) )
32
33#define SBA_ONE_THIRD     0.3333333334 /* 1.0/3.0 */
34
35
36#define emalloc(sz)       emalloc_(__FILE__, __LINE__, sz)
37
38#define FABS(x)           (((x)>=0)? (x) : -(x))
39
40#define ROW_MAJOR         0
41#define COLUMN_MAJOR      1
42#define MAT_STORAGE       COLUMN_MAJOR
43
44
45/* inline */
46#ifdef _MSC_VER
47#define inline __inline //MSVC
48#elif !defined(__GNUC__)
49#define inline //other than MSVC, GCC: define empty
50#endif
51
52/* contains information necessary for computing a finite difference approximation to a jacobian,
53 * e.g. function to differentiate, problem dimensions and pointers to working memory buffers
54 */
55struct fdj_data_x_ {
56  void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata); /* function to differentiate */
57  int cnp, pnp, mnp;  /* parameter numbers */
58  int *func_rcidxs,
59      *func_rcsubs;   /* working memory for func invocations.
60                       * Notice that this has to be different
61                       * than the working memory used for
62                       * evaluating the jacobian!
63                       */
64  double *hx, *hxx;   /* memory to save results in */
65  void *adata;
66};
67
68/* auxiliary memory allocation routine with error checking */
69inline static void *emalloc_(char *file, int line, size_t sz)
70{
71void *ptr;
72
73  ptr=(void *)malloc(sz);
74  if(ptr==NULL){
75    fprintf(stderr, "memory allocation request for %u bytes failed in file %s, line %d, exiting", sz, file, line);
76    exit(1);
77  }
78
79  return ptr;
80}
81
82/* auxiliary routine for clearing an array of doubles */
83inline static void _dblzero(register double *arr, register int count)
84{
85  while(--count>=0)
86    *arr++=0.0;
87}
88
89/* auxiliary routine for computing the mean reprojection error; used for debugging */
90static double sba_mean_repr_error(int n, int mnp, double *x, double *hx, struct sba_crsm *idxij, int *rcidxs, int *rcsubs)
91{
92register int i, j;
93int nnz, nprojs;
94double *ptr1, *ptr2;
95double err;
96
97  for(i=nprojs=0, err=0.0; i<n; ++i){
98    nnz=sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */
99    nprojs+=nnz;
100    for(j=0; j<nnz; ++j){ /* point i projecting on camera rcsubs[j] */
101      ptr1=x + idxij->val[rcidxs[j]]*mnp;
102      ptr2=hx + idxij->val[rcidxs[j]]*mnp;
103
104      err+=sqrt((ptr1[0]-ptr2[0])*(ptr1[0]-ptr2[0]) + (ptr1[1]-ptr2[1])*(ptr1[1]-ptr2[1]));
105    }
106  }
107
108  return err/((double)(nprojs));
109}
110
111/* print the solution in p using sba's text format. If cnp/pnp==0 only points/cameras are printed */
112static void sba_print_sol(int n, int m, double *p, int cnp, int pnp, double *x, int mnp, struct sba_crsm *idxij, int *rcidxs, int *rcsubs)
113{
114register int i, j, ii;
115int nnz;
116double *ptr;
117
118  if(cnp){
119    /* print camera parameters */
120    for(j=0; j<m; ++j){
121      ptr=p+cnp*j;
122      for(ii=0; ii<cnp; ++ii)
123        printf("%g ", ptr[ii]);
124      printf("\n");
125    }
126  }
127
128  if(pnp){
129    /* 3D & 2D point parameters */
130    printf("\n\n\n# X Y Z  nframes  frame0 x0 y0  frame1 x1 y1 ...\n");
131    for(i=0; i<n; ++i){
132      ptr=p+cnp*m+i*pnp;
133      for(ii=0; ii<pnp; ++ii) // print 3D coordinates
134        printf("%g ", ptr[ii]);
135
136      nnz=sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */
137      printf("%d ", nnz);
138
139      for(j=0; j<nnz; ++j){ /* point i projecting on camera rcsubs[j] */
140        ptr=x + idxij->val[rcidxs[j]]*mnp;
141
142        printf("%d ", rcsubs[j]);
143        for(ii=0; ii<mnp; ++ii) // print 2D coordinates
144          printf("%g ", ptr[ii]);
145      }
146      printf("\n");
147    }
148  }
149}
150
151
152/* Given a parameter vector p made up of the 3D coordinates of n points and the parameters of m cameras, compute in
153 * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images.
154 * The jacobian is approximated with the aid of finite differences and is returned in the order
155 * (A_11, ..., A_1m, ..., A_n1, ..., A_nm, B_11, ..., B_1m, ..., B_n1, ..., B_nm),
156 * where A_ij=dx_ij/da_j and B_ij=dx_ij/db_i (see HZ).
157 * Notice that depending on idxij, some of the A_ij, B_ij might be missing
158 *
159 * Problem-specific information is assumed to be stored in a structure pointed to by "dat".
160 *
161 * NOTE: The jacobian (for n=4, m=3) in matrix form has the following structure:
162 *       A_11  0     0     B_11 0    0    0
163 *       0     A_12  0     B_12 0    0    0
164 *       0     0     A_13  B_13 0    0    0
165 *       A_21  0     0     0    B_21 0    0
166 *       0     A_22  0     0    B_22 0    0
167 *       0     0     A_23  0    B_23 0    0
168 *       A_31  0     0     0    0    B_31 0
169 *       0     A_32  0     0    0    B_32 0
170 *       0     0     A_33  0    0    B_33 0
171 *       A_41  0     0     0    0    0    B_41
172 *       0     A_42  0     0    0    0    B_42
173 *       0     0     A_43  0    0    0    B_43
174 *       To reduce the total number of objective function evaluations, this structure can be
175 *       exploited as follows: A certain d is added to the j-th parameters of all cameras and
176 *       the objective function is evaluated at the resulting point. This evaluation suffices
177 *       to compute the corresponding columns of *all* A_ij through finite differences. A similar
178 *       strategy allows the computation of the B_ij. Overall, only cnp+pnp+1 objective function
179 *       evaluations are needed to compute the jacobian, much fewer compared to the m*cnp+n*pnp+1
180 *       that would be required by the naive strategy of computing one column of the jacobian
181 *       per function evaluation. See Nocedal-Wright, ch. 7, pp. 169. Although this approach is
182 *       much faster compared to the naive strategy, it is not preferable to analytic jacobians,
183 *       since the latter are considerably faster to compute and result in fewer LM iterations.
184 */
185static void sba_fdjac_x(
186    double *p,                /* I: current parameter estimate, (m*cnp+n*pnp)x1 */
187    struct sba_crsm *idxij,   /* I: sparse matrix containing the location of x_ij in hx */
188    int    *rcidxs,           /* work array for the indexes of nonzero elements of a single sparse matrix row/column */
189    int    *rcsubs,           /* work array for the subscripts of nonzero elements in a single sparse matrix row/column */
190    double *jac,              /* O: array for storing the approximated jacobian */
191    void   *dat)              /* I: points to a "fdj_data_x_" structure */
192{
193  register int i, j, ii, jj;
194  double *pa, *pb, *pqr, *ppt, *jaca, *jacb;
195  register double *pAB, *phx, *phxx;
196  int n, m, nm, nnz, Asz, Bsz, idx;
197
198  double *tmpd;
199  register double d;
200
201  struct fdj_data_x_ *fdjd;
202  void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata);
203  double *hx, *hxx;
204  int cnp, pnp, mnp;
205  void *adata;
206
207
208  /* retrieve problem-specific information passed in *dat */
209  fdjd=(struct fdj_data_x_ *)dat;
210  func=fdjd->func;
211  cnp=fdjd->cnp; pnp=fdjd->pnp; mnp=fdjd->mnp;
212  hx=fdjd->hx;
213  hxx=fdjd->hxx;
214  adata=fdjd->adata;
215
216  n=idxij->nr; m=idxij->nc;
217  pa=p; pb=p+m*cnp;
218  Asz=mnp*cnp; Bsz=mnp*pnp;
219  jaca=jac; jacb=jac+idxij->nnz*Asz;
220
221  nm=(n>=m)? n : m; // max(n, m);
222  tmpd=(double *)emalloc(nm*sizeof(double));
223
224  (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hx, adata); // evaluate supplied function on current solution
225
226  if(cnp){ // is motion varying?
227    /* compute A_ij */
228    for(jj=0; jj<cnp; ++jj){
229      for(j=0; j<m; ++j){
230        pqr=pa+j*cnp; // j-th camera parameters
231        /* determine d=max(SBA_DELTA_SCALE*|pqr[jj]|, SBA_MIN_DELTA), see HZ */
232        d=(double)(SBA_DELTA_SCALE)*pqr[jj]; // force evaluation
233        d=FABS(d);
234        if(d<SBA_MIN_DELTA) d=SBA_MIN_DELTA;
235
236        tmpd[j]=d;
237        pqr[jj]+=d;
238      }
239
240      (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hxx, adata);
241
242      for(j=0; j<m; ++j){
243        pqr=pa+j*cnp; // j-th camera parameters
244        pqr[jj]-=tmpd[j]; /* restore */
245        d=1.0/tmpd[j]; /* invert so that divisions can be carried out faster as multiplications */
246
247        nnz=sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */
248        for(i=0; i<nnz; ++i){
249          idx=idxij->val[rcidxs[i]];
250          phx=hx + idx*mnp; // set phx to point to hx_ij
251          phxx=hxx + idx*mnp; // set phxx to point to hxx_ij
252          pAB=jaca + idx*Asz; // set pAB to point to A_ij
253
254          for(ii=0; ii<mnp; ++ii)
255            pAB[ii*cnp+jj]=(phxx[ii]-phx[ii])*d;
256        }
257      }
258    }
259  }
260
261  if(pnp){ // is structure varying?
262    /* compute B_ij */
263    for(jj=0; jj<pnp; ++jj){
264      for(i=0; i<n; ++i){
265        ppt=pb+i*pnp; // i-th point parameters
266        /* determine d=max(SBA_DELTA_SCALE*|ppt[jj]|, SBA_MIN_DELTA), see HZ */
267        d=(double)(SBA_DELTA_SCALE)*ppt[jj]; // force evaluation
268        d=FABS(d);
269        if(d<SBA_MIN_DELTA) d=SBA_MIN_DELTA;
270
271        tmpd[i]=d;
272        ppt[jj]+=d;
273      }
274
275      (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hxx, adata);
276
277      for(i=0; i<n; ++i){
278        ppt=pb+i*pnp; // i-th point parameters
279        ppt[jj]-=tmpd[i]; /* restore */
280        d=1.0/tmpd[i]; /* invert so that divisions can be carried out faster as multiplications */
281
282        nnz=sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */
283        for(j=0; j<nnz; ++j){
284          idx=idxij->val[rcidxs[j]];
285          phx=hx + idx*mnp; // set phx to point to hx_ij
286          phxx=hxx + idx*mnp; // set phxx to point to hxx_ij
287          pAB=jacb + idx*Bsz; // set pAB to point to B_ij
288
289          for(ii=0; ii<mnp; ++ii)
290            pAB[ii*pnp+jj]=(phxx[ii]-phx[ii])*d;
291        }
292      }
293    }
294  }
295
296  free(tmpd);
297}
298
299/* Bundle adjustment on camera and structure parameters
300 * using the sparse Levenberg-Marquardt as described in HZ p. 568
301 */
302
303int sba_motstr_levmar_x(
304    const int n,   /* number of points */
305    const int m,   /* number of images */
306    const int mcon,/* number of images (starting from the 1st) whose parameters should not be modified.
307                                                  * All A_ij (see below) with j<mcon are assumed to be zero
308                                                  */
309    char *vmask,  /* visibility mask: vmask[i][j]=1 if point i visible in image j, 0 otherwise. nxm */
310    double *p,    /* initial parameter vector p0: (a1, ..., am, b1, ..., bn).
311                   * aj are the image j parameters, bi are the i-th point parameters,
312                   * size m*cnp + n*pnp
313                   */
314    const int cnp,/* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */
315    const int pnp,/* number of parameters for ONE point; e.g. 3 for Euclidean points */
316    double *x,    /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where
317                   * x_ij is the projection of the i-th point on the j-th image.
318                   * NOTE: some of the x_ij might be missing, if point i is not visible in image j;
319                   * see vmask[i][j], max. size n*m*mnp
320                   */
321    const int mnp,/* number of parameters for EACH measurement; usually 2 */
322    void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
323                                              /* functional relation describing measurements. Given a parameter vector p,
324                                               * computes a prediction of the measurements \hat{x}. p is (m*cnp + n*pnp)x1,
325                                               * \hat{x} is (n*m*mnp)x1, maximum
326                                               * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
327                                               * as working memory
328                                               */
329    void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata),
330                                              /* function to evaluate the sparse jacobian dX/dp.
331                                               * The Jacobian is returned in jac as
332                                               * (dx_11/da_1, ..., dx_1m/da_m, ..., dx_n1/da_1, ..., dx_nm/da_m,
333                                               *  dx_11/db_1, ..., dx_1m/db_1, ..., dx_n1/db_n, ..., dx_nm/db_n), or (using HZ's notation),
334                                               * jac=(A_11, ..., A_1m, ..., A_n1, ..., A_nm, B_11, ..., B_1m, ..., B_n1, ..., B_nm)
335                                               * Notice that depending on idxij, some of the A_ij and B_ij might be missing.
336                                               * Note also that A_ij and B_ij are mnp x cnp and mnp x pnp matrices resp. and they
337                                               * should be stored in jac in row-major order.
338                                               * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
339                                               * as working memory
340                                               *
341                                               * If NULL, the jacobian is approximated by repetitive func calls and finite
342                                               * differences. This is computationally inefficient and thus NOT recommended.
343                                               */
344    void *adata,       /* pointer to possibly additional data, passed uninterpreted to func, fjac */ 
345
346    int itmax,         /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate return */
347    int verbose,       /* I: verbosity */
348    double opts[SBA_OPTSSZ],
349                             /* I: minim. options [\mu, \epsilon1, \epsilon2]. Respectively the scale factor for initial \mu,
350                        * stopping thresholds for ||J^T e||_inf, ||dp||_2 and ||e||_2
351                        */
352    double info[SBA_INFOSZ]
353                             /* O: information regarding the minimization. Set to NULL if don't care
354                        * info[0]=||e||_2 at initial p.
355                        * info[1-4]=[ ||e||_2, ||J^T e||_inf,  ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
356                        * info[5]= # iterations,
357                        * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
358                        *                                 2 - stopped by small dp
359                        *                                 3 - stopped by itmax
360                        *                                 4 - singular matrix. Restart from current p with increased mu
361                        *                                 5 - stopped by small ||e||_2
362                        *                                 6 - too many attempts to increase damping. Restart with increased mu
363                        * info[7]= # function evaluations
364                        * info[8]= # jacobian evaluations
365                                          * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error
366                        */
367)
368{
369register int i, j, ii, jj, k, l;
370int nvis, nnz, retval;
371
372/* The following are work arrays that are dynamically allocated by sba_motstr_levmar_x() */
373double *jac;  /* work array for storing the jacobian, max. size n*m*(mnp*cnp + mnp*pnp) */
374double *U;    /* work array for storing the U_j in the order U_1, ..., U_m, size m*cnp*cnp */
375double *V;    /* work array for storing the V_i in the order V_1, ..., V_n, size n*pnp*pnp */
376double *V_1;  /* work array for storing the (V*_i)^-1 in the order (V*_1)^-1, ..., (V*_n)^-1, size n*pnp*pnp */
377
378double *e;    /* work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm,
379                 max. size n*m*mnp */
380double *eab;  /* work array for storing the ea_j & eb_i in the order ea_1, .. ea_m eb_1, .. eb_n size m*cnp + n*pnp */
381
382double *E;   /* work array for storing the e_j in the order e_1, .. e_m, size m*cnp */
383
384/* Notice that the blocks W_ij, Y_ij are zero iff A_ij (equivalently B_ij) is zero. This means
385 * that the matrices consisting of blocks W_ij and Y_ij are themselves sparse, similarly to the
386 * block matrices made up of the A_ij and B_ij (i.e. jaca and jacb)
387 */
388double *W;    /* work array for storing the W_ij in the order W_11, ..., W_1m, ..., W_n1, ..., W_nm,
389                 max. size n*m*cnp*pnp */
390double *Y;    /* work array for storing the Y_ij in the order Y_11, ..., Y_1m, ..., Y_n1, ..., Y_nm,
391                 max. size n*m*cnp*pnp */
392double *YWt;  /* work array for storing \sum_i Y_ij W_ik^T, size cnp*cnp */
393double *S;    /* work array for storing the block array S_jk, size m*m*cnp*cnp */
394double *dp;   /* work array for storing the parameter vector updates da_1, ..., da_m, db_1, ..., db_n, size m*cnp + n*pnp */
395double *Wtda; /* work array for storing \sum_j W_ij^T da_j, size pnp */
396
397/* Of the above arrays, jac, e, W, Y are sparse and
398 * U, V, V_1, eab, E, S, dp are dense. Sparse arrays are indexed through
399 * idxij (see below), that is with the same mechanism as the input
400 * measurements vector x
401 */
402
403double *pa, *pb, *jaca, *jacb, *ea, *eb, *dpa, *dpb; /* pointers into p, jac, eab and dp respectively */
404
405/* submatrices sizes */
406int Asz, Bsz, Usz, Vsz,
407    Wsz, Ysz, esz, easz, ebsz,
408    YWtsz, Wtdasz, Sblsz;
409
410int Sdim; /* S matrix actual dimension */
411
412register double *ptr1, *ptr2, *ptr3, *ptr4, sum;
413struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x. This is also the location of A_ij
414                        * in jaca, B_ij in jacb, etc.
415                        * This matrix can be thought as a map from a sparse set of pairs (i, j) to a continuous
416                        * index k and it is used to efficiently lookup the memory locations where the non-zero
417                        * blocks of a sparse matrix/vector are stored
418                        */
419int maxnm=(n>=m)? n:m, /* max. of (n, m) */
420    *rcidxs,  /* work array for the indexes corresponding to the nonzero elements of a single row or
421                 column in a sparse matrix, size max(n, m) */
422    *rcsubs;  /* work array for the subscripts of nonzero elements in a single row or column of a
423                 sparse matrix, size max(n, m) */
424
425/* The following variables are needed by the LM algorithm */
426register int itno;  /* iteration counter */
427int issolved;
428/* temporary work arrays that are dynamically allocated */
429double *hx,         /* \hat{x}_i, max. size m*n*mnp */
430       *diagUV,     /* diagonals of U_j, V_i, size m*cnp + n*pnp */
431       *pdp;        /* p + dp, size m*cnp + n*pnp */
432
433double *diagU, *diagV; /* pointers into diagUV */
434
435register double mu,  /* damping constant */
436                tmp; /* mainly used in matrix & vector multiplications */
437double p_eL2, eab_inf, pdp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+dp)||_2 */
438double p_L2, dp_L2=DBL_MAX, dF, dL;
439double tau=FABS(opts[0]), eps1=FABS(opts[1]), eps2=FABS(opts[2]), eps2_sq=opts[2]*opts[2], eps3_sq=opts[3]*opts[3];
440double init_p_eL2;
441int nu=2, nu2, stop, nfev, njev=0, nlss=0;
442int nobs, nvars;
443const int mmcon=m-mcon;
444
445struct fdj_data_x_ fdj_data;
446void *jac_adata;
447
448/* Initialization */
449
450  /* block sizes */
451  Asz=mnp * cnp; Bsz=mnp * pnp;
452  Usz=cnp * cnp; Vsz=pnp * pnp;
453  Wsz=cnp * pnp; Ysz=cnp * pnp;
454  esz=mnp;
455  easz=cnp; ebsz=pnp;
456  YWtsz=cnp * cnp;
457  Wtdasz=pnp;
458  Sblsz=cnp * cnp;
459  Sdim=mmcon * cnp;
460
461  /* count total number of visible image points */
462  for(i=nvis=0, jj=n*m; i<jj; ++i)
463    nvis+=vmask[i];
464
465  nobs=nvis*mnp;
466  nvars=m*cnp + n*pnp;
467  if(nobs<nvars){
468    fprintf(stderr, "sba_motstr_levmar_x(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n", nobs, nvars);
469    exit(1);
470  }
471
472  /* allocate work arrays */
473  jac=(double *)emalloc(nvis*(Asz+Bsz)*sizeof(double));
474  U=(double *)emalloc(m*Usz*sizeof(double));
475  V=(double *)emalloc(n*Vsz*sizeof(double));
476  V_1=(double *)emalloc(n*Vsz*sizeof(double));
477  e=(double *)emalloc(nobs*sizeof(double));
478  eab=(double *)emalloc(nvars*sizeof(double));
479  E=(double *)emalloc(m*cnp*sizeof(double));
480  W=(double *)emalloc(nvis*Wsz*sizeof(double));
481  Y=(double *)emalloc(nvis*Ysz*sizeof(double));
482  YWt=(double *)emalloc(YWtsz*sizeof(double));
483  S=(double *)emalloc(m*m*Sblsz*sizeof(double));
484  dp=(double *)emalloc(nvars*sizeof(double));
485  Wtda=(double *)emalloc(pnp*sizeof(double));
486  rcidxs=(int *)emalloc(maxnm*sizeof(int));
487  rcsubs=(int *)emalloc(maxnm*sizeof(int));
488
489
490  hx=(double *)emalloc(nobs*sizeof(double));
491  diagUV=(double *)emalloc(nvars*sizeof(double));
492  pdp=(double *)emalloc(nvars*sizeof(double));
493
494
495  /* set up auxiliary pointers */
496  pa=p; pb=p+m*cnp;
497  jaca=jac; jacb=jac+nvis*Asz;
498  ea=eab; eb=eab+m*cnp;
499  dpa=dp; dpb=dp+m*cnp;
500
501  diagU=diagUV; diagV=diagUV + m*cnp;
502
503
504  /* allocate & fill up the idxij structure */
505  sba_crsm_alloc(&idxij, n, m, nvis);
506  for(i=k=0; i<n; ++i){
507    idxij.rowptr[i]=k;
508    ii=i*m;
509    for(j=0; j<m; ++j)
510      if(vmask[ii+j]){
511        idxij.val[k]=k;
512        idxij.colidx[k++]=j;
513      }
514  }
515  idxij.rowptr[n]=nvis;
516
517  /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */
518  if(!fjac){
519    fdj_data.func=func;
520    fdj_data.cnp=cnp;
521    fdj_data.pnp=pnp;
522    fdj_data.mnp=mnp;
523    fdj_data.hx=hx;
524    fdj_data.hxx=(double *)emalloc(nobs*sizeof(double));
525    fdj_data.func_rcidxs=(int *)emalloc(2*maxnm*sizeof(int));
526    fdj_data.func_rcsubs=fdj_data.func_rcidxs+maxnm;
527    fdj_data.adata=adata;
528
529    fjac=sba_fdjac_x;
530    jac_adata=(void *)&fdj_data;
531  }
532  else{
533    fdj_data.hxx=NULL;
534    jac_adata=adata;
535  }
536
537  if(itmax==0){ /* verify jacobian */
538    sba_motstr_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, mcon, cnp, pnp, mnp, adata, jac_adata);
539    retval=0;
540    goto freemem_and_return;
541  }
542
543  /* compute the error vectors e_ij in hx */
544  (*func)(p, &idxij, rcidxs, rcsubs, hx, adata); nfev=1;
545  /* compute e=x - f(p) and its L2 norm */
546  for(i=0, p_eL2=0.0; i<nobs; ++i){
547    e[i]=tmp=x[i]-hx[i];
548    p_eL2+=tmp*tmp;
549  }
550
551  if(verbose) printf("initial motstr-SBA error %g [%g]\n", p_eL2, p_eL2/nvis);
552  init_p_eL2=p_eL2;
553
554  for(itno=stop=0; itno<itmax && !stop; ++itno){
555    /* Note that p, e and ||e||_2 have been updated at the previous iteration */
556
557    /* compute derivative submatrices A_ij, B_ij */
558    (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); ++njev;
559
560    /* compute U_j = \sum_i A_ij^T A_ij */ // \Sigma here!
561    /* U_j is symmetric, therefore its computation can be speeded up by
562     * computing only the upper part and then reusing it for the lower one.
563     * Recall that A_ij is mnp x cnp
564     */
565    /* Also compute ea_j = \sum_i A_ij^T e_ij */ // \Sigma here!
566    /* Recall that e_ij is mnp x 1
567     */
568    _dblzero(U, m*Usz); /* clear all U_j */
569    _dblzero(ea, m*easz); /* clear all ea_j */
570    for(j=mcon; j<m; ++j){
571      ptr1=U + j*Usz; // set ptr1 to point to U_j
572      ptr2=ea + j*easz; // set ptr2 to point to ea_j
573
574      nnz=sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */
575      for(i=0; i<nnz; ++i){
576        /* set ptr3 to point to A_ij, actual row number in rcsubs[i] */
577        ptr3=jaca + idxij.val[rcidxs[i]]*Asz;
578
579        /* compute the UPPER TRIANGULAR PART of A_ij^T A_ij and add it to U_j */
580        for(ii=0; ii<cnp; ++ii){
581          for(jj=ii; jj<cnp; ++jj){
582            for(k=0, sum=0.0; k<mnp; ++k)
583              sum+=ptr3[k*cnp+ii]*ptr3[k*cnp+jj];
584            ptr1[ii*cnp+jj]+=sum;
585          }
586
587          /* copy the LOWER TRIANGULAR PART of U_j from the upper one */
588          for(jj=0; jj<ii; ++jj)
589            ptr1[ii*cnp+jj]=ptr1[jj*cnp+ii];
590        }
591
592        ptr4=e + idxij.val[rcidxs[i]]*esz; /* set ptr4 to point to e_ij */
593        /* compute A_ij^T e_ij and add it to ea_j */
594        for(ii=0; ii<cnp; ++ii){
595          for(jj=0, sum=0.0; jj<mnp; ++jj)
596            sum+=ptr3[jj*cnp+ii]*ptr4[jj];
597          ptr2[ii]+=sum;
598        }
599      }
600    }
601
602    /* compute V_i = \sum_j B_ij^T B_ij */ // \Sigma here!
603    /* V_i is symmetric, therefore its computation can be speeded up by
604     * computing only the upper part and then reusing it for the lower one.
605     * Recall that B_ij is mnp x pnp
606     */
607    /* Also compute eb_i = \sum_j B_ij^T e_ij */ // \Sigma here!
608    /* Recall that e_ij is mnp x 1
609     */
610          _dblzero(V, n*Vsz); /* clear all V_i */
611          _dblzero(eb, n*ebsz); /* clear all eb_i */
612    for(i=0; i<n; ++i){
613      ptr1=V + i*Vsz; // set ptr1 to point to V_i
614      ptr2=eb + i*ebsz; // set ptr2 to point to eb_i
615
616      nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */
617      for(j=0; j<nnz; ++j){
618        /* set ptr3 to point to B_ij, actual column number in rcsubs[j] */
619        ptr3=jacb + idxij.val[rcidxs[j]]*Bsz;
620     
621        /* compute the UPPER TRIANGULAR PART of B_ij^T B_ij and add it to V_i */
622        for(ii=0; ii<pnp; ++ii){
623          for(jj=ii; jj<pnp; ++jj){
624            for(k=0, sum=0.0; k<mnp; ++k)
625              sum+=ptr3[k*pnp+ii]*ptr3[k*pnp+jj];
626            ptr1[ii*pnp+jj]+=sum;
627          }
628
629          /* copy the LOWER TRIANGULAR PART of V_i from the upper one */
630          for(jj=0; jj<ii; ++jj)
631            ptr1[ii*pnp+jj]=ptr1[jj*pnp+ii];
632        }
633
634        ptr4=e + idxij.val[rcidxs[j]]*esz; /* set ptr4 to point to e_ij */
635        /* compute B_ij^T e_ij and add it to eb_i */
636        for(ii=0; ii<pnp; ++ii){
637          for(jj=0, sum=0.0; jj<mnp; ++jj)
638            sum+=ptr3[jj*pnp+ii]*ptr4[jj];
639          ptr2[ii]+=sum;
640        }
641      }
642    }
643
644    /* compute W_ij =  A_ij^T B_ij */ // \Sigma here!
645    /* Recall that A_ij is mnp x cnp and B_ij is mnp x pnp
646     */
647    _dblzero(W, nvis*Wsz); /* clear all W_ij */
648    for(i=0; i<n; ++i){
649      nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */
650      for(j=0; j<nnz; ++j){
651        /* set ptr1 to point to W_ij, actual column number in rcsubs[j] */
652        if(rcsubs[j]<mcon) continue; /* A_ij is zero */
653
654        ptr1=W + idxij.val[rcidxs[j]]*Wsz;
655        /* set ptr2 & ptr3 to point to A_ij & B_ij resp. */
656        ptr2=jaca + idxij.val[rcidxs[j]]*Asz;
657        ptr3=jacb + idxij.val[rcidxs[j]]*Bsz;
658        /* compute A_ij^T B_ij and store it in W_ij */
659        for(ii=0; ii<cnp; ++ii)
660          for(jj=0; jj<pnp; ++jj){
661            for(k=0, sum=0.0; k<mnp; ++k)
662              sum+=ptr2[k*cnp+ii]*ptr3[k*pnp+jj];
663            ptr1[ii*pnp+jj]=sum;
664          }
665      }
666    }
667
668    /* Compute ||J^T e||_inf and ||p||^2 */
669    for(i=0, p_L2=eab_inf=0.0; i<nvars; ++i){
670      if(eab_inf < (tmp=FABS(eab[i]))) eab_inf=tmp;
671      p_L2+=p[i]*p[i];
672    }
673    //p_L2=sqrt(p_L2);
674
675    /* save diagonal entries so that augmentation can be later canceled.
676     * Diagonal entries are in U_j and V_i
677     */
678    for(j=mcon; j<m; ++j){
679      ptr1=U + j*Usz; // set ptr1 to point to U_j
680      ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
681      for(i=0; i<cnp; ++i)
682        ptr2[i]=ptr1[i*cnp+i];
683    }
684    for(i=0; i<n; ++i){
685      ptr1=V + i*Vsz; // set ptr1 to point to V_i
686      ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
687      for(j=0; j<pnp; ++j)
688        ptr2[j]=ptr1[j*pnp+j];
689    }
690
691/*
692if(!(itno%100)){
693  printf("Current estimate: ");
694  for(i=0; i<nvars; ++i)
695    printf("%.9g ", p[i]);
696  printf("-- errors %.9g %0.9g\n", eab_inf, p_eL2);
697}
698*/
699
700    /* check for convergence */
701    if((eab_inf <= eps1)){
702      dp_L2=0.0; /* no increment for p in this case */
703      stop=1;
704      break;
705    }
706
707   /* compute initial damping factor */
708    if(itno==0){
709      for(i=mcon*cnp, tmp=DBL_MIN; i<nvars; ++i)
710        if(diagUV[i]>tmp) tmp=diagUV[i]; /* find max diagonal element */
711      mu=tau*tmp;
712    }
713
714    /* determine increment using adaptive damping */
715    while(1){
716      /* augment U, V */
717      for(j=mcon; j<m; ++j){
718        ptr1=U + j*Usz; // set ptr1 to point to U_j
719        for(i=0; i<cnp; ++i)
720          ptr1[i*cnp+i]+=mu;
721      }
722      for(i=0; i<n; ++i){
723        ptr1=V + i*Vsz; // set ptr1 to point to V_i
724        for(j=0; j<pnp; ++j)
725          ptr1[j*pnp+j]+=mu;
726
727                    /* compute V*_i^-1 */
728                    ptr2=V_1 + i*Vsz; // set ptr2 to point to (V*_i)^-1
729        /* inverting V*_i with LU seems to be faster than Cholesky */
730        j=sba_mat_invert_LU(ptr1, ptr2, pnp); //invert the pnp x pnp matrix pointed to by ptr1 and save it in ptr2
731        //j=sba_mat_invert_Chol(ptr1, ptr2, pnp); //invert the pnp x pnp matrix pointed to by ptr1 and save it in ptr2
732                    if(!j){
733                            fprintf(stderr, "Singular matrix V*_i (i=%d) in sba_motstr_levmar_x()\n", i);
734                            exit(1);
735                    }
736      }
737
738      /* compute Y_ij = W_ij (V*_i)^-1
739       * Recall that W_ij is cnp x pnp and (V*_i) is pnp x pnp
740       */
741      _dblzero(Y, nvis*Ysz); /* clear all Y_ij */
742      for(i=0; i<n; ++i){
743        /* set ptr3 to point to (V*_i)^-1 */
744        ptr3=V_1 + i*Vsz;
745        nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */
746        for(j=0; j<nnz; ++j){
747          /* set ptr1 to point to Y_ij, actual column number in rcsubs[j] */
748                      if(rcsubs[j]<mcon) continue; /* W_ij is zero */
749
750          ptr1=Y + idxij.val[rcidxs[j]]*Ysz;
751          /* set ptr2 to point to W_ij resp. */
752          ptr2=W + idxij.val[rcidxs[j]]*Wsz;
753          /* compute W_ij (V*_i)^-1 and store it in Y_ij */
754          for(ii=0; ii<cnp; ++ii)
755            for(jj=0; jj<pnp; ++jj){
756              for(k=0, sum=0.0; k<pnp; ++k)
757                sum+=ptr2[ii*pnp+k]*ptr3[k*pnp+jj];
758              ptr1[ii*pnp+jj]=sum;
759            }
760        }
761      }
762
763      _dblzero(E, m*easz); /* clear all e_j */
764      /* compute the mmcon x mmcon block matrix S and e_j */
765
766      /* Note that S is symmetric, therefore its computation can be
767       * speeded up by computing only the upper part and then reusing
768       * it for the lower one.
769                   */
770      for(j=mcon; j<m; ++j){
771                    nnz=sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero Y_ij, i=0...n-1 */
772
773        /* compute the UPPER TRIANGULAR PART of S */
774        for(k=j; k<m; ++k){ // j>=mcon
775          /* compute \sum_i Y_ij W_ik^T in YWt */
776          /* Recall that Y_ij is cnp x pnp and W_ik is cnp x pnp */ 
777          _dblzero(YWt, YWtsz); /* clear YWt */
778
779          for(i=0; i<nnz; ++i){
780            register double *pYWt;
781
782            /* find the min and max column indices of the elements in row i (actually rcsubs[i])
783             * and make sure that k falls within them. This test handles W_ik's which are
784             * certain to be zero without bothering to call sba_crsm_elmidx()
785             */
786            ii=idxij.colidx[idxij.rowptr[rcsubs[i]]];
787            jj=idxij.colidx[idxij.rowptr[rcsubs[i]+1]-1];
788            if(k<ii || k>jj) continue; /* W_ik == 0 */
789
790            /* set ptr2 to point to W_ik */
791            l=sba_crsm_elmidx(&idxij, rcsubs[i], k);
792            if(l==-1) continue; /* W_ik == 0 */
793
794            ptr2=W + idxij.val[l]*Wsz;
795            /* set ptr1 to point to Y_ij, actual row number in rcsubs[i] */
796            ptr1=Y + idxij.val[rcidxs[i]]*Ysz;
797            for(ii=0; ii<cnp; ++ii){
798              ptr3=ptr1+ii*pnp;
799              pYWt=YWt+ii*cnp;
800
801              for(jj=0; jj<cnp; ++jj){
802                ptr4=ptr2+jj*pnp;
803                for(l=0, sum=0.0; l<pnp; ++l)
804                  sum+=ptr3[l]*ptr4[l]; //ptr1[ii*pnp+l]*ptr2[jj*pnp+l];
805                pYWt[jj]+=sum; //YWt[ii*cnp+jj]+=sum;
806              }
807            }
808          }
809
810          ptr1=U + j*Usz; // set ptr1 to point to U_j
811                 
812                      /* since the linear system involving S is solved with lapack,
813                       * it is preferable to store S in column major (i.e. fortran)
814                       * order, so as to avoid unecessary transposing/copying.
815           */
816#if MAT_STORAGE==COLUMN_MAJOR
817          ptr2=S + (k-mcon)*mmcon*Usz + (j-mcon)*cnp; // set ptr2 to point to the beginning of block j,k in S
818#else
819          ptr2=S + (j-mcon)*mmcon*Usz + (k-mcon)*cnp; // set ptr2 to point to the beginning of block j,k in S
820#endif
821                 
822          if(j!=k){ /* Kronecker */
823            for(ii=0; ii<cnp; ++ii, ptr2+=Sdim)
824              for(jj=0; jj<cnp; ++jj)
825                ptr2[jj]=
826#if MAT_STORAGE==COLUMN_MAJOR
827                                                -YWt[jj*cnp+ii];
828#else
829                                                -YWt[ii*cnp+jj];
830#endif
831          }
832          else{
833            for(ii=0; ii<cnp; ++ii, ptr2+=Sdim)
834              for(jj=0; jj<cnp; ++jj)
835                ptr2[jj]=
836#if MAT_STORAGE==COLUMN_MAJOR
837                                                ptr1[jj*cnp+ii] - YWt[jj*cnp+ii];
838#else
839                                                ptr1[ii*cnp+jj] - YWt[ii*cnp+jj];
840#endif
841          }
842        }
843
844        /* copy the LOWER TRIANGULAR PART of S from the upper one */
845        for(k=mcon; k<j; ++k){
846#if MAT_STORAGE==COLUMN_MAJOR
847          ptr1=S + (k-mcon)*mmcon*Usz + (j-mcon)*cnp; // set ptr1 to point to the beginning of block j,k in S
848          ptr2=S + (j-mcon)*mmcon*Usz + (k-mcon)*cnp; // set ptr2 to point to the beginning of block k,j in S
849#else
850          ptr1=S + (j-mcon)*mmcon*Usz + (k-mcon)*cnp; // set ptr1 to point to the beginning of block j,k in S
851          ptr2=S + (k-mcon)*mmcon*Usz + (j-mcon)*cnp; // set ptr2 to point to the beginning of block k,j in S
852#endif
853          for(ii=0; ii<cnp; ++ii, ptr1+=Sdim)
854            for(jj=0, ptr3=ptr2+ii; jj<cnp; ++jj, ptr3+=Sdim)
855              ptr1[jj]=*ptr3;
856        }
857
858        /* compute e_j=ea_j - \sum_i Y_ij eb_i */
859        /* Recall that Y_ij is cnp x pnp and eb_i is pnp x 1 */
860        ptr1=E + j*easz; // set ptr1 to point to e_j
861
862        for(i=0; i<nnz; ++i){
863          /* set ptr2 to point to Y_ij, actual row number in rcsubs[i] */
864          ptr2=Y + idxij.val[rcidxs[i]]*Ysz;
865
866          /* set ptr3 to point to eb_i */
867          ptr3=eb + rcsubs[i]*ebsz;
868          for(ii=0; ii<cnp; ++ii){
869            ptr4=ptr2+ii*pnp;
870            for(jj=0, sum=0.0; jj<pnp; ++jj)
871              sum+=ptr4[jj]*ptr3[jj]; //ptr2[ii*pnp+jj]*ptr3[jj];
872            ptr1[ii]+=sum;
873          }
874        }
875
876        ptr2=ea + j*easz; // set ptr2 to point to ea_j
877        for(i=0; i<easz; ++i)
878          ptr1[i]=ptr2[i] - ptr1[i];
879      }
880
881#if 0
882      if(verbose>1){ /* count the nonzeros in S */
883        for(i=ii=0; i<Sdim*Sdim; ++i)
884          if(S[i]!=0.0) ++ii;
885        printf("\nS density %10g\n", ((double)ii)/(Sdim*Sdim));
886
887      }
888#endif
889
890      /* solve the linear system S dpa = E to compute the da_j.
891       *
892       * Note that if MAT_STORAGE==1 S is modified in the following call;
893       * this is OK since S is recomputed for each iteration
894       */
895            //issolved=sba_Axb_LU(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); ++nlss;
896      issolved=sba_Axb_Chol(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); ++nlss;
897      //issolved=sba_Axb_QRnoQ(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); ++nlss;
898      //issolved=sba_Axb_QR(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); ++nlss;
899            //issolved=sba_Axb_SVD(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); ++nlss;
900            //issolved=sba_Axb_CG(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, (3*Sdim)/2, 1E-10, SBA_CG_JACOBI, MAT_STORAGE); ++nlss;
901
902            _dblzero(dpa, mcon*cnp); /* no change for the first mcon camera params */
903
904      if(issolved){
905
906        /* compute the db_i */
907        for(i=0; i<n; ++i){
908          ptr1=dpb + i*ebsz; // set ptr1 to point to db_i
909
910          /* compute \sum_j W_ij^T da_j */
911          /* Recall that W_ij is cnp x pnp and da_j is cnp x 1 */
912          _dblzero(Wtda, Wtdasz); /* clear Wtda */
913          nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */
914          for(j=0; j<nnz; ++j){
915            /* set ptr2 to point to W_ij, actual column number in rcsubs[j] */
916                              if(rcsubs[j]<mcon) continue; /* W_ij is zero */
917
918            ptr2=W + idxij.val[rcidxs[j]]*Wsz;
919
920            /* set ptr3 to point to da_j */
921            ptr3=dpa + rcsubs[j]*cnp;
922
923            for(ii=0; ii<pnp; ++ii){
924              ptr4=ptr2+ii;
925              for(jj=0, sum=0.0; jj<cnp; ++jj)
926                sum+=ptr4[jj*pnp]*ptr3[jj]; //ptr2[jj*pnp+ii]*ptr3[jj];
927              Wtda[ii]+=sum;
928            }
929          }
930
931          /* compute eb_i - \sum_j W_ij^T da_j = eb_i - Wtda in Wtda */
932          ptr2=eb + i*ebsz; // set ptr2 to point to eb_i
933          for(ii=0; ii<pnp; ++ii)
934            Wtda[ii]=ptr2[ii] - Wtda[ii];
935
936          /* compute the product (V*_i)^-1 Wtda = (V*_i)^-1 (eb_i - \sum_j W_ij^T da_j) */
937          ptr2=V_1 + i*Vsz; // set ptr2 to point to (V*_i)^-1
938          for(ii=0; ii<pnp; ++ii){
939            for(jj=0, sum=0.0; jj<pnp; ++jj)
940              sum+=ptr2[ii*pnp+jj]*Wtda[jj];
941            ptr1[ii]=sum;
942          }
943        }
944
945        /* parameter vector updates are now in dpa, dpb */
946
947        /* compute p's new estimate and ||dp||^2 */
948        for(i=0, dp_L2=0.0; i<nvars; ++i){
949          pdp[i]=p[i] + (tmp=dp[i]);
950          dp_L2+=tmp*tmp;
951        }
952        //dp_L2=sqrt(dp_L2);
953
954        if(dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */
955        //if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */
956          stop=2;
957          break;
958        }
959
960       if(dp_L2>=(p_L2+eps2)/SBA_EPSILON_SQ){ /* almost singular */
961       //if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */
962         stop=4;
963         break;
964       }
965
966        (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata); ++nfev; /* evaluate function at p + dp */
967        if(verbose>1)
968          printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs));
969        for(i=0, pdp_eL2=0.0; i<nobs; ++i){ /* compute ||e(pdp)||_2 */
970          hx[i]=tmp=x[i]-hx[i];
971          pdp_eL2+=tmp*tmp;
972        }
973
974        for(i=0, dL=0.0; i<nvars; ++i)
975          dL+=dp[i]*(mu*dp[i]+eab[i]);
976
977        dF=p_eL2-pdp_eL2;
978
979        if(verbose>1)
980          printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu, dL!=0.0? dF/dL : dF/DBL_EPSILON, p_eL2/nvis, pdp_eL2/nvis, p_eL2/pdp_eL2);
981
982        if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */
983          tmp=(2.0*dF/dL-1.0);
984          tmp=1.0-tmp*tmp*tmp;
985          mu=mu*( (tmp>=SBA_ONE_THIRD)? tmp : SBA_ONE_THIRD );
986          nu=2;
987
988          for(i=0; i<nvars; ++i) /* update p's estimate */
989            p[i]=pdp[i];
990
991          for(i=0; i<nobs; ++i) /* update e and ||e||_2 */
992            e[i]=hx[i];
993          p_eL2=pdp_eL2;
994          break;
995        }
996      } /* issolved */
997
998      /* if this point is reached, either the linear system could not be solved or
999       * the error did not reduce; in any case, the increment must be rejected
1000       */
1001
1002      mu*=nu;
1003      nu2=nu<<1; // 2*nu;
1004      if(nu2<=nu){ /* nu has wrapped around (overflown) */
1005        fprintf(stderr, "Too many failed attempts to increase the damping factor in sba_motstr_levmar_x()! Singular hessian matrix?\n");
1006        //exit(1);
1007        stop=6;
1008        break;
1009      }
1010      nu=nu2;
1011
1012      /* restore U, V diagonal entries */
1013      for(j=mcon; j<m; ++j){
1014        ptr1=U + j*Usz; // set ptr1 to point to U_j
1015        ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
1016        for(i=0; i<cnp; ++i)
1017          ptr1[i*cnp+i]=ptr2[i];
1018      }
1019      for(i=0; i<n; ++i){
1020        ptr1=V + i*Vsz; // set ptr1 to point to V_i
1021        ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
1022        for(j=0; j<pnp; ++j)
1023          ptr1[j*pnp+j]=ptr2[j];
1024      }
1025    } /* inner loop */
1026
1027    if(p_eL2<=eps3_sq) stop=5; // error is small, force termination of outer loop
1028  }
1029
1030  if(itno>=itmax) stop=3;
1031
1032  /* restore U, V diagonal entries */
1033  for(j=mcon; j<m; ++j){
1034    ptr1=U + j*Usz; // set ptr1 to point to U_j
1035    ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
1036    for(i=0; i<cnp; ++i)
1037      ptr1[i*cnp+i]=ptr2[i];
1038  }
1039  for(i=0; i<n; ++i){
1040    ptr1=V + i*Vsz; // set ptr1 to point to V_i
1041    ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
1042    for(j=0; j<pnp; ++j)
1043     ptr1[j*pnp+j]=ptr2[j];
1044  }
1045
1046  if(info){
1047    info[0]=init_p_eL2;
1048    info[1]=p_eL2;
1049    info[2]=eab_inf;
1050    info[3]=dp_L2;
1051    for(j=mcon, tmp=DBL_MIN; j<m; ++j){
1052      ptr1=U + j*Usz; // set ptr1 to point to U_j
1053      for(i=0; i<cnp; ++i)
1054        if(tmp<ptr1[i*cnp+i]) tmp=ptr1[i*cnp+i];
1055    }
1056    for(i=0; i<n; ++i){
1057      ptr1=V + i*Vsz; // set ptr1 to point to V_i
1058      for(j=0; j<pnp; ++j)
1059        if(tmp<ptr1[j*pnp+j]) tmp=ptr1[j*pnp+j];
1060      }
1061    info[4]=mu/tmp;
1062    info[5]=itno;
1063    info[6]=stop;
1064    info[7]=nfev;
1065    info[8]=njev;
1066    info[9]=nlss;
1067  }
1068                                                               
1069  //sba_print_sol(n, m, p, cnp, pnp, x, mnp, &idxij, rcidxs, rcsubs);
1070  retval=(stop!=4)?  itno : -1;
1071
1072freemem_and_return: /* NOTE: this point is also reached via a goto! */
1073
1074   /* free whatever was allocated */
1075  free(jac); free(U); free(V);
1076  free(V_1); free(e); free(eab); 
1077  free(E);   free(W); free(Y);         
1078  free(YWt); free(S); free(dp);               
1079  free(Wtda);
1080  free(rcidxs); free(rcsubs);
1081
1082  free(hx); free(diagUV); free(pdp);
1083  if(fdj_data.hxx){ // cleanup
1084    free(fdj_data.hxx);
1085    free(fdj_data.func_rcidxs);
1086  }
1087
1088  sba_crsm_free(&idxij);
1089
1090  return retval;
1091}
1092
1093
1094/* Bundle adjustment on camera parameters only
1095 * using the sparse Levenberg-Marquardt as described in HZ p. 568
1096 */
1097
1098int sba_mot_levmar_x(
1099    const int n,   /* number of points */
1100    const int m,   /* number of images */
1101    const int mcon,/* number of images (starting from the 1st) whose parameters should not be modified.
1102                                                  * All A_ij (see below) with j<mcon are assumed to be zero
1103                                                  */
1104    char *vmask,  /* visibility mask: vmask[i][j]=1 if point i visible in image j, 0 otherwise. nxm */
1105    double *p,    /* initial parameter vector p0: (a1, ..., am).
1106                   * aj are the image j parameters, size m*cnp */
1107    const int cnp,/* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */
1108    double *x,    /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where
1109                   * x_ij is the projection of the i-th point on the j-th image.
1110                   * NOTE: some of the x_ij might be missing, if point i is not visible in image j;
1111                   * see vmask[i][j], max. size n*m*mnp
1112                   */
1113    const int mnp,/* number of parameters for EACH measurement; usually 2 */
1114    void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
1115                                              /* functional relation describing measurements. Given a parameter vector p,
1116                                               * computes a prediction of the measurements \hat{x}. p is (m*cnp)x1,
1117                                               * \hat{x} is (n*m*mnp)x1, maximum
1118                                               * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
1119                                               * as working memory
1120                                               */
1121    void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata),
1122                                              /* function to evaluate the sparse jacobian dX/dp.
1123                                               * The Jacobian is returned in jac as
1124                                               * (dx_11/da_1, ..., dx_1m/da_m, ..., dx_n1/da_1, ..., dx_nm/da_m), or (using HZ's notation),
1125                                               * jac=(A_11, ..., A_1m, ..., A_n1, ..., A_nm)
1126                                               * Notice that depending on idxij, some of the A_ij might be missing.
1127                                               * Note also that the A_ij are mnp x cnp matrices and they
1128                                               * should be stored in jac in row-major order.
1129                                               * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
1130                                               * as working memory
1131                                               *
1132                                               * If NULL, the jacobian is approximated by repetitive func calls and finite
1133                                               * differences. This is computationally inefficient and thus NOT recommended.
1134                                               */
1135    void *adata,       /* pointer to possibly additional data, passed uninterpreted to func, fjac */ 
1136
1137    int itmax,         /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate return */
1138    int verbose,       /* I: verbosity */
1139    double opts[SBA_OPTSSZ],
1140                             /* I: minim. options [\mu, \epsilon1, \epsilon2]. Respectively the scale factor for initial \mu,
1141                        * stopping thresholds for ||J^T e||_inf, ||dp||_2 and ||e||_2
1142                        */
1143    double info[SBA_INFOSZ]
1144                             /* O: information regarding the minimization. Set to NULL if don't care
1145                        * info[0]=||e||_2 at initial p.
1146                        * info[1-4]=[ ||e||_2, ||J^T e||_inf,  ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
1147                        * info[5]= # iterations,
1148                        * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
1149                        *                                 2 - stopped by small dp
1150                        *                                 3 - stopped by itmax
1151                        *                                 4 - singular matrix. Restart from current p with increased mu
1152                        *                                 5 - stopped by small ||e||_2
1153                        *                                 6 - too many attempts to increase damping. Restart with increased mu
1154                        * info[7]= # function evaluations
1155                        * info[8]= # jacobian evaluations
1156                                          * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error
1157                        */
1158)
1159{
1160register int i, j, ii, jj, k;
1161int nvis, nnz, retval;
1162
1163/* The following are work arrays that are dynamically allocated by sba_mot_levmar_x() */
1164double *jac; /* work array for storing the jacobian, max. size n*m*mnp*cnp */
1165double *U;    /* work array for storing the U_j in the order U_1, ..., U_m, size m*cnp*cnp */
1166
1167double *e;    /* work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm,
1168                 max. size n*m*mnp */
1169double *ea;   /* work array for storing the ea_j in the order ea_1, .. ea_m, size m*cnp */
1170
1171double *dp;   /* work array for storing the parameter vector updates da_1, ..., da_m, size m*cnp */
1172
1173/* Of the above arrays, jac, e are sparse and
1174 * U, ea, dp are dense. Sparse arrays are indexed through
1175 * idxij (see below), that is with the same mechanism as the input
1176 * measurements vector x
1177 */
1178
1179/* submatrices sizes */
1180int Asz, Usz,
1181    esz, easz;
1182
1183register double *ptr1, *ptr2, *ptr3, *ptr4, sum;
1184struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x. This is also the location of A_ij
1185                        * in jac, e_ij in e, etc.
1186                        * This matrix can be thought as a map from a sparse set of pairs (i, j) to a continuous
1187                        * index k and it is used to efficiently lookup the memory locations where the non-zero
1188                        * blocks of a sparse matrix/vector are stored
1189                        */
1190int maxnm=(n>=m)? n:m, /* max. of (n, m) */
1191    *rcidxs,  /* work array for the indexes corresponding to the nonzero elements of a single row or
1192                 column in a sparse matrix, size max(n, m) */
1193    *rcsubs;  /* work array for the subscripts of nonzero elements in a single row or column of a
1194                 sparse matrix, size max(n, m) */
1195
1196/* The following variables are needed by the LM algorithm */
1197register int itno;  /* iteration counter */
1198int nsolved;
1199/* temporary work arrays that are dynamically allocated */
1200double *hx,         /* \hat{x}_i, max. size m*n*mnp */
1201       *diagU,      /* diagonals of U_j, size m*cnp */
1202       *pdp;        /* p + dp, size m*cnp */
1203
1204register double mu,  /* damping constant */
1205                tmp; /* mainly used in matrix & vector multiplications */
1206double p_eL2, ea_inf, pdp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+dp)||_2 */
1207double p_L2, dp_L2=DBL_MAX, dF, dL;
1208double tau=FABS(opts[0]), eps1=FABS(opts[1]), eps2=FABS(opts[2]), eps2_sq=opts[2]*opts[2], eps3_sq=opts[3]*opts[3];
1209double init_p_eL2;
1210int nu=2, nu2, stop, nfev, njev=0, nlss=0;
1211int nobs, nvars;
1212
1213struct fdj_data_x_ fdj_data;
1214void *jac_adata;
1215
1216/* Initialization */
1217
1218  /* block sizes */
1219  Asz=mnp * cnp; Usz=cnp * cnp;
1220  esz=mnp; easz=cnp;
1221 
1222  /* count total number of visible image points */
1223  for(i=nvis=0, jj=n*m; i<jj; ++i)
1224    nvis+=vmask[i];
1225
1226  nobs=nvis*mnp;
1227  nvars=m*cnp;
1228  if(nobs<nvars){
1229    fprintf(stderr, "sba_mot_levmar_x(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n", nobs, nvars);
1230    exit(1);
1231  }
1232
1233  /* allocate work arrays */
1234  jac=(double *)emalloc(nvis*Asz*sizeof(double));
1235  U=(double *)emalloc(m*Usz*sizeof(double));
1236  e=(double *)emalloc(nobs*sizeof(double));
1237  ea=(double *)emalloc(nvars*sizeof(double));
1238  dp=(double *)emalloc(nvars*sizeof(double));
1239  rcidxs=(int *)emalloc(maxnm*sizeof(int));
1240  rcsubs=(int *)emalloc(maxnm*sizeof(int));
1241
1242
1243  hx=(double *)emalloc(nobs*sizeof(double));
1244  diagU=(double *)emalloc(nvars*sizeof(double));
1245  pdp=(double *)emalloc(nvars*sizeof(double));
1246
1247  /* allocate & fill up the idxij structure */
1248  sba_crsm_alloc(&idxij, n, m, nvis);
1249  for(i=k=0; i<n; ++i){
1250    idxij.rowptr[i]=k;
1251    ii=i*m;
1252    for(j=0; j<m; ++j)
1253      if(vmask[ii+j]){
1254        idxij.val[k]=k;
1255        idxij.colidx[k++]=j;
1256      }
1257  }
1258  idxij.rowptr[n]=nvis;
1259
1260  /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */
1261  if(!fjac){
1262    fdj_data.func=func;
1263    fdj_data.cnp=cnp;
1264    fdj_data.pnp=0;
1265    fdj_data.mnp=mnp;
1266    fdj_data.hx=hx;
1267    fdj_data.hxx=(double *)emalloc(nobs*sizeof(double));
1268    fdj_data.func_rcidxs=(int *)emalloc(2*maxnm*sizeof(int));
1269    fdj_data.func_rcsubs=fdj_data.func_rcidxs+maxnm;
1270    fdj_data.adata=adata;
1271
1272    fjac=sba_fdjac_x;
1273    jac_adata=(void *)&fdj_data;
1274  }
1275  else{
1276    fdj_data.hxx=NULL;
1277    jac_adata=adata;
1278  }
1279
1280  if(itmax==0){ /* verify jacobian */
1281    sba_mot_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, mcon, cnp, mnp, adata, jac_adata);
1282    retval=0;
1283    goto freemem_and_return;
1284  }
1285
1286  /* compute the error vectors e_ij in hx */
1287  (*func)(p, &idxij, rcidxs, rcsubs, hx, adata); nfev=1;
1288  /* compute e=x - f(p) and its L2 norm */
1289  for(i=0, p_eL2=0.0; i<nobs; ++i){
1290    e[i]=tmp=x[i]-hx[i];
1291    p_eL2+=tmp*tmp;
1292  }
1293
1294  if(verbose) printf("initial mot-SBA error %g [%g]\n", p_eL2, p_eL2/nvis);
1295  init_p_eL2=p_eL2;
1296
1297  for(itno=stop=0; itno<itmax && !stop; ++itno){
1298    /* Note that p, e and ||e||_2 have been updated at the previous iteration */
1299
1300    /* compute derivative submatrices A_ij */
1301    (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); ++njev;
1302
1303    /* compute U_j = \sum_i A_ij^T A_ij */ // \Sigma here!
1304    /* U_j is symmetric, therefore its computation can be speeded up by
1305     * computing only the upper part and then reusing it for the lower one.
1306     * Recall that A_ij is mnp x cnp
1307     */
1308    /* Also compute ea_j = \sum_i A_ij^T e_ij */ // \Sigma here!
1309    /* Recall that e_ij is mnp x 1
1310     */
1311          _dblzero(U, m*Usz); /* clear all U_j */
1312          _dblzero(ea, m*easz); /* clear all ea_j */
1313    for(j=mcon; j<m; ++j){
1314      ptr1=U + j*Usz; // set ptr1 to point to U_j
1315      ptr2=ea + j*easz; // set ptr2 to point to ea_j
1316
1317      nnz=sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */
1318      for(i=0; i<nnz; ++i){
1319        /* set ptr3 to point to A_ij, actual row number in rcsubs[i] */
1320        ptr3=jac + idxij.val[rcidxs[i]]*Asz;
1321
1322        /* compute the UPPER TRIANGULAR PART of A_ij^T A_ij and add it to U_j */
1323        for(ii=0; ii<cnp; ++ii){
1324          for(jj=ii; jj<cnp; ++jj){
1325            for(k=0, sum=0.0; k<mnp; ++k)
1326              sum+=ptr3[k*cnp+ii]*ptr3[k*cnp+jj];
1327            ptr1[ii*cnp+jj]+=sum;
1328          }
1329
1330          /* copy the LOWER TRIANGULAR PART of U_j from the upper one */
1331          for(jj=0; jj<ii; ++jj)
1332            ptr1[ii*cnp+jj]=ptr1[jj*cnp+ii];
1333        }
1334
1335        ptr4=e + idxij.val[rcidxs[i]]*esz; /* set ptr4 to point to e_ij */
1336        /* compute A_ij^T e_ij and add it to ea_j */
1337        for(ii=0; ii<cnp; ++ii){
1338          for(jj=0, sum=0.0; jj<mnp; ++jj)
1339            sum+=ptr3[jj*cnp+ii]*ptr4[jj];
1340          ptr2[ii]+=sum;
1341        }
1342      }
1343    }
1344
1345    /* Compute ||J^T e||_inf and ||p||^2 */
1346    for(i=0, p_L2=ea_inf=0.0; i<nvars; ++i){
1347      if(ea_inf < (tmp=FABS(ea[i]))) ea_inf=tmp;
1348      p_L2+=p[i]*p[i];
1349    }
1350    //p_L2=sqrt(p_L2);
1351
1352    /* save diagonal entries so that augmentation can be later canceled.
1353     * Diagonal entries are in U_j
1354     */
1355    for(j=mcon; j<m; ++j){
1356      ptr1=U + j*Usz; // set ptr1 to point to U_j
1357      ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
1358      for(i=0; i<cnp; ++i)
1359        ptr2[i]=ptr1[i*cnp+i];
1360    }
1361
1362/*
1363if(!(itno%100)){
1364  printf("Current estimate: ");
1365  for(i=0; i<nvars; ++i)
1366    printf("%.9g ", p[i]);
1367  printf("-- errors %.9g %0.9g\n", ea_inf, p_eL2);
1368}
1369*/
1370
1371    /* check for convergence */
1372    if((ea_inf <= eps1)){
1373      dp_L2=0.0; /* no increment for p in this case */
1374      stop=1;
1375      break;
1376    }
1377
1378   /* compute initial damping factor */
1379    if(itno==0){
1380      for(i=mcon*cnp, tmp=DBL_MIN; i<nvars; ++i)
1381        if(diagU[i]>tmp) tmp=diagU[i]; /* find max diagonal element */
1382      mu=tau*tmp;
1383    }
1384
1385    /* determine increment using adaptive damping */
1386    while(1){
1387      /* augment U */
1388      for(j=mcon; j<m; ++j){
1389        ptr1=U + j*Usz; // set ptr1 to point to U_j
1390        for(i=0; i<cnp; ++i)
1391          ptr1[i*cnp+i]+=mu;
1392      }
1393 
1394      /* solve the linear systems U_j da_j = ea_j to compute the da_j */
1395      _dblzero(dp, mcon*cnp); /* no change for the first mcon camera params */
1396      for(j=nsolved=mcon; j<m; ++j){
1397                    ptr1=U + j*Usz; // set ptr1 to point to U_j
1398                    ptr2=ea + j*easz; // set ptr2 to point to ea_j
1399                    ptr3=dp + j*cnp; // set ptr3 to point to da_j
1400
1401                    //nsolved+=sba_Axb_LU(ptr1, ptr2, ptr3, cnp, 0); ++nlss;
1402                    nsolved+=sba_Axb_Chol(ptr1, ptr2, ptr3, cnp, 0); ++nlss;
1403                    //nsolved+=sba_Axb_BK(ptr1, ptr2, ptr3, cnp, 0); ++nlss;
1404                    //nsolved+=sba_Axb_QRnoQ(ptr1, ptr2, ptr3, cnp, 0); ++nlss;
1405                    //nsolved+=sba_Axb_QR(ptr1, ptr2, ptr3, cnp, 0); ++nlss;
1406                    //nsolved+=sba_Axb_SVD(ptr1, ptr2, ptr3, cnp, 0); ++nlss;
1407              //nsolved+=(sba_Axb_CG(ptr1, ptr2, ptr3, cnp, (3*cnp)/2, 1E-10, SBA_CG_JACOBI, 0) > 0); ++nlss;
1408            }
1409
1410            if(nsolved==m){
1411
1412        /* parameter vector updates are now in dp */
1413
1414        /* compute p's new estimate and ||dp||^2 */
1415        for(i=0, dp_L2=0.0; i<nvars; ++i){
1416          pdp[i]=p[i] + (tmp=dp[i]);
1417          dp_L2+=tmp*tmp;
1418        }
1419        //dp_L2=sqrt(dp_L2);
1420
1421        if(dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */
1422        //if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */
1423          stop=2;
1424          break;
1425        }
1426
1427       if(dp_L2>=(p_L2+eps2)/SBA_EPSILON_SQ){ /* almost singular */
1428       //if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */
1429         stop=4;
1430         break;
1431       }
1432
1433        (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata); ++nfev; /* evaluate function at p + dp */
1434        if(verbose>1)
1435          printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs));
1436        for(i=0, pdp_eL2=0.0; i<nobs; ++i){ /* compute ||e(pdp)||_2 */
1437          hx[i]=tmp=x[i]-hx[i];
1438          pdp_eL2+=tmp*tmp;
1439        }
1440
1441        for(i=0, dL=0.0; i<nvars; ++i)
1442          dL+=dp[i]*(mu*dp[i]+ea[i]);
1443
1444        dF=p_eL2-pdp_eL2;
1445
1446        if(verbose>1)
1447          printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu, dL!=0.0? dF/dL : dF/DBL_EPSILON, p_eL2/nvis, pdp_eL2/nvis, p_eL2/pdp_eL2);
1448
1449        if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */
1450          tmp=(2.0*dF/dL-1.0);
1451          tmp=1.0-tmp*tmp*tmp;
1452          mu=mu*( (tmp>=SBA_ONE_THIRD)? tmp : SBA_ONE_THIRD );
1453          nu=2;
1454
1455          for(i=0; i<nvars; ++i) /* update p's estimate */
1456            p[i]=pdp[i];
1457
1458          for(i=0; i<nobs; ++i) /* update e and ||e||_2 */
1459            e[i]=hx[i];
1460          p_eL2=pdp_eL2;
1461          break;
1462        }
1463      } /* nsolved==m */
1464
1465      /* if this point is reached, either at least one linear system could not be solved or
1466       * the error did not reduce; in any case, the increment must be rejected
1467       */
1468
1469      mu*=nu;
1470      nu2=nu<<1; // 2*nu;
1471      if(nu2<=nu){ /* nu has wrapped around (overflown) */
1472        fprintf(stderr, "Too many failed attempts to increase the damping factor in sba_mot_levmar_x()! Singular hessian matrix?\n");
1473        //exit(1);
1474        stop=6;
1475        break;
1476      }
1477      nu=nu2;
1478
1479      /* restore U diagonal entries */
1480      for(j=mcon; j<m; ++j){
1481        ptr1=U + j*Usz; // set ptr1 to point to U_j
1482        ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
1483        for(i=0; i<cnp; ++i)
1484          ptr1[i*cnp+i]=ptr2[i];
1485      }
1486    } /* inner loop */
1487
1488    if(p_eL2<=eps3_sq) stop=5; // error is small, force termination of outer loop
1489  }
1490
1491  if(itno>=itmax) stop=3;
1492
1493  /* restore U diagonal entries */
1494  for(j=mcon; j<m; ++j){
1495    ptr1=U + j*Usz; // set ptr1 to point to U_j
1496    ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j
1497    for(i=0; i<cnp; ++i)
1498      ptr1[i*cnp+i]=ptr2[i];
1499  }
1500
1501  if(info){
1502    info[0]=init_p_eL2;
1503    info[1]=p_eL2;
1504    info[2]=ea_inf;
1505    info[3]=dp_L2;
1506    for(j=mcon, tmp=DBL_MIN; j<m; ++j){
1507      ptr1=U + j*Usz; // set ptr1 to point to U_j
1508      for(i=0; i<cnp; ++i)
1509        if(tmp<ptr1[i*cnp+i]) tmp=ptr1[i*cnp+i];
1510    }
1511    info[4]=mu/tmp;
1512    info[5]=itno;
1513    info[6]=stop;
1514    info[7]=nfev;
1515    info[8]=njev;
1516    info[9]=nlss;
1517  }
1518  //sba_print_sol(n, m, p, cnp, 0, x, mnp, &idxij, rcidxs, rcsubs);
1519  retval=(stop!=4)?  itno : -1;
1520                                                               
1521freemem_and_return: /* NOTE: this point is also reached via a goto! */
1522
1523   /* free whatever was allocated */
1524  free(jac); free(U);
1525  free(e); free(ea); 
1526  free(dp);
1527  free(rcidxs); free(rcsubs);
1528
1529  free(hx); free(diagU); free(pdp);
1530  if(fdj_data.hxx){ // cleanup
1531    free(fdj_data.hxx);
1532    free(fdj_data.func_rcidxs);
1533  }
1534
1535  sba_crsm_free(&idxij);
1536
1537  return retval;
1538}
1539
1540
1541/* Bundle adjustment on structure parameters only
1542 * using the sparse Levenberg-Marquardt as described in HZ p. 568
1543 */
1544
1545int sba_str_levmar_x(
1546    const int n,   /* number of points */
1547    const int m,   /* number of images */
1548    char *vmask,  /* visibility mask: vmask[i][j]=1 if point i visible in image j, 0 otherwise. nxm */
1549    double *p,    /* initial parameter vector p0: (b1, ..., bn).
1550                   * bi are the i-th point parameters, * size n*pnp */
1551    const int pnp,/* number of parameters for ONE point; e.g. 3 for Euclidean points */
1552    double *x,    /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where
1553                   * x_ij is the projection of the i-th point on the j-th image.
1554                   * NOTE: some of the x_ij might be missing, if point i is not visible in image j;
1555                   * see vmask[i][j], max. size n*m*mnp
1556                   */
1557    const int mnp,/* number of parameters for EACH measurement; usually 2 */
1558    void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata),
1559                                              /* functional relation describing measurements. Given a parameter vector p,
1560                                               * computes a prediction of the measurements \hat{x}. p is (n*pnp)x1,
1561                                               * \hat{x} is (n*m*mnp)x1, maximum
1562                                               * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
1563                                               * as working memory
1564                                               */
1565    void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata),
1566                                              /* function to evaluate the sparse jacobian dX/dp.
1567                                               * The Jacobian is returned in jac as
1568                                               * (dx_11/db_1, ..., dx_1m/db_1, ..., dx_n1/db_n, ..., dx_nm/db_n), or (using HZ's notation),
1569                                               * jac=(B_11, ..., B_1m, ..., B_n1, ..., B_nm)
1570                                               * Notice that depending on idxij, some of the B_ij might be missing.
1571                                               * Note also that B_ij are mnp x pnp matrices and they
1572                                               * should be stored in jac in row-major order.
1573                                               * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used
1574                                               * as working memory
1575                                               *
1576                                               * If NULL, the jacobian is approximated by repetitive func calls and finite
1577                                               * differences. This is computationally inefficient and thus NOT recommended.
1578                                               */
1579    void *adata,       /* pointer to possibly additional data, passed uninterpreted to func, fjac */ 
1580
1581    int itmax,         /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate return */
1582    int verbose,       /* I: verbosity */
1583    double opts[SBA_OPTSSZ],
1584                             /* I: minim. options [\mu, \epsilon1, \epsilon2]. Respectively the scale factor for initial \mu,
1585                        * stopping thresholds for ||J^T e||_inf, ||dp||_2 and ||e||_2
1586                        */
1587    double info[SBA_INFOSZ]
1588                             /* O: information regarding the minimization. Set to NULL if don't care
1589                        * info[0]=||e||_2 at initial p.
1590                        * info[1-4]=[ ||e||_2, ||J^T e||_inf,  ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
1591                        * info[5]= # iterations,
1592                        * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
1593                        *                                 2 - stopped by small dp
1594                        *                                 3 - stopped by itmax
1595                        *                                 4 - singular matrix. Restart from current p with increased mu
1596                        *                                 5 - stopped by small ||e||_2
1597                        *                                 6 - too many attempts to increase damping. Restart with increased mu
1598                        * info[7]= # function evaluations
1599                        * info[8]= # jacobian evaluations
1600                                          * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error
1601                        */
1602)
1603{
1604register int i, j, ii, jj, k;
1605int nvis, nnz, retval;
1606
1607/* The following are work arrays that are dynamically allocated by sba_str_levmar_x() */
1608double *jac;  /* work array for storing the jacobian, max. size n*m*mnp*pnp */
1609double *V;    /* work array for storing the V_i in the order V_1, ..., V_n, size n*pnp*pnp */
1610
1611double *e;    /* work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm,
1612                 max. size n*m*mnp */
1613double *eb;   /* work array for storing the eb_i in the order eb_1, .. eb_n size n*pnp */
1614
1615double *dp;   /* work array for storing the parameter vector updates db_1, ..., db_n, size n*pnp */
1616
1617/* Of the above arrays, jac, e, are sparse and
1618 * V, eb, dp are dense. Sparse arrays are indexed through
1619 * idxij (see below), that is with the same mechanism as the input
1620 * measurements vector x
1621 */
1622
1623/* submatrices sizes */
1624int Bsz, Vsz,
1625    esz, ebsz;
1626
1627register double *ptr1, *ptr2, *ptr3, *ptr4, sum;
1628struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x. This is also the location
1629                        * of B_ij in jac, etc.
1630                        * This matrix can be thought as a map from a sparse set of pairs (i, j) to a continuous
1631                        * index k and it is used to efficiently lookup the memory locations where the non-zero
1632                        * blocks of a sparse matrix/vector are stored
1633                        */
1634int maxnm=(n>=m)? n:m, /* max. of (n, m) */
1635    *rcidxs,  /* work array for the indexes corresponding to the nonzero elements of a single row or
1636                 column in a sparse matrix, size max(n, m) */
1637    *rcsubs;  /* work array for the subscripts of nonzero elements in a single row or column of a
1638                 sparse matrix, size max(n, m) */
1639
1640/* The following variables are needed by the LM algorithm */
1641register int itno;  /* iteration counter */
1642int nsolved;
1643/* temporary work arrays that are dynamically allocated */
1644double *hx,         /* \hat{x}_i, max. size m*n*mnp */
1645       *diagV,      /* diagonals of V_i, size n*pnp */
1646       *pdp;        /* p + dp, size n*pnp */
1647
1648register double mu,  /* damping constant */
1649                tmp; /* mainly used in matrix & vector multiplications */
1650double p_eL2, eb_inf, pdp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+dp)||_2 */
1651double p_L2, dp_L2=DBL_MAX, dF, dL;
1652double tau=FABS(opts[0]), eps1=FABS(opts[1]), eps2=FABS(opts[2]), eps2_sq=opts[2]*opts[2], eps3_sq=opts[3]*opts[3];
1653double init_p_eL2;
1654int nu=2, nu2, stop, nfev, njev=0, nlss=0;
1655int nobs, nvars;
1656
1657struct fdj_data_x_ fdj_data;
1658void *jac_adata;
1659
1660/* Initialization */
1661
1662  /* block sizes */
1663  Bsz=mnp * pnp; Vsz=pnp * pnp;
1664  esz=mnp; ebsz=pnp;
1665
1666  /* count total number of visible image points */
1667  for(i=nvis=0, jj=n*m; i<jj; ++i)
1668    nvis+=vmask[i];
1669
1670  nobs=nvis*mnp;
1671  nvars=n*pnp;
1672  if(nobs<nvars){
1673    fprintf(stderr, "sba_str_levmar_x(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n", nobs, nvars);
1674    exit(1);
1675  }
1676
1677  /* allocate work arrays */
1678  jac=(double *)emalloc(nvis*Bsz*sizeof(double));
1679  V=(double *)emalloc(n*Vsz*sizeof(double));
1680  e=(double *)emalloc(nobs*sizeof(double));
1681  eb=(double *)emalloc(nvars*sizeof(double));
1682  dp=(double *)emalloc(nvars*sizeof(double));
1683  rcidxs=(int *)emalloc(maxnm*sizeof(int));
1684  rcsubs=(int *)emalloc(maxnm*sizeof(int));
1685
1686
1687  hx=(double *)emalloc(nobs*sizeof(double));
1688  diagV=(double *)emalloc(nvars*sizeof(double));
1689  pdp=(double *)emalloc(nvars*sizeof(double));
1690
1691  /* allocate & fill up the idxij structure */
1692  sba_crsm_alloc(&idxij, n, m, nvis);
1693  for(i=k=0; i<n; ++i){
1694    idxij.rowptr[i]=k;
1695    ii=i*m;
1696    for(j=0; j<m; ++j)
1697      if(vmask[ii+j]){
1698        idxij.val[k]=k;
1699        idxij.colidx[k++]=j;
1700      }
1701  }
1702  idxij.rowptr[n]=nvis;
1703
1704  /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */
1705  if(!fjac){
1706    fdj_data.func=func;
1707    fdj_data.cnp=0;
1708    fdj_data.pnp=pnp;
1709    fdj_data.mnp=mnp;
1710    fdj_data.hx=hx;
1711    fdj_data.hxx=(double *)emalloc(nobs*sizeof(double));
1712    fdj_data.func_rcidxs=(int *)emalloc(2*maxnm*sizeof(int));
1713    fdj_data.func_rcsubs=fdj_data.func_rcidxs+maxnm;
1714    fdj_data.adata=adata;
1715
1716    fjac=sba_fdjac_x;
1717    jac_adata=(void *)&fdj_data;
1718  }
1719  else{
1720    fdj_data.hxx=NULL;
1721    jac_adata=adata;
1722  }
1723
1724  if(itmax==0){ /* verify jacobian */
1725    sba_str_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, pnp, mnp, adata, jac_adata);
1726    retval=0;
1727    goto freemem_and_return;
1728  }
1729
1730  /* compute the error vectors e_ij in hx */
1731  (*func)(p, &idxij, rcidxs, rcsubs, hx, adata); nfev=1;
1732  /* compute e=x - f(p) and its L2 norm */
1733  for(i=0, p_eL2=0.0; i<nobs; ++i){
1734    e[i]=tmp=x[i]-hx[i];
1735    p_eL2+=tmp*tmp;
1736  }
1737
1738  if(verbose) printf("initial str-SBA error %g [%g]\n", p_eL2, p_eL2/nvis);
1739  init_p_eL2=p_eL2;
1740
1741  for(itno=stop=0; itno<itmax && !stop; ++itno){
1742    /* Note that p, e and ||e||_2 have been updated at the previous iteration */
1743
1744    /* compute derivative submatrices B_ij */
1745    (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); ++njev;
1746
1747    /* compute V_i = \sum_j B_ij^T B_ij */ // \Sigma here!
1748    /* V_i is symmetric, therefore its computation can be speeded up by
1749     * computing only the upper part and then reusing it for the lower one.
1750     * Recall that B_ij is mnp x pnp
1751     */
1752    /* Also compute eb_i = \sum_j B_ij^T e_ij */ // \Sigma here!
1753    /* Recall that e_ij is mnp x 1
1754     */
1755          _dblzero(V, n*Vsz); /* clear all V_i */
1756          _dblzero(eb, n*ebsz); /* clear all eb_i */
1757    for(i=0; i<n; ++i){
1758      ptr1=V + i*Vsz; // set ptr1 to point to V_i
1759      ptr2=eb + i*ebsz; // set ptr2 to point to eb_i
1760
1761      nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */
1762      for(j=0; j<nnz; ++j){
1763        /* set ptr3 to point to B_ij, actual column number in rcsubs[j] */
1764        ptr3=jac + idxij.val[rcidxs[j]]*Bsz;
1765     
1766        /* compute the UPPER TRIANGULAR PART of B_ij^T B_ij and add it to V_i */
1767        for(ii=0; ii<pnp; ++ii){
1768          for(jj=ii; jj<pnp; ++jj){
1769            for(k=0, sum=0.0; k<mnp; ++k)
1770              sum+=ptr3[k*pnp+ii]*ptr3[k*pnp+jj];
1771            ptr1[ii*pnp+jj]+=sum;
1772          }
1773
1774          /* copy the LOWER TRIANGULAR PART of V_i from the upper one */
1775          for(jj=0; jj<ii; ++jj)
1776            ptr1[ii*pnp+jj]=ptr1[jj*pnp+ii];
1777        }
1778
1779        ptr4=e + idxij.val[rcidxs[j]]*esz; /* set ptr4 to point to e_ij */
1780        /* compute B_ij^T e_ij and add it to eb_i */
1781        for(ii=0; ii<pnp; ++ii){
1782          for(jj=0, sum=0.0; jj<mnp; ++jj)
1783            sum+=ptr3[jj*pnp+ii]*ptr4[jj];
1784          ptr2[ii]+=sum;
1785        }
1786      }
1787    }
1788
1789    /* Compute ||J^T e||_inf and ||p||^2 */
1790    for(i=0, p_L2=eb_inf=0.0; i<nvars; ++i){
1791      if(eb_inf < (tmp=FABS(eb[i]))) eb_inf=tmp;
1792      p_L2+=p[i]*p[i];
1793    }
1794    //p_L2=sqrt(p_L2);
1795
1796    /* save diagonal entries so that augmentation can be later canceled.
1797     * Diagonal entries are in V_i
1798     */
1799    for(i=0; i<n; ++i){
1800      ptr1=V + i*Vsz; // set ptr1 to point to V_i
1801      ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
1802      for(j=0; j<pnp; ++j)
1803        ptr2[j]=ptr1[j*pnp+j];
1804    }
1805
1806/*
1807if(!(itno%100)){
1808  printf("Current estimate: ");
1809  for(i=0; i<nvars; ++i)
1810    printf("%.9g ", p[i]);
1811  printf("-- errors %.9g %0.9g\n", eb_inf, p_eL2);
1812}
1813*/
1814
1815    /* check for convergence */
1816    if((eb_inf <= eps1)){
1817      dp_L2=0.0; /* no increment for p in this case */
1818      stop=1;
1819      break;
1820    }
1821
1822   /* compute initial damping factor */
1823    if(itno==0){
1824      for(i=0, tmp=DBL_MIN; i<nvars; ++i)
1825        if(diagV[i]>tmp) tmp=diagV[i]; /* find max diagonal element */
1826      mu=tau*tmp;
1827    }
1828
1829    /* determine increment using adaptive damping */
1830    while(1){
1831      /* augment V */
1832      for(i=0; i<n; ++i){
1833        ptr1=V + i*Vsz; // set ptr1 to point to V_i
1834        for(j=0; j<pnp; ++j)
1835          ptr1[j*pnp+j]+=mu;
1836      }
1837
1838      /* solve the linear systems V*_i db_i = eb_i to compute the db_i */
1839      for(i=nsolved=0; i<n; ++i){
1840        ptr1=V + i*Vsz; // set ptr1 to point to V_i
1841        ptr2=eb + i*ebsz; // set ptr2 to point to eb_i
1842        ptr3=dp + i*pnp; // set ptr3 to point to db_i
1843
1844        //nsolved+=sba_Axb_LU(ptr1, ptr2, ptr3, pnp, 0); ++nlss;
1845        nsolved+=sba_Axb_Chol(ptr1, ptr2, ptr3, pnp, 0); ++nlss;
1846        //nsolved+=sba_Axb_BK(ptr1, ptr2, ptr3, pnp, 0); ++nlss;
1847        //nsolved+=sba_Axb_QRnoQ(ptr1, ptr2, ptr3, pnp, 0); ++nlss;
1848        //nsolved+=sba_Axb_QR(ptr1, ptr2, ptr3, pnp, 0); ++nlss;
1849        //nsolved+=sba_Axb_SVD(ptr1, ptr2, ptr3, pnp, 0); ++nlss;
1850              //nsolved+=(sba_Axb_CG(ptr1, ptr2, ptr3, pnp, (3*pnp)/2, 1E-10, SBA_CG_JACOBI, 0) > 0); ++nlss;
1851      }
1852
1853      if(nsolved==n){
1854
1855        /* parameter vector updates are now in dp */
1856
1857        /* compute p's new estimate and ||dp||^2 */
1858        for(i=0, dp_L2=0.0; i<nvars; ++i){
1859          pdp[i]=p[i] + (tmp=dp[i]);
1860          dp_L2+=tmp*tmp;
1861        }
1862        //dp_L2=sqrt(dp_L2);
1863
1864        if(dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */
1865        //if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */
1866          stop=2;
1867          break;
1868        }
1869
1870       if(dp_L2>=(p_L2+eps2)/SBA_EPSILON_SQ){ /* almost singular */
1871       //if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */
1872         stop=4;
1873         break;
1874       }
1875
1876        (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata); ++nfev; /* evaluate function at p + dp */
1877        if(verbose>1)
1878          printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs));
1879        for(i=0, pdp_eL2=0.0; i<nobs; ++i){ /* compute ||e(pdp)||_2 */
1880          hx[i]=tmp=x[i]-hx[i];
1881          pdp_eL2+=tmp*tmp;
1882        }
1883
1884        for(i=0, dL=0.0; i<nvars; ++i)
1885          dL+=dp[i]*(mu*dp[i]+eb[i]);
1886
1887        dF=p_eL2-pdp_eL2;
1888
1889        if(verbose>1)
1890          printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu, dL!=0.0? dF/dL : dF/DBL_EPSILON, p_eL2/nvis, pdp_eL2/nvis, p_eL2/pdp_eL2);
1891
1892        if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */
1893          tmp=(2.0*dF/dL-1.0);
1894          tmp=1.0-tmp*tmp*tmp;
1895          mu=mu*( (tmp>=SBA_ONE_THIRD)? tmp : SBA_ONE_THIRD );
1896          nu=2;
1897
1898          for(i=0; i<nvars; ++i) /* update p's estimate */
1899            p[i]=pdp[i];
1900
1901          for(i=0; i<nobs; ++i) /* update e and ||e||_2 */
1902            e[i]=hx[i];
1903          p_eL2=pdp_eL2;
1904          break;
1905        }
1906      } /* nsolved==n */
1907
1908      /* if this point is reached, either at least one linear system could not be solved or
1909       * the error did not reduce; in any case, the increment must be rejected
1910       */
1911
1912      mu*=nu;
1913      nu2=nu<<1; // 2*nu;
1914      if(nu2<=nu){ /* nu has wrapped around (overflown) */
1915        fprintf(stderr, "Too many failed attempts to increase the damping factor in sba_str_levmar_x()! Singular hessian matrix?\n");
1916        //exit(1);
1917        stop=6;
1918        break;
1919      }
1920      nu=nu2;
1921
1922      /* restore V diagonal entries */
1923      for(i=0; i<n; ++i){
1924        ptr1=V + i*Vsz; // set ptr1 to point to V_i
1925        ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
1926        for(j=0; j<pnp; ++j)
1927          ptr1[j*pnp+j]=ptr2[j];
1928      }
1929    } /* inner loop */
1930
1931    if(p_eL2<=eps3_sq) stop=5; // error is small, force termination of outer loop
1932  }
1933
1934  if(itno>=itmax) stop=3;
1935
1936  /* restore V diagonal entries */
1937  for(i=0; i<n; ++i){
1938    ptr1=V + i*Vsz; // set ptr1 to point to V_i
1939    ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i
1940    for(j=0; j<pnp; ++j)
1941      ptr1[j*pnp+j]=ptr2[j];
1942  }
1943
1944  if(info){
1945    info[0]=init_p_eL2;
1946    info[1]=p_eL2;
1947    info[2]=eb_inf;
1948    info[3]=dp_L2;
1949    for(i=0; i<n; ++i){
1950      ptr1=V + i*Vsz; // set ptr1 to point to V_i
1951      for(j=0; j<pnp; ++j)
1952        if(tmp<ptr1[j*pnp+j]) tmp=ptr1[j*pnp+j];
1953      }
1954    info[4]=mu/tmp;
1955    info[5]=itno;
1956    info[6]=stop;
1957    info[7]=nfev;
1958    info[8]=njev;
1959    info[9]=nlss;
1960  }
1961  //sba_print_sol(n, m, p, 0, pnp, x, mnp, &idxij, rcidxs, rcsubs);
1962  retval=(stop!=4)?  itno : -1;
1963                                                               
1964freemem_and_return: /* NOTE: this point is also reached via a goto! */
1965
1966   /* free whatever was allocated */
1967  free(jac); free(V);
1968  free(e); free(eb); 
1969  free(dp);               
1970  free(rcidxs); free(rcsubs);
1971
1972  free(hx); free(diagV); free(pdp);
1973  if(fdj_data.hxx){ // cleanup
1974    free(fdj_data.hxx);
1975    free(fdj_data.func_rcidxs);
1976  }
1977
1978  sba_crsm_free(&idxij);
1979
1980  return retval;
1981}
Note: See TracBrowser for help on using the repository browser.