1 | ///////////////////////////////////////////////////////////////////////////////// |
---|
2 | //// |
---|
3 | //// Expert drivers for sparse bundle adjustment based on the |
---|
4 | //// Levenberg - Marquardt minimization algorithm |
---|
5 | //// Copyright (C) 2004 Manolis Lourakis (lourakis@ics.forth.gr) |
---|
6 | //// Institute of Computer Science, Foundation for Research & Technology - Hellas |
---|
7 | //// Heraklion, Crete, Greece. |
---|
8 | //// |
---|
9 | //// This program is free software; you can redistribute it and/or modify |
---|
10 | //// it under the terms of the GNU General Public License as published by |
---|
11 | //// the Free Software Foundation; either version 2 of the License, or |
---|
12 | //// (at your option) any later version. |
---|
13 | //// |
---|
14 | //// This program is distributed in the hope that it will be useful, |
---|
15 | //// but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
16 | //// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
17 | //// GNU General Public License for more details. |
---|
18 | //// |
---|
19 | /////////////////////////////////////////////////////////////////////////////////// |
---|
20 | |
---|
21 | #include <stdio.h> |
---|
22 | #include <stdlib.h> |
---|
23 | #include <string.h> |
---|
24 | #include <math.h> |
---|
25 | #include <float.h> |
---|
26 | |
---|
27 | #include "sba.h" |
---|
28 | #include "sba_chkjac.h" |
---|
29 | |
---|
30 | #define SBA_EPSILON 1E-12 |
---|
31 | #define SBA_EPSILON_SQ ( (SBA_EPSILON)*(SBA_EPSILON) ) |
---|
32 | |
---|
33 | #define SBA_ONE_THIRD 0.3333333334 /* 1.0/3.0 */ |
---|
34 | |
---|
35 | |
---|
36 | #define emalloc(sz) emalloc_(__FILE__, __LINE__, sz) |
---|
37 | |
---|
38 | #define FABS(x) (((x)>=0)? (x) : -(x)) |
---|
39 | |
---|
40 | #define ROW_MAJOR 0 |
---|
41 | #define COLUMN_MAJOR 1 |
---|
42 | #define MAT_STORAGE COLUMN_MAJOR |
---|
43 | |
---|
44 | |
---|
45 | /* inline */ |
---|
46 | #ifdef _MSC_VER |
---|
47 | #define inline __inline //MSVC |
---|
48 | #elif !defined(__GNUC__) |
---|
49 | #define inline //other than MSVC, GCC: define empty |
---|
50 | #endif |
---|
51 | |
---|
52 | /* contains information necessary for computing a finite difference approximation to a jacobian, |
---|
53 | * e.g. function to differentiate, problem dimensions and pointers to working memory buffers |
---|
54 | */ |
---|
55 | struct fdj_data_x_ { |
---|
56 | void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata); /* function to differentiate */ |
---|
57 | int cnp, pnp, mnp; /* parameter numbers */ |
---|
58 | int *func_rcidxs, |
---|
59 | *func_rcsubs; /* working memory for func invocations. |
---|
60 | * Notice that this has to be different |
---|
61 | * than the working memory used for |
---|
62 | * evaluating the jacobian! |
---|
63 | */ |
---|
64 | double *hx, *hxx; /* memory to save results in */ |
---|
65 | void *adata; |
---|
66 | }; |
---|
67 | |
---|
68 | /* auxiliary memory allocation routine with error checking */ |
---|
69 | inline static void *emalloc_(char *file, int line, size_t sz) |
---|
70 | { |
---|
71 | void *ptr; |
---|
72 | |
---|
73 | ptr=(void *)malloc(sz); |
---|
74 | if(ptr==NULL){ |
---|
75 | fprintf(stderr, "memory allocation request for %u bytes failed in file %s, line %d, exiting", sz, file, line); |
---|
76 | exit(1); |
---|
77 | } |
---|
78 | |
---|
79 | return ptr; |
---|
80 | } |
---|
81 | |
---|
82 | /* auxiliary routine for clearing an array of doubles */ |
---|
83 | inline static void _dblzero(register double *arr, register int count) |
---|
84 | { |
---|
85 | while(--count>=0) |
---|
86 | *arr++=0.0; |
---|
87 | } |
---|
88 | |
---|
89 | /* auxiliary routine for computing the mean reprojection error; used for debugging */ |
---|
90 | static double sba_mean_repr_error(int n, int mnp, double *x, double *hx, struct sba_crsm *idxij, int *rcidxs, int *rcsubs) |
---|
91 | { |
---|
92 | register int i, j; |
---|
93 | int nnz, nprojs; |
---|
94 | double *ptr1, *ptr2; |
---|
95 | double err; |
---|
96 | |
---|
97 | for(i=nprojs=0, err=0.0; i<n; ++i){ |
---|
98 | nnz=sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */ |
---|
99 | nprojs+=nnz; |
---|
100 | for(j=0; j<nnz; ++j){ /* point i projecting on camera rcsubs[j] */ |
---|
101 | ptr1=x + idxij->val[rcidxs[j]]*mnp; |
---|
102 | ptr2=hx + idxij->val[rcidxs[j]]*mnp; |
---|
103 | |
---|
104 | err+=sqrt((ptr1[0]-ptr2[0])*(ptr1[0]-ptr2[0]) + (ptr1[1]-ptr2[1])*(ptr1[1]-ptr2[1])); |
---|
105 | } |
---|
106 | } |
---|
107 | |
---|
108 | return err/((double)(nprojs)); |
---|
109 | } |
---|
110 | |
---|
111 | /* print the solution in p using sba's text format. If cnp/pnp==0 only points/cameras are printed */ |
---|
112 | static void sba_print_sol(int n, int m, double *p, int cnp, int pnp, double *x, int mnp, struct sba_crsm *idxij, int *rcidxs, int *rcsubs) |
---|
113 | { |
---|
114 | register int i, j, ii; |
---|
115 | int nnz; |
---|
116 | double *ptr; |
---|
117 | |
---|
118 | if(cnp){ |
---|
119 | /* print camera parameters */ |
---|
120 | for(j=0; j<m; ++j){ |
---|
121 | ptr=p+cnp*j; |
---|
122 | for(ii=0; ii<cnp; ++ii) |
---|
123 | printf("%g ", ptr[ii]); |
---|
124 | printf("\n"); |
---|
125 | } |
---|
126 | } |
---|
127 | |
---|
128 | if(pnp){ |
---|
129 | /* 3D & 2D point parameters */ |
---|
130 | printf("\n\n\n# X Y Z nframes frame0 x0 y0 frame1 x1 y1 ...\n"); |
---|
131 | for(i=0; i<n; ++i){ |
---|
132 | ptr=p+cnp*m+i*pnp; |
---|
133 | for(ii=0; ii<pnp; ++ii) // print 3D coordinates |
---|
134 | printf("%g ", ptr[ii]); |
---|
135 | |
---|
136 | nnz=sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */ |
---|
137 | printf("%d ", nnz); |
---|
138 | |
---|
139 | for(j=0; j<nnz; ++j){ /* point i projecting on camera rcsubs[j] */ |
---|
140 | ptr=x + idxij->val[rcidxs[j]]*mnp; |
---|
141 | |
---|
142 | printf("%d ", rcsubs[j]); |
---|
143 | for(ii=0; ii<mnp; ++ii) // print 2D coordinates |
---|
144 | printf("%g ", ptr[ii]); |
---|
145 | } |
---|
146 | printf("\n"); |
---|
147 | } |
---|
148 | } |
---|
149 | } |
---|
150 | |
---|
151 | |
---|
152 | /* Given a parameter vector p made up of the 3D coordinates of n points and the parameters of m cameras, compute in |
---|
153 | * jac the jacobian of the predicted measurements, i.e. the jacobian of the projections of 3D points in the m images. |
---|
154 | * The jacobian is approximated with the aid of finite differences and is returned in the order |
---|
155 | * (A_11, ..., A_1m, ..., A_n1, ..., A_nm, B_11, ..., B_1m, ..., B_n1, ..., B_nm), |
---|
156 | * where A_ij=dx_ij/da_j and B_ij=dx_ij/db_i (see HZ). |
---|
157 | * Notice that depending on idxij, some of the A_ij, B_ij might be missing |
---|
158 | * |
---|
159 | * Problem-specific information is assumed to be stored in a structure pointed to by "dat". |
---|
160 | * |
---|
161 | * NOTE: The jacobian (for n=4, m=3) in matrix form has the following structure: |
---|
162 | * A_11 0 0 B_11 0 0 0 |
---|
163 | * 0 A_12 0 B_12 0 0 0 |
---|
164 | * 0 0 A_13 B_13 0 0 0 |
---|
165 | * A_21 0 0 0 B_21 0 0 |
---|
166 | * 0 A_22 0 0 B_22 0 0 |
---|
167 | * 0 0 A_23 0 B_23 0 0 |
---|
168 | * A_31 0 0 0 0 B_31 0 |
---|
169 | * 0 A_32 0 0 0 B_32 0 |
---|
170 | * 0 0 A_33 0 0 B_33 0 |
---|
171 | * A_41 0 0 0 0 0 B_41 |
---|
172 | * 0 A_42 0 0 0 0 B_42 |
---|
173 | * 0 0 A_43 0 0 0 B_43 |
---|
174 | * To reduce the total number of objective function evaluations, this structure can be |
---|
175 | * exploited as follows: A certain d is added to the j-th parameters of all cameras and |
---|
176 | * the objective function is evaluated at the resulting point. This evaluation suffices |
---|
177 | * to compute the corresponding columns of *all* A_ij through finite differences. A similar |
---|
178 | * strategy allows the computation of the B_ij. Overall, only cnp+pnp+1 objective function |
---|
179 | * evaluations are needed to compute the jacobian, much fewer compared to the m*cnp+n*pnp+1 |
---|
180 | * that would be required by the naive strategy of computing one column of the jacobian |
---|
181 | * per function evaluation. See Nocedal-Wright, ch. 7, pp. 169. Although this approach is |
---|
182 | * much faster compared to the naive strategy, it is not preferable to analytic jacobians, |
---|
183 | * since the latter are considerably faster to compute and result in fewer LM iterations. |
---|
184 | */ |
---|
185 | static void sba_fdjac_x( |
---|
186 | double *p, /* I: current parameter estimate, (m*cnp+n*pnp)x1 */ |
---|
187 | struct sba_crsm *idxij, /* I: sparse matrix containing the location of x_ij in hx */ |
---|
188 | int *rcidxs, /* work array for the indexes of nonzero elements of a single sparse matrix row/column */ |
---|
189 | int *rcsubs, /* work array for the subscripts of nonzero elements in a single sparse matrix row/column */ |
---|
190 | double *jac, /* O: array for storing the approximated jacobian */ |
---|
191 | void *dat) /* I: points to a "fdj_data_x_" structure */ |
---|
192 | { |
---|
193 | register int i, j, ii, jj; |
---|
194 | double *pa, *pb, *pqr, *ppt, *jaca, *jacb; |
---|
195 | register double *pAB, *phx, *phxx; |
---|
196 | int n, m, nm, nnz, Asz, Bsz, idx; |
---|
197 | |
---|
198 | double *tmpd; |
---|
199 | register double d; |
---|
200 | |
---|
201 | struct fdj_data_x_ *fdjd; |
---|
202 | void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata); |
---|
203 | double *hx, *hxx; |
---|
204 | int cnp, pnp, mnp; |
---|
205 | void *adata; |
---|
206 | |
---|
207 | |
---|
208 | /* retrieve problem-specific information passed in *dat */ |
---|
209 | fdjd=(struct fdj_data_x_ *)dat; |
---|
210 | func=fdjd->func; |
---|
211 | cnp=fdjd->cnp; pnp=fdjd->pnp; mnp=fdjd->mnp; |
---|
212 | hx=fdjd->hx; |
---|
213 | hxx=fdjd->hxx; |
---|
214 | adata=fdjd->adata; |
---|
215 | |
---|
216 | n=idxij->nr; m=idxij->nc; |
---|
217 | pa=p; pb=p+m*cnp; |
---|
218 | Asz=mnp*cnp; Bsz=mnp*pnp; |
---|
219 | jaca=jac; jacb=jac+idxij->nnz*Asz; |
---|
220 | |
---|
221 | nm=(n>=m)? n : m; // max(n, m); |
---|
222 | tmpd=(double *)emalloc(nm*sizeof(double)); |
---|
223 | |
---|
224 | (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hx, adata); // evaluate supplied function on current solution |
---|
225 | |
---|
226 | if(cnp){ // is motion varying? |
---|
227 | /* compute A_ij */ |
---|
228 | for(jj=0; jj<cnp; ++jj){ |
---|
229 | for(j=0; j<m; ++j){ |
---|
230 | pqr=pa+j*cnp; // j-th camera parameters |
---|
231 | /* determine d=max(SBA_DELTA_SCALE*|pqr[jj]|, SBA_MIN_DELTA), see HZ */ |
---|
232 | d=(double)(SBA_DELTA_SCALE)*pqr[jj]; // force evaluation |
---|
233 | d=FABS(d); |
---|
234 | if(d<SBA_MIN_DELTA) d=SBA_MIN_DELTA; |
---|
235 | |
---|
236 | tmpd[j]=d; |
---|
237 | pqr[jj]+=d; |
---|
238 | } |
---|
239 | |
---|
240 | (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hxx, adata); |
---|
241 | |
---|
242 | for(j=0; j<m; ++j){ |
---|
243 | pqr=pa+j*cnp; // j-th camera parameters |
---|
244 | pqr[jj]-=tmpd[j]; /* restore */ |
---|
245 | d=1.0/tmpd[j]; /* invert so that divisions can be carried out faster as multiplications */ |
---|
246 | |
---|
247 | nnz=sba_crsm_col_elmidxs(idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */ |
---|
248 | for(i=0; i<nnz; ++i){ |
---|
249 | idx=idxij->val[rcidxs[i]]; |
---|
250 | phx=hx + idx*mnp; // set phx to point to hx_ij |
---|
251 | phxx=hxx + idx*mnp; // set phxx to point to hxx_ij |
---|
252 | pAB=jaca + idx*Asz; // set pAB to point to A_ij |
---|
253 | |
---|
254 | for(ii=0; ii<mnp; ++ii) |
---|
255 | pAB[ii*cnp+jj]=(phxx[ii]-phx[ii])*d; |
---|
256 | } |
---|
257 | } |
---|
258 | } |
---|
259 | } |
---|
260 | |
---|
261 | if(pnp){ // is structure varying? |
---|
262 | /* compute B_ij */ |
---|
263 | for(jj=0; jj<pnp; ++jj){ |
---|
264 | for(i=0; i<n; ++i){ |
---|
265 | ppt=pb+i*pnp; // i-th point parameters |
---|
266 | /* determine d=max(SBA_DELTA_SCALE*|ppt[jj]|, SBA_MIN_DELTA), see HZ */ |
---|
267 | d=(double)(SBA_DELTA_SCALE)*ppt[jj]; // force evaluation |
---|
268 | d=FABS(d); |
---|
269 | if(d<SBA_MIN_DELTA) d=SBA_MIN_DELTA; |
---|
270 | |
---|
271 | tmpd[i]=d; |
---|
272 | ppt[jj]+=d; |
---|
273 | } |
---|
274 | |
---|
275 | (*func)(p, idxij, fdjd->func_rcidxs, fdjd->func_rcsubs, hxx, adata); |
---|
276 | |
---|
277 | for(i=0; i<n; ++i){ |
---|
278 | ppt=pb+i*pnp; // i-th point parameters |
---|
279 | ppt[jj]-=tmpd[i]; /* restore */ |
---|
280 | d=1.0/tmpd[i]; /* invert so that divisions can be carried out faster as multiplications */ |
---|
281 | |
---|
282 | nnz=sba_crsm_row_elmidxs(idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */ |
---|
283 | for(j=0; j<nnz; ++j){ |
---|
284 | idx=idxij->val[rcidxs[j]]; |
---|
285 | phx=hx + idx*mnp; // set phx to point to hx_ij |
---|
286 | phxx=hxx + idx*mnp; // set phxx to point to hxx_ij |
---|
287 | pAB=jacb + idx*Bsz; // set pAB to point to B_ij |
---|
288 | |
---|
289 | for(ii=0; ii<mnp; ++ii) |
---|
290 | pAB[ii*pnp+jj]=(phxx[ii]-phx[ii])*d; |
---|
291 | } |
---|
292 | } |
---|
293 | } |
---|
294 | } |
---|
295 | |
---|
296 | free(tmpd); |
---|
297 | } |
---|
298 | |
---|
299 | /* Bundle adjustment on camera and structure parameters |
---|
300 | * using the sparse Levenberg-Marquardt as described in HZ p. 568 |
---|
301 | */ |
---|
302 | |
---|
303 | int sba_motstr_levmar_x( |
---|
304 | const int n, /* number of points */ |
---|
305 | const int m, /* number of images */ |
---|
306 | const int mcon,/* number of images (starting from the 1st) whose parameters should not be modified. |
---|
307 | * All A_ij (see below) with j<mcon are assumed to be zero |
---|
308 | */ |
---|
309 | char *vmask, /* visibility mask: vmask[i][j]=1 if point i visible in image j, 0 otherwise. nxm */ |
---|
310 | double *p, /* initial parameter vector p0: (a1, ..., am, b1, ..., bn). |
---|
311 | * aj are the image j parameters, bi are the i-th point parameters, |
---|
312 | * size m*cnp + n*pnp |
---|
313 | */ |
---|
314 | const int cnp,/* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */ |
---|
315 | const int pnp,/* number of parameters for ONE point; e.g. 3 for Euclidean points */ |
---|
316 | double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where |
---|
317 | * x_ij is the projection of the i-th point on the j-th image. |
---|
318 | * NOTE: some of the x_ij might be missing, if point i is not visible in image j; |
---|
319 | * see vmask[i][j], max. size n*m*mnp |
---|
320 | */ |
---|
321 | const int mnp,/* number of parameters for EACH measurement; usually 2 */ |
---|
322 | void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), |
---|
323 | /* functional relation describing measurements. Given a parameter vector p, |
---|
324 | * computes a prediction of the measurements \hat{x}. p is (m*cnp + n*pnp)x1, |
---|
325 | * \hat{x} is (n*m*mnp)x1, maximum |
---|
326 | * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used |
---|
327 | * as working memory |
---|
328 | */ |
---|
329 | void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), |
---|
330 | /* function to evaluate the sparse jacobian dX/dp. |
---|
331 | * The Jacobian is returned in jac as |
---|
332 | * (dx_11/da_1, ..., dx_1m/da_m, ..., dx_n1/da_1, ..., dx_nm/da_m, |
---|
333 | * dx_11/db_1, ..., dx_1m/db_1, ..., dx_n1/db_n, ..., dx_nm/db_n), or (using HZ's notation), |
---|
334 | * jac=(A_11, ..., A_1m, ..., A_n1, ..., A_nm, B_11, ..., B_1m, ..., B_n1, ..., B_nm) |
---|
335 | * Notice that depending on idxij, some of the A_ij and B_ij might be missing. |
---|
336 | * Note also that A_ij and B_ij are mnp x cnp and mnp x pnp matrices resp. and they |
---|
337 | * should be stored in jac in row-major order. |
---|
338 | * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used |
---|
339 | * as working memory |
---|
340 | * |
---|
341 | * If NULL, the jacobian is approximated by repetitive func calls and finite |
---|
342 | * differences. This is computationally inefficient and thus NOT recommended. |
---|
343 | */ |
---|
344 | void *adata, /* pointer to possibly additional data, passed uninterpreted to func, fjac */ |
---|
345 | |
---|
346 | int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate return */ |
---|
347 | int verbose, /* I: verbosity */ |
---|
348 | double opts[SBA_OPTSSZ], |
---|
349 | /* I: minim. options [\mu, \epsilon1, \epsilon2]. Respectively the scale factor for initial \mu, |
---|
350 | * stopping thresholds for ||J^T e||_inf, ||dp||_2 and ||e||_2 |
---|
351 | */ |
---|
352 | double info[SBA_INFOSZ] |
---|
353 | /* O: information regarding the minimization. Set to NULL if don't care |
---|
354 | * info[0]=||e||_2 at initial p. |
---|
355 | * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p. |
---|
356 | * info[5]= # iterations, |
---|
357 | * info[6]=reason for terminating: 1 - stopped by small gradient J^T e |
---|
358 | * 2 - stopped by small dp |
---|
359 | * 3 - stopped by itmax |
---|
360 | * 4 - singular matrix. Restart from current p with increased mu |
---|
361 | * 5 - stopped by small ||e||_2 |
---|
362 | * 6 - too many attempts to increase damping. Restart with increased mu |
---|
363 | * info[7]= # function evaluations |
---|
364 | * info[8]= # jacobian evaluations |
---|
365 | * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error |
---|
366 | */ |
---|
367 | ) |
---|
368 | { |
---|
369 | register int i, j, ii, jj, k, l; |
---|
370 | int nvis, nnz, retval; |
---|
371 | |
---|
372 | /* The following are work arrays that are dynamically allocated by sba_motstr_levmar_x() */ |
---|
373 | double *jac; /* work array for storing the jacobian, max. size n*m*(mnp*cnp + mnp*pnp) */ |
---|
374 | double *U; /* work array for storing the U_j in the order U_1, ..., U_m, size m*cnp*cnp */ |
---|
375 | double *V; /* work array for storing the V_i in the order V_1, ..., V_n, size n*pnp*pnp */ |
---|
376 | double *V_1; /* work array for storing the (V*_i)^-1 in the order (V*_1)^-1, ..., (V*_n)^-1, size n*pnp*pnp */ |
---|
377 | |
---|
378 | double *e; /* work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm, |
---|
379 | max. size n*m*mnp */ |
---|
380 | double *eab; /* work array for storing the ea_j & eb_i in the order ea_1, .. ea_m eb_1, .. eb_n size m*cnp + n*pnp */ |
---|
381 | |
---|
382 | double *E; /* work array for storing the e_j in the order e_1, .. e_m, size m*cnp */ |
---|
383 | |
---|
384 | /* Notice that the blocks W_ij, Y_ij are zero iff A_ij (equivalently B_ij) is zero. This means |
---|
385 | * that the matrices consisting of blocks W_ij and Y_ij are themselves sparse, similarly to the |
---|
386 | * block matrices made up of the A_ij and B_ij (i.e. jaca and jacb) |
---|
387 | */ |
---|
388 | double *W; /* work array for storing the W_ij in the order W_11, ..., W_1m, ..., W_n1, ..., W_nm, |
---|
389 | max. size n*m*cnp*pnp */ |
---|
390 | double *Y; /* work array for storing the Y_ij in the order Y_11, ..., Y_1m, ..., Y_n1, ..., Y_nm, |
---|
391 | max. size n*m*cnp*pnp */ |
---|
392 | double *YWt; /* work array for storing \sum_i Y_ij W_ik^T, size cnp*cnp */ |
---|
393 | double *S; /* work array for storing the block array S_jk, size m*m*cnp*cnp */ |
---|
394 | double *dp; /* work array for storing the parameter vector updates da_1, ..., da_m, db_1, ..., db_n, size m*cnp + n*pnp */ |
---|
395 | double *Wtda; /* work array for storing \sum_j W_ij^T da_j, size pnp */ |
---|
396 | |
---|
397 | /* Of the above arrays, jac, e, W, Y are sparse and |
---|
398 | * U, V, V_1, eab, E, S, dp are dense. Sparse arrays are indexed through |
---|
399 | * idxij (see below), that is with the same mechanism as the input |
---|
400 | * measurements vector x |
---|
401 | */ |
---|
402 | |
---|
403 | double *pa, *pb, *jaca, *jacb, *ea, *eb, *dpa, *dpb; /* pointers into p, jac, eab and dp respectively */ |
---|
404 | |
---|
405 | /* submatrices sizes */ |
---|
406 | int Asz, Bsz, Usz, Vsz, |
---|
407 | Wsz, Ysz, esz, easz, ebsz, |
---|
408 | YWtsz, Wtdasz, Sblsz; |
---|
409 | |
---|
410 | int Sdim; /* S matrix actual dimension */ |
---|
411 | |
---|
412 | register double *ptr1, *ptr2, *ptr3, *ptr4, sum; |
---|
413 | struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x. This is also the location of A_ij |
---|
414 | * in jaca, B_ij in jacb, etc. |
---|
415 | * This matrix can be thought as a map from a sparse set of pairs (i, j) to a continuous |
---|
416 | * index k and it is used to efficiently lookup the memory locations where the non-zero |
---|
417 | * blocks of a sparse matrix/vector are stored |
---|
418 | */ |
---|
419 | int maxnm=(n>=m)? n:m, /* max. of (n, m) */ |
---|
420 | *rcidxs, /* work array for the indexes corresponding to the nonzero elements of a single row or |
---|
421 | column in a sparse matrix, size max(n, m) */ |
---|
422 | *rcsubs; /* work array for the subscripts of nonzero elements in a single row or column of a |
---|
423 | sparse matrix, size max(n, m) */ |
---|
424 | |
---|
425 | /* The following variables are needed by the LM algorithm */ |
---|
426 | register int itno; /* iteration counter */ |
---|
427 | int issolved; |
---|
428 | /* temporary work arrays that are dynamically allocated */ |
---|
429 | double *hx, /* \hat{x}_i, max. size m*n*mnp */ |
---|
430 | *diagUV, /* diagonals of U_j, V_i, size m*cnp + n*pnp */ |
---|
431 | *pdp; /* p + dp, size m*cnp + n*pnp */ |
---|
432 | |
---|
433 | double *diagU, *diagV; /* pointers into diagUV */ |
---|
434 | |
---|
435 | register double mu, /* damping constant */ |
---|
436 | tmp; /* mainly used in matrix & vector multiplications */ |
---|
437 | double p_eL2, eab_inf, pdp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+dp)||_2 */ |
---|
438 | double p_L2, dp_L2=DBL_MAX, dF, dL; |
---|
439 | double tau=FABS(opts[0]), eps1=FABS(opts[1]), eps2=FABS(opts[2]), eps2_sq=opts[2]*opts[2], eps3_sq=opts[3]*opts[3]; |
---|
440 | double init_p_eL2; |
---|
441 | int nu=2, nu2, stop, nfev, njev=0, nlss=0; |
---|
442 | int nobs, nvars; |
---|
443 | const int mmcon=m-mcon; |
---|
444 | |
---|
445 | struct fdj_data_x_ fdj_data; |
---|
446 | void *jac_adata; |
---|
447 | |
---|
448 | /* Initialization */ |
---|
449 | |
---|
450 | /* block sizes */ |
---|
451 | Asz=mnp * cnp; Bsz=mnp * pnp; |
---|
452 | Usz=cnp * cnp; Vsz=pnp * pnp; |
---|
453 | Wsz=cnp * pnp; Ysz=cnp * pnp; |
---|
454 | esz=mnp; |
---|
455 | easz=cnp; ebsz=pnp; |
---|
456 | YWtsz=cnp * cnp; |
---|
457 | Wtdasz=pnp; |
---|
458 | Sblsz=cnp * cnp; |
---|
459 | Sdim=mmcon * cnp; |
---|
460 | |
---|
461 | /* count total number of visible image points */ |
---|
462 | for(i=nvis=0, jj=n*m; i<jj; ++i) |
---|
463 | nvis+=vmask[i]; |
---|
464 | |
---|
465 | nobs=nvis*mnp; |
---|
466 | nvars=m*cnp + n*pnp; |
---|
467 | if(nobs<nvars){ |
---|
468 | fprintf(stderr, "sba_motstr_levmar_x(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n", nobs, nvars); |
---|
469 | exit(1); |
---|
470 | } |
---|
471 | |
---|
472 | /* allocate work arrays */ |
---|
473 | jac=(double *)emalloc(nvis*(Asz+Bsz)*sizeof(double)); |
---|
474 | U=(double *)emalloc(m*Usz*sizeof(double)); |
---|
475 | V=(double *)emalloc(n*Vsz*sizeof(double)); |
---|
476 | V_1=(double *)emalloc(n*Vsz*sizeof(double)); |
---|
477 | e=(double *)emalloc(nobs*sizeof(double)); |
---|
478 | eab=(double *)emalloc(nvars*sizeof(double)); |
---|
479 | E=(double *)emalloc(m*cnp*sizeof(double)); |
---|
480 | W=(double *)emalloc(nvis*Wsz*sizeof(double)); |
---|
481 | Y=(double *)emalloc(nvis*Ysz*sizeof(double)); |
---|
482 | YWt=(double *)emalloc(YWtsz*sizeof(double)); |
---|
483 | S=(double *)emalloc(m*m*Sblsz*sizeof(double)); |
---|
484 | dp=(double *)emalloc(nvars*sizeof(double)); |
---|
485 | Wtda=(double *)emalloc(pnp*sizeof(double)); |
---|
486 | rcidxs=(int *)emalloc(maxnm*sizeof(int)); |
---|
487 | rcsubs=(int *)emalloc(maxnm*sizeof(int)); |
---|
488 | |
---|
489 | |
---|
490 | hx=(double *)emalloc(nobs*sizeof(double)); |
---|
491 | diagUV=(double *)emalloc(nvars*sizeof(double)); |
---|
492 | pdp=(double *)emalloc(nvars*sizeof(double)); |
---|
493 | |
---|
494 | |
---|
495 | /* set up auxiliary pointers */ |
---|
496 | pa=p; pb=p+m*cnp; |
---|
497 | jaca=jac; jacb=jac+nvis*Asz; |
---|
498 | ea=eab; eb=eab+m*cnp; |
---|
499 | dpa=dp; dpb=dp+m*cnp; |
---|
500 | |
---|
501 | diagU=diagUV; diagV=diagUV + m*cnp; |
---|
502 | |
---|
503 | |
---|
504 | /* allocate & fill up the idxij structure */ |
---|
505 | sba_crsm_alloc(&idxij, n, m, nvis); |
---|
506 | for(i=k=0; i<n; ++i){ |
---|
507 | idxij.rowptr[i]=k; |
---|
508 | ii=i*m; |
---|
509 | for(j=0; j<m; ++j) |
---|
510 | if(vmask[ii+j]){ |
---|
511 | idxij.val[k]=k; |
---|
512 | idxij.colidx[k++]=j; |
---|
513 | } |
---|
514 | } |
---|
515 | idxij.rowptr[n]=nvis; |
---|
516 | |
---|
517 | /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */ |
---|
518 | if(!fjac){ |
---|
519 | fdj_data.func=func; |
---|
520 | fdj_data.cnp=cnp; |
---|
521 | fdj_data.pnp=pnp; |
---|
522 | fdj_data.mnp=mnp; |
---|
523 | fdj_data.hx=hx; |
---|
524 | fdj_data.hxx=(double *)emalloc(nobs*sizeof(double)); |
---|
525 | fdj_data.func_rcidxs=(int *)emalloc(2*maxnm*sizeof(int)); |
---|
526 | fdj_data.func_rcsubs=fdj_data.func_rcidxs+maxnm; |
---|
527 | fdj_data.adata=adata; |
---|
528 | |
---|
529 | fjac=sba_fdjac_x; |
---|
530 | jac_adata=(void *)&fdj_data; |
---|
531 | } |
---|
532 | else{ |
---|
533 | fdj_data.hxx=NULL; |
---|
534 | jac_adata=adata; |
---|
535 | } |
---|
536 | |
---|
537 | if(itmax==0){ /* verify jacobian */ |
---|
538 | sba_motstr_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, mcon, cnp, pnp, mnp, adata, jac_adata); |
---|
539 | retval=0; |
---|
540 | goto freemem_and_return; |
---|
541 | } |
---|
542 | |
---|
543 | /* compute the error vectors e_ij in hx */ |
---|
544 | (*func)(p, &idxij, rcidxs, rcsubs, hx, adata); nfev=1; |
---|
545 | /* compute e=x - f(p) and its L2 norm */ |
---|
546 | for(i=0, p_eL2=0.0; i<nobs; ++i){ |
---|
547 | e[i]=tmp=x[i]-hx[i]; |
---|
548 | p_eL2+=tmp*tmp; |
---|
549 | } |
---|
550 | |
---|
551 | if(verbose) printf("initial motstr-SBA error %g [%g]\n", p_eL2, p_eL2/nvis); |
---|
552 | init_p_eL2=p_eL2; |
---|
553 | |
---|
554 | for(itno=stop=0; itno<itmax && !stop; ++itno){ |
---|
555 | /* Note that p, e and ||e||_2 have been updated at the previous iteration */ |
---|
556 | |
---|
557 | /* compute derivative submatrices A_ij, B_ij */ |
---|
558 | (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); ++njev; |
---|
559 | |
---|
560 | /* compute U_j = \sum_i A_ij^T A_ij */ // \Sigma here! |
---|
561 | /* U_j is symmetric, therefore its computation can be speeded up by |
---|
562 | * computing only the upper part and then reusing it for the lower one. |
---|
563 | * Recall that A_ij is mnp x cnp |
---|
564 | */ |
---|
565 | /* Also compute ea_j = \sum_i A_ij^T e_ij */ // \Sigma here! |
---|
566 | /* Recall that e_ij is mnp x 1 |
---|
567 | */ |
---|
568 | _dblzero(U, m*Usz); /* clear all U_j */ |
---|
569 | _dblzero(ea, m*easz); /* clear all ea_j */ |
---|
570 | for(j=mcon; j<m; ++j){ |
---|
571 | ptr1=U + j*Usz; // set ptr1 to point to U_j |
---|
572 | ptr2=ea + j*easz; // set ptr2 to point to ea_j |
---|
573 | |
---|
574 | nnz=sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */ |
---|
575 | for(i=0; i<nnz; ++i){ |
---|
576 | /* set ptr3 to point to A_ij, actual row number in rcsubs[i] */ |
---|
577 | ptr3=jaca + idxij.val[rcidxs[i]]*Asz; |
---|
578 | |
---|
579 | /* compute the UPPER TRIANGULAR PART of A_ij^T A_ij and add it to U_j */ |
---|
580 | for(ii=0; ii<cnp; ++ii){ |
---|
581 | for(jj=ii; jj<cnp; ++jj){ |
---|
582 | for(k=0, sum=0.0; k<mnp; ++k) |
---|
583 | sum+=ptr3[k*cnp+ii]*ptr3[k*cnp+jj]; |
---|
584 | ptr1[ii*cnp+jj]+=sum; |
---|
585 | } |
---|
586 | |
---|
587 | /* copy the LOWER TRIANGULAR PART of U_j from the upper one */ |
---|
588 | for(jj=0; jj<ii; ++jj) |
---|
589 | ptr1[ii*cnp+jj]=ptr1[jj*cnp+ii]; |
---|
590 | } |
---|
591 | |
---|
592 | ptr4=e + idxij.val[rcidxs[i]]*esz; /* set ptr4 to point to e_ij */ |
---|
593 | /* compute A_ij^T e_ij and add it to ea_j */ |
---|
594 | for(ii=0; ii<cnp; ++ii){ |
---|
595 | for(jj=0, sum=0.0; jj<mnp; ++jj) |
---|
596 | sum+=ptr3[jj*cnp+ii]*ptr4[jj]; |
---|
597 | ptr2[ii]+=sum; |
---|
598 | } |
---|
599 | } |
---|
600 | } |
---|
601 | |
---|
602 | /* compute V_i = \sum_j B_ij^T B_ij */ // \Sigma here! |
---|
603 | /* V_i is symmetric, therefore its computation can be speeded up by |
---|
604 | * computing only the upper part and then reusing it for the lower one. |
---|
605 | * Recall that B_ij is mnp x pnp |
---|
606 | */ |
---|
607 | /* Also compute eb_i = \sum_j B_ij^T e_ij */ // \Sigma here! |
---|
608 | /* Recall that e_ij is mnp x 1 |
---|
609 | */ |
---|
610 | _dblzero(V, n*Vsz); /* clear all V_i */ |
---|
611 | _dblzero(eb, n*ebsz); /* clear all eb_i */ |
---|
612 | for(i=0; i<n; ++i){ |
---|
613 | ptr1=V + i*Vsz; // set ptr1 to point to V_i |
---|
614 | ptr2=eb + i*ebsz; // set ptr2 to point to eb_i |
---|
615 | |
---|
616 | nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */ |
---|
617 | for(j=0; j<nnz; ++j){ |
---|
618 | /* set ptr3 to point to B_ij, actual column number in rcsubs[j] */ |
---|
619 | ptr3=jacb + idxij.val[rcidxs[j]]*Bsz; |
---|
620 | |
---|
621 | /* compute the UPPER TRIANGULAR PART of B_ij^T B_ij and add it to V_i */ |
---|
622 | for(ii=0; ii<pnp; ++ii){ |
---|
623 | for(jj=ii; jj<pnp; ++jj){ |
---|
624 | for(k=0, sum=0.0; k<mnp; ++k) |
---|
625 | sum+=ptr3[k*pnp+ii]*ptr3[k*pnp+jj]; |
---|
626 | ptr1[ii*pnp+jj]+=sum; |
---|
627 | } |
---|
628 | |
---|
629 | /* copy the LOWER TRIANGULAR PART of V_i from the upper one */ |
---|
630 | for(jj=0; jj<ii; ++jj) |
---|
631 | ptr1[ii*pnp+jj]=ptr1[jj*pnp+ii]; |
---|
632 | } |
---|
633 | |
---|
634 | ptr4=e + idxij.val[rcidxs[j]]*esz; /* set ptr4 to point to e_ij */ |
---|
635 | /* compute B_ij^T e_ij and add it to eb_i */ |
---|
636 | for(ii=0; ii<pnp; ++ii){ |
---|
637 | for(jj=0, sum=0.0; jj<mnp; ++jj) |
---|
638 | sum+=ptr3[jj*pnp+ii]*ptr4[jj]; |
---|
639 | ptr2[ii]+=sum; |
---|
640 | } |
---|
641 | } |
---|
642 | } |
---|
643 | |
---|
644 | /* compute W_ij = A_ij^T B_ij */ // \Sigma here! |
---|
645 | /* Recall that A_ij is mnp x cnp and B_ij is mnp x pnp |
---|
646 | */ |
---|
647 | _dblzero(W, nvis*Wsz); /* clear all W_ij */ |
---|
648 | for(i=0; i<n; ++i){ |
---|
649 | nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */ |
---|
650 | for(j=0; j<nnz; ++j){ |
---|
651 | /* set ptr1 to point to W_ij, actual column number in rcsubs[j] */ |
---|
652 | if(rcsubs[j]<mcon) continue; /* A_ij is zero */ |
---|
653 | |
---|
654 | ptr1=W + idxij.val[rcidxs[j]]*Wsz; |
---|
655 | /* set ptr2 & ptr3 to point to A_ij & B_ij resp. */ |
---|
656 | ptr2=jaca + idxij.val[rcidxs[j]]*Asz; |
---|
657 | ptr3=jacb + idxij.val[rcidxs[j]]*Bsz; |
---|
658 | /* compute A_ij^T B_ij and store it in W_ij */ |
---|
659 | for(ii=0; ii<cnp; ++ii) |
---|
660 | for(jj=0; jj<pnp; ++jj){ |
---|
661 | for(k=0, sum=0.0; k<mnp; ++k) |
---|
662 | sum+=ptr2[k*cnp+ii]*ptr3[k*pnp+jj]; |
---|
663 | ptr1[ii*pnp+jj]=sum; |
---|
664 | } |
---|
665 | } |
---|
666 | } |
---|
667 | |
---|
668 | /* Compute ||J^T e||_inf and ||p||^2 */ |
---|
669 | for(i=0, p_L2=eab_inf=0.0; i<nvars; ++i){ |
---|
670 | if(eab_inf < (tmp=FABS(eab[i]))) eab_inf=tmp; |
---|
671 | p_L2+=p[i]*p[i]; |
---|
672 | } |
---|
673 | //p_L2=sqrt(p_L2); |
---|
674 | |
---|
675 | /* save diagonal entries so that augmentation can be later canceled. |
---|
676 | * Diagonal entries are in U_j and V_i |
---|
677 | */ |
---|
678 | for(j=mcon; j<m; ++j){ |
---|
679 | ptr1=U + j*Usz; // set ptr1 to point to U_j |
---|
680 | ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j |
---|
681 | for(i=0; i<cnp; ++i) |
---|
682 | ptr2[i]=ptr1[i*cnp+i]; |
---|
683 | } |
---|
684 | for(i=0; i<n; ++i){ |
---|
685 | ptr1=V + i*Vsz; // set ptr1 to point to V_i |
---|
686 | ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i |
---|
687 | for(j=0; j<pnp; ++j) |
---|
688 | ptr2[j]=ptr1[j*pnp+j]; |
---|
689 | } |
---|
690 | |
---|
691 | /* |
---|
692 | if(!(itno%100)){ |
---|
693 | printf("Current estimate: "); |
---|
694 | for(i=0; i<nvars; ++i) |
---|
695 | printf("%.9g ", p[i]); |
---|
696 | printf("-- errors %.9g %0.9g\n", eab_inf, p_eL2); |
---|
697 | } |
---|
698 | */ |
---|
699 | |
---|
700 | /* check for convergence */ |
---|
701 | if((eab_inf <= eps1)){ |
---|
702 | dp_L2=0.0; /* no increment for p in this case */ |
---|
703 | stop=1; |
---|
704 | break; |
---|
705 | } |
---|
706 | |
---|
707 | /* compute initial damping factor */ |
---|
708 | if(itno==0){ |
---|
709 | for(i=mcon*cnp, tmp=DBL_MIN; i<nvars; ++i) |
---|
710 | if(diagUV[i]>tmp) tmp=diagUV[i]; /* find max diagonal element */ |
---|
711 | mu=tau*tmp; |
---|
712 | } |
---|
713 | |
---|
714 | /* determine increment using adaptive damping */ |
---|
715 | while(1){ |
---|
716 | /* augment U, V */ |
---|
717 | for(j=mcon; j<m; ++j){ |
---|
718 | ptr1=U + j*Usz; // set ptr1 to point to U_j |
---|
719 | for(i=0; i<cnp; ++i) |
---|
720 | ptr1[i*cnp+i]+=mu; |
---|
721 | } |
---|
722 | for(i=0; i<n; ++i){ |
---|
723 | ptr1=V + i*Vsz; // set ptr1 to point to V_i |
---|
724 | for(j=0; j<pnp; ++j) |
---|
725 | ptr1[j*pnp+j]+=mu; |
---|
726 | |
---|
727 | /* compute V*_i^-1 */ |
---|
728 | ptr2=V_1 + i*Vsz; // set ptr2 to point to (V*_i)^-1 |
---|
729 | /* inverting V*_i with LU seems to be faster than Cholesky */ |
---|
730 | j=sba_mat_invert_LU(ptr1, ptr2, pnp); //invert the pnp x pnp matrix pointed to by ptr1 and save it in ptr2 |
---|
731 | //j=sba_mat_invert_Chol(ptr1, ptr2, pnp); //invert the pnp x pnp matrix pointed to by ptr1 and save it in ptr2 |
---|
732 | if(!j){ |
---|
733 | fprintf(stderr, "Singular matrix V*_i (i=%d) in sba_motstr_levmar_x()\n", i); |
---|
734 | exit(1); |
---|
735 | } |
---|
736 | } |
---|
737 | |
---|
738 | /* compute Y_ij = W_ij (V*_i)^-1 |
---|
739 | * Recall that W_ij is cnp x pnp and (V*_i) is pnp x pnp |
---|
740 | */ |
---|
741 | _dblzero(Y, nvis*Ysz); /* clear all Y_ij */ |
---|
742 | for(i=0; i<n; ++i){ |
---|
743 | /* set ptr3 to point to (V*_i)^-1 */ |
---|
744 | ptr3=V_1 + i*Vsz; |
---|
745 | nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */ |
---|
746 | for(j=0; j<nnz; ++j){ |
---|
747 | /* set ptr1 to point to Y_ij, actual column number in rcsubs[j] */ |
---|
748 | if(rcsubs[j]<mcon) continue; /* W_ij is zero */ |
---|
749 | |
---|
750 | ptr1=Y + idxij.val[rcidxs[j]]*Ysz; |
---|
751 | /* set ptr2 to point to W_ij resp. */ |
---|
752 | ptr2=W + idxij.val[rcidxs[j]]*Wsz; |
---|
753 | /* compute W_ij (V*_i)^-1 and store it in Y_ij */ |
---|
754 | for(ii=0; ii<cnp; ++ii) |
---|
755 | for(jj=0; jj<pnp; ++jj){ |
---|
756 | for(k=0, sum=0.0; k<pnp; ++k) |
---|
757 | sum+=ptr2[ii*pnp+k]*ptr3[k*pnp+jj]; |
---|
758 | ptr1[ii*pnp+jj]=sum; |
---|
759 | } |
---|
760 | } |
---|
761 | } |
---|
762 | |
---|
763 | _dblzero(E, m*easz); /* clear all e_j */ |
---|
764 | /* compute the mmcon x mmcon block matrix S and e_j */ |
---|
765 | |
---|
766 | /* Note that S is symmetric, therefore its computation can be |
---|
767 | * speeded up by computing only the upper part and then reusing |
---|
768 | * it for the lower one. |
---|
769 | */ |
---|
770 | for(j=mcon; j<m; ++j){ |
---|
771 | nnz=sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero Y_ij, i=0...n-1 */ |
---|
772 | |
---|
773 | /* compute the UPPER TRIANGULAR PART of S */ |
---|
774 | for(k=j; k<m; ++k){ // j>=mcon |
---|
775 | /* compute \sum_i Y_ij W_ik^T in YWt */ |
---|
776 | /* Recall that Y_ij is cnp x pnp and W_ik is cnp x pnp */ |
---|
777 | _dblzero(YWt, YWtsz); /* clear YWt */ |
---|
778 | |
---|
779 | for(i=0; i<nnz; ++i){ |
---|
780 | register double *pYWt; |
---|
781 | |
---|
782 | /* find the min and max column indices of the elements in row i (actually rcsubs[i]) |
---|
783 | * and make sure that k falls within them. This test handles W_ik's which are |
---|
784 | * certain to be zero without bothering to call sba_crsm_elmidx() |
---|
785 | */ |
---|
786 | ii=idxij.colidx[idxij.rowptr[rcsubs[i]]]; |
---|
787 | jj=idxij.colidx[idxij.rowptr[rcsubs[i]+1]-1]; |
---|
788 | if(k<ii || k>jj) continue; /* W_ik == 0 */ |
---|
789 | |
---|
790 | /* set ptr2 to point to W_ik */ |
---|
791 | l=sba_crsm_elmidx(&idxij, rcsubs[i], k); |
---|
792 | if(l==-1) continue; /* W_ik == 0 */ |
---|
793 | |
---|
794 | ptr2=W + idxij.val[l]*Wsz; |
---|
795 | /* set ptr1 to point to Y_ij, actual row number in rcsubs[i] */ |
---|
796 | ptr1=Y + idxij.val[rcidxs[i]]*Ysz; |
---|
797 | for(ii=0; ii<cnp; ++ii){ |
---|
798 | ptr3=ptr1+ii*pnp; |
---|
799 | pYWt=YWt+ii*cnp; |
---|
800 | |
---|
801 | for(jj=0; jj<cnp; ++jj){ |
---|
802 | ptr4=ptr2+jj*pnp; |
---|
803 | for(l=0, sum=0.0; l<pnp; ++l) |
---|
804 | sum+=ptr3[l]*ptr4[l]; //ptr1[ii*pnp+l]*ptr2[jj*pnp+l]; |
---|
805 | pYWt[jj]+=sum; //YWt[ii*cnp+jj]+=sum; |
---|
806 | } |
---|
807 | } |
---|
808 | } |
---|
809 | |
---|
810 | ptr1=U + j*Usz; // set ptr1 to point to U_j |
---|
811 | |
---|
812 | /* since the linear system involving S is solved with lapack, |
---|
813 | * it is preferable to store S in column major (i.e. fortran) |
---|
814 | * order, so as to avoid unecessary transposing/copying. |
---|
815 | */ |
---|
816 | #if MAT_STORAGE==COLUMN_MAJOR |
---|
817 | ptr2=S + (k-mcon)*mmcon*Usz + (j-mcon)*cnp; // set ptr2 to point to the beginning of block j,k in S |
---|
818 | #else |
---|
819 | ptr2=S + (j-mcon)*mmcon*Usz + (k-mcon)*cnp; // set ptr2 to point to the beginning of block j,k in S |
---|
820 | #endif |
---|
821 | |
---|
822 | if(j!=k){ /* Kronecker */ |
---|
823 | for(ii=0; ii<cnp; ++ii, ptr2+=Sdim) |
---|
824 | for(jj=0; jj<cnp; ++jj) |
---|
825 | ptr2[jj]= |
---|
826 | #if MAT_STORAGE==COLUMN_MAJOR |
---|
827 | -YWt[jj*cnp+ii]; |
---|
828 | #else |
---|
829 | -YWt[ii*cnp+jj]; |
---|
830 | #endif |
---|
831 | } |
---|
832 | else{ |
---|
833 | for(ii=0; ii<cnp; ++ii, ptr2+=Sdim) |
---|
834 | for(jj=0; jj<cnp; ++jj) |
---|
835 | ptr2[jj]= |
---|
836 | #if MAT_STORAGE==COLUMN_MAJOR |
---|
837 | ptr1[jj*cnp+ii] - YWt[jj*cnp+ii]; |
---|
838 | #else |
---|
839 | ptr1[ii*cnp+jj] - YWt[ii*cnp+jj]; |
---|
840 | #endif |
---|
841 | } |
---|
842 | } |
---|
843 | |
---|
844 | /* copy the LOWER TRIANGULAR PART of S from the upper one */ |
---|
845 | for(k=mcon; k<j; ++k){ |
---|
846 | #if MAT_STORAGE==COLUMN_MAJOR |
---|
847 | ptr1=S + (k-mcon)*mmcon*Usz + (j-mcon)*cnp; // set ptr1 to point to the beginning of block j,k in S |
---|
848 | ptr2=S + (j-mcon)*mmcon*Usz + (k-mcon)*cnp; // set ptr2 to point to the beginning of block k,j in S |
---|
849 | #else |
---|
850 | ptr1=S + (j-mcon)*mmcon*Usz + (k-mcon)*cnp; // set ptr1 to point to the beginning of block j,k in S |
---|
851 | ptr2=S + (k-mcon)*mmcon*Usz + (j-mcon)*cnp; // set ptr2 to point to the beginning of block k,j in S |
---|
852 | #endif |
---|
853 | for(ii=0; ii<cnp; ++ii, ptr1+=Sdim) |
---|
854 | for(jj=0, ptr3=ptr2+ii; jj<cnp; ++jj, ptr3+=Sdim) |
---|
855 | ptr1[jj]=*ptr3; |
---|
856 | } |
---|
857 | |
---|
858 | /* compute e_j=ea_j - \sum_i Y_ij eb_i */ |
---|
859 | /* Recall that Y_ij is cnp x pnp and eb_i is pnp x 1 */ |
---|
860 | ptr1=E + j*easz; // set ptr1 to point to e_j |
---|
861 | |
---|
862 | for(i=0; i<nnz; ++i){ |
---|
863 | /* set ptr2 to point to Y_ij, actual row number in rcsubs[i] */ |
---|
864 | ptr2=Y + idxij.val[rcidxs[i]]*Ysz; |
---|
865 | |
---|
866 | /* set ptr3 to point to eb_i */ |
---|
867 | ptr3=eb + rcsubs[i]*ebsz; |
---|
868 | for(ii=0; ii<cnp; ++ii){ |
---|
869 | ptr4=ptr2+ii*pnp; |
---|
870 | for(jj=0, sum=0.0; jj<pnp; ++jj) |
---|
871 | sum+=ptr4[jj]*ptr3[jj]; //ptr2[ii*pnp+jj]*ptr3[jj]; |
---|
872 | ptr1[ii]+=sum; |
---|
873 | } |
---|
874 | } |
---|
875 | |
---|
876 | ptr2=ea + j*easz; // set ptr2 to point to ea_j |
---|
877 | for(i=0; i<easz; ++i) |
---|
878 | ptr1[i]=ptr2[i] - ptr1[i]; |
---|
879 | } |
---|
880 | |
---|
881 | #if 0 |
---|
882 | if(verbose>1){ /* count the nonzeros in S */ |
---|
883 | for(i=ii=0; i<Sdim*Sdim; ++i) |
---|
884 | if(S[i]!=0.0) ++ii; |
---|
885 | printf("\nS density %10g\n", ((double)ii)/(Sdim*Sdim)); |
---|
886 | |
---|
887 | } |
---|
888 | #endif |
---|
889 | |
---|
890 | /* solve the linear system S dpa = E to compute the da_j. |
---|
891 | * |
---|
892 | * Note that if MAT_STORAGE==1 S is modified in the following call; |
---|
893 | * this is OK since S is recomputed for each iteration |
---|
894 | */ |
---|
895 | //issolved=sba_Axb_LU(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); ++nlss; |
---|
896 | issolved=sba_Axb_Chol(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); ++nlss; |
---|
897 | //issolved=sba_Axb_QRnoQ(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); ++nlss; |
---|
898 | //issolved=sba_Axb_QR(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); ++nlss; |
---|
899 | //issolved=sba_Axb_SVD(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, MAT_STORAGE); ++nlss; |
---|
900 | //issolved=sba_Axb_CG(S, E+mcon*cnp, dpa+mcon*cnp, Sdim, (3*Sdim)/2, 1E-10, SBA_CG_JACOBI, MAT_STORAGE); ++nlss; |
---|
901 | |
---|
902 | _dblzero(dpa, mcon*cnp); /* no change for the first mcon camera params */ |
---|
903 | |
---|
904 | if(issolved){ |
---|
905 | |
---|
906 | /* compute the db_i */ |
---|
907 | for(i=0; i<n; ++i){ |
---|
908 | ptr1=dpb + i*ebsz; // set ptr1 to point to db_i |
---|
909 | |
---|
910 | /* compute \sum_j W_ij^T da_j */ |
---|
911 | /* Recall that W_ij is cnp x pnp and da_j is cnp x 1 */ |
---|
912 | _dblzero(Wtda, Wtdasz); /* clear Wtda */ |
---|
913 | nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */ |
---|
914 | for(j=0; j<nnz; ++j){ |
---|
915 | /* set ptr2 to point to W_ij, actual column number in rcsubs[j] */ |
---|
916 | if(rcsubs[j]<mcon) continue; /* W_ij is zero */ |
---|
917 | |
---|
918 | ptr2=W + idxij.val[rcidxs[j]]*Wsz; |
---|
919 | |
---|
920 | /* set ptr3 to point to da_j */ |
---|
921 | ptr3=dpa + rcsubs[j]*cnp; |
---|
922 | |
---|
923 | for(ii=0; ii<pnp; ++ii){ |
---|
924 | ptr4=ptr2+ii; |
---|
925 | for(jj=0, sum=0.0; jj<cnp; ++jj) |
---|
926 | sum+=ptr4[jj*pnp]*ptr3[jj]; //ptr2[jj*pnp+ii]*ptr3[jj]; |
---|
927 | Wtda[ii]+=sum; |
---|
928 | } |
---|
929 | } |
---|
930 | |
---|
931 | /* compute eb_i - \sum_j W_ij^T da_j = eb_i - Wtda in Wtda */ |
---|
932 | ptr2=eb + i*ebsz; // set ptr2 to point to eb_i |
---|
933 | for(ii=0; ii<pnp; ++ii) |
---|
934 | Wtda[ii]=ptr2[ii] - Wtda[ii]; |
---|
935 | |
---|
936 | /* compute the product (V*_i)^-1 Wtda = (V*_i)^-1 (eb_i - \sum_j W_ij^T da_j) */ |
---|
937 | ptr2=V_1 + i*Vsz; // set ptr2 to point to (V*_i)^-1 |
---|
938 | for(ii=0; ii<pnp; ++ii){ |
---|
939 | for(jj=0, sum=0.0; jj<pnp; ++jj) |
---|
940 | sum+=ptr2[ii*pnp+jj]*Wtda[jj]; |
---|
941 | ptr1[ii]=sum; |
---|
942 | } |
---|
943 | } |
---|
944 | |
---|
945 | /* parameter vector updates are now in dpa, dpb */ |
---|
946 | |
---|
947 | /* compute p's new estimate and ||dp||^2 */ |
---|
948 | for(i=0, dp_L2=0.0; i<nvars; ++i){ |
---|
949 | pdp[i]=p[i] + (tmp=dp[i]); |
---|
950 | dp_L2+=tmp*tmp; |
---|
951 | } |
---|
952 | //dp_L2=sqrt(dp_L2); |
---|
953 | |
---|
954 | if(dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */ |
---|
955 | //if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */ |
---|
956 | stop=2; |
---|
957 | break; |
---|
958 | } |
---|
959 | |
---|
960 | if(dp_L2>=(p_L2+eps2)/SBA_EPSILON_SQ){ /* almost singular */ |
---|
961 | //if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */ |
---|
962 | stop=4; |
---|
963 | break; |
---|
964 | } |
---|
965 | |
---|
966 | (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata); ++nfev; /* evaluate function at p + dp */ |
---|
967 | if(verbose>1) |
---|
968 | printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs)); |
---|
969 | for(i=0, pdp_eL2=0.0; i<nobs; ++i){ /* compute ||e(pdp)||_2 */ |
---|
970 | hx[i]=tmp=x[i]-hx[i]; |
---|
971 | pdp_eL2+=tmp*tmp; |
---|
972 | } |
---|
973 | |
---|
974 | for(i=0, dL=0.0; i<nvars; ++i) |
---|
975 | dL+=dp[i]*(mu*dp[i]+eab[i]); |
---|
976 | |
---|
977 | dF=p_eL2-pdp_eL2; |
---|
978 | |
---|
979 | if(verbose>1) |
---|
980 | printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu, dL!=0.0? dF/dL : dF/DBL_EPSILON, p_eL2/nvis, pdp_eL2/nvis, p_eL2/pdp_eL2); |
---|
981 | |
---|
982 | if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */ |
---|
983 | tmp=(2.0*dF/dL-1.0); |
---|
984 | tmp=1.0-tmp*tmp*tmp; |
---|
985 | mu=mu*( (tmp>=SBA_ONE_THIRD)? tmp : SBA_ONE_THIRD ); |
---|
986 | nu=2; |
---|
987 | |
---|
988 | for(i=0; i<nvars; ++i) /* update p's estimate */ |
---|
989 | p[i]=pdp[i]; |
---|
990 | |
---|
991 | for(i=0; i<nobs; ++i) /* update e and ||e||_2 */ |
---|
992 | e[i]=hx[i]; |
---|
993 | p_eL2=pdp_eL2; |
---|
994 | break; |
---|
995 | } |
---|
996 | } /* issolved */ |
---|
997 | |
---|
998 | /* if this point is reached, either the linear system could not be solved or |
---|
999 | * the error did not reduce; in any case, the increment must be rejected |
---|
1000 | */ |
---|
1001 | |
---|
1002 | mu*=nu; |
---|
1003 | nu2=nu<<1; // 2*nu; |
---|
1004 | if(nu2<=nu){ /* nu has wrapped around (overflown) */ |
---|
1005 | fprintf(stderr, "Too many failed attempts to increase the damping factor in sba_motstr_levmar_x()! Singular hessian matrix?\n"); |
---|
1006 | //exit(1); |
---|
1007 | stop=6; |
---|
1008 | break; |
---|
1009 | } |
---|
1010 | nu=nu2; |
---|
1011 | |
---|
1012 | /* restore U, V diagonal entries */ |
---|
1013 | for(j=mcon; j<m; ++j){ |
---|
1014 | ptr1=U + j*Usz; // set ptr1 to point to U_j |
---|
1015 | ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j |
---|
1016 | for(i=0; i<cnp; ++i) |
---|
1017 | ptr1[i*cnp+i]=ptr2[i]; |
---|
1018 | } |
---|
1019 | for(i=0; i<n; ++i){ |
---|
1020 | ptr1=V + i*Vsz; // set ptr1 to point to V_i |
---|
1021 | ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i |
---|
1022 | for(j=0; j<pnp; ++j) |
---|
1023 | ptr1[j*pnp+j]=ptr2[j]; |
---|
1024 | } |
---|
1025 | } /* inner loop */ |
---|
1026 | |
---|
1027 | if(p_eL2<=eps3_sq) stop=5; // error is small, force termination of outer loop |
---|
1028 | } |
---|
1029 | |
---|
1030 | if(itno>=itmax) stop=3; |
---|
1031 | |
---|
1032 | /* restore U, V diagonal entries */ |
---|
1033 | for(j=mcon; j<m; ++j){ |
---|
1034 | ptr1=U + j*Usz; // set ptr1 to point to U_j |
---|
1035 | ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j |
---|
1036 | for(i=0; i<cnp; ++i) |
---|
1037 | ptr1[i*cnp+i]=ptr2[i]; |
---|
1038 | } |
---|
1039 | for(i=0; i<n; ++i){ |
---|
1040 | ptr1=V + i*Vsz; // set ptr1 to point to V_i |
---|
1041 | ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i |
---|
1042 | for(j=0; j<pnp; ++j) |
---|
1043 | ptr1[j*pnp+j]=ptr2[j]; |
---|
1044 | } |
---|
1045 | |
---|
1046 | if(info){ |
---|
1047 | info[0]=init_p_eL2; |
---|
1048 | info[1]=p_eL2; |
---|
1049 | info[2]=eab_inf; |
---|
1050 | info[3]=dp_L2; |
---|
1051 | for(j=mcon, tmp=DBL_MIN; j<m; ++j){ |
---|
1052 | ptr1=U + j*Usz; // set ptr1 to point to U_j |
---|
1053 | for(i=0; i<cnp; ++i) |
---|
1054 | if(tmp<ptr1[i*cnp+i]) tmp=ptr1[i*cnp+i]; |
---|
1055 | } |
---|
1056 | for(i=0; i<n; ++i){ |
---|
1057 | ptr1=V + i*Vsz; // set ptr1 to point to V_i |
---|
1058 | for(j=0; j<pnp; ++j) |
---|
1059 | if(tmp<ptr1[j*pnp+j]) tmp=ptr1[j*pnp+j]; |
---|
1060 | } |
---|
1061 | info[4]=mu/tmp; |
---|
1062 | info[5]=itno; |
---|
1063 | info[6]=stop; |
---|
1064 | info[7]=nfev; |
---|
1065 | info[8]=njev; |
---|
1066 | info[9]=nlss; |
---|
1067 | } |
---|
1068 | |
---|
1069 | //sba_print_sol(n, m, p, cnp, pnp, x, mnp, &idxij, rcidxs, rcsubs); |
---|
1070 | retval=(stop!=4)? itno : -1; |
---|
1071 | |
---|
1072 | freemem_and_return: /* NOTE: this point is also reached via a goto! */ |
---|
1073 | |
---|
1074 | /* free whatever was allocated */ |
---|
1075 | free(jac); free(U); free(V); |
---|
1076 | free(V_1); free(e); free(eab); |
---|
1077 | free(E); free(W); free(Y); |
---|
1078 | free(YWt); free(S); free(dp); |
---|
1079 | free(Wtda); |
---|
1080 | free(rcidxs); free(rcsubs); |
---|
1081 | |
---|
1082 | free(hx); free(diagUV); free(pdp); |
---|
1083 | if(fdj_data.hxx){ // cleanup |
---|
1084 | free(fdj_data.hxx); |
---|
1085 | free(fdj_data.func_rcidxs); |
---|
1086 | } |
---|
1087 | |
---|
1088 | sba_crsm_free(&idxij); |
---|
1089 | |
---|
1090 | return retval; |
---|
1091 | } |
---|
1092 | |
---|
1093 | |
---|
1094 | /* Bundle adjustment on camera parameters only |
---|
1095 | * using the sparse Levenberg-Marquardt as described in HZ p. 568 |
---|
1096 | */ |
---|
1097 | |
---|
1098 | int sba_mot_levmar_x( |
---|
1099 | const int n, /* number of points */ |
---|
1100 | const int m, /* number of images */ |
---|
1101 | const int mcon,/* number of images (starting from the 1st) whose parameters should not be modified. |
---|
1102 | * All A_ij (see below) with j<mcon are assumed to be zero |
---|
1103 | */ |
---|
1104 | char *vmask, /* visibility mask: vmask[i][j]=1 if point i visible in image j, 0 otherwise. nxm */ |
---|
1105 | double *p, /* initial parameter vector p0: (a1, ..., am). |
---|
1106 | * aj are the image j parameters, size m*cnp */ |
---|
1107 | const int cnp,/* number of parameters for ONE camera; e.g. 6 for Euclidean cameras */ |
---|
1108 | double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where |
---|
1109 | * x_ij is the projection of the i-th point on the j-th image. |
---|
1110 | * NOTE: some of the x_ij might be missing, if point i is not visible in image j; |
---|
1111 | * see vmask[i][j], max. size n*m*mnp |
---|
1112 | */ |
---|
1113 | const int mnp,/* number of parameters for EACH measurement; usually 2 */ |
---|
1114 | void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), |
---|
1115 | /* functional relation describing measurements. Given a parameter vector p, |
---|
1116 | * computes a prediction of the measurements \hat{x}. p is (m*cnp)x1, |
---|
1117 | * \hat{x} is (n*m*mnp)x1, maximum |
---|
1118 | * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used |
---|
1119 | * as working memory |
---|
1120 | */ |
---|
1121 | void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), |
---|
1122 | /* function to evaluate the sparse jacobian dX/dp. |
---|
1123 | * The Jacobian is returned in jac as |
---|
1124 | * (dx_11/da_1, ..., dx_1m/da_m, ..., dx_n1/da_1, ..., dx_nm/da_m), or (using HZ's notation), |
---|
1125 | * jac=(A_11, ..., A_1m, ..., A_n1, ..., A_nm) |
---|
1126 | * Notice that depending on idxij, some of the A_ij might be missing. |
---|
1127 | * Note also that the A_ij are mnp x cnp matrices and they |
---|
1128 | * should be stored in jac in row-major order. |
---|
1129 | * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used |
---|
1130 | * as working memory |
---|
1131 | * |
---|
1132 | * If NULL, the jacobian is approximated by repetitive func calls and finite |
---|
1133 | * differences. This is computationally inefficient and thus NOT recommended. |
---|
1134 | */ |
---|
1135 | void *adata, /* pointer to possibly additional data, passed uninterpreted to func, fjac */ |
---|
1136 | |
---|
1137 | int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate return */ |
---|
1138 | int verbose, /* I: verbosity */ |
---|
1139 | double opts[SBA_OPTSSZ], |
---|
1140 | /* I: minim. options [\mu, \epsilon1, \epsilon2]. Respectively the scale factor for initial \mu, |
---|
1141 | * stopping thresholds for ||J^T e||_inf, ||dp||_2 and ||e||_2 |
---|
1142 | */ |
---|
1143 | double info[SBA_INFOSZ] |
---|
1144 | /* O: information regarding the minimization. Set to NULL if don't care |
---|
1145 | * info[0]=||e||_2 at initial p. |
---|
1146 | * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p. |
---|
1147 | * info[5]= # iterations, |
---|
1148 | * info[6]=reason for terminating: 1 - stopped by small gradient J^T e |
---|
1149 | * 2 - stopped by small dp |
---|
1150 | * 3 - stopped by itmax |
---|
1151 | * 4 - singular matrix. Restart from current p with increased mu |
---|
1152 | * 5 - stopped by small ||e||_2 |
---|
1153 | * 6 - too many attempts to increase damping. Restart with increased mu |
---|
1154 | * info[7]= # function evaluations |
---|
1155 | * info[8]= # jacobian evaluations |
---|
1156 | * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error |
---|
1157 | */ |
---|
1158 | ) |
---|
1159 | { |
---|
1160 | register int i, j, ii, jj, k; |
---|
1161 | int nvis, nnz, retval; |
---|
1162 | |
---|
1163 | /* The following are work arrays that are dynamically allocated by sba_mot_levmar_x() */ |
---|
1164 | double *jac; /* work array for storing the jacobian, max. size n*m*mnp*cnp */ |
---|
1165 | double *U; /* work array for storing the U_j in the order U_1, ..., U_m, size m*cnp*cnp */ |
---|
1166 | |
---|
1167 | double *e; /* work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm, |
---|
1168 | max. size n*m*mnp */ |
---|
1169 | double *ea; /* work array for storing the ea_j in the order ea_1, .. ea_m, size m*cnp */ |
---|
1170 | |
---|
1171 | double *dp; /* work array for storing the parameter vector updates da_1, ..., da_m, size m*cnp */ |
---|
1172 | |
---|
1173 | /* Of the above arrays, jac, e are sparse and |
---|
1174 | * U, ea, dp are dense. Sparse arrays are indexed through |
---|
1175 | * idxij (see below), that is with the same mechanism as the input |
---|
1176 | * measurements vector x |
---|
1177 | */ |
---|
1178 | |
---|
1179 | /* submatrices sizes */ |
---|
1180 | int Asz, Usz, |
---|
1181 | esz, easz; |
---|
1182 | |
---|
1183 | register double *ptr1, *ptr2, *ptr3, *ptr4, sum; |
---|
1184 | struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x. This is also the location of A_ij |
---|
1185 | * in jac, e_ij in e, etc. |
---|
1186 | * This matrix can be thought as a map from a sparse set of pairs (i, j) to a continuous |
---|
1187 | * index k and it is used to efficiently lookup the memory locations where the non-zero |
---|
1188 | * blocks of a sparse matrix/vector are stored |
---|
1189 | */ |
---|
1190 | int maxnm=(n>=m)? n:m, /* max. of (n, m) */ |
---|
1191 | *rcidxs, /* work array for the indexes corresponding to the nonzero elements of a single row or |
---|
1192 | column in a sparse matrix, size max(n, m) */ |
---|
1193 | *rcsubs; /* work array for the subscripts of nonzero elements in a single row or column of a |
---|
1194 | sparse matrix, size max(n, m) */ |
---|
1195 | |
---|
1196 | /* The following variables are needed by the LM algorithm */ |
---|
1197 | register int itno; /* iteration counter */ |
---|
1198 | int nsolved; |
---|
1199 | /* temporary work arrays that are dynamically allocated */ |
---|
1200 | double *hx, /* \hat{x}_i, max. size m*n*mnp */ |
---|
1201 | *diagU, /* diagonals of U_j, size m*cnp */ |
---|
1202 | *pdp; /* p + dp, size m*cnp */ |
---|
1203 | |
---|
1204 | register double mu, /* damping constant */ |
---|
1205 | tmp; /* mainly used in matrix & vector multiplications */ |
---|
1206 | double p_eL2, ea_inf, pdp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+dp)||_2 */ |
---|
1207 | double p_L2, dp_L2=DBL_MAX, dF, dL; |
---|
1208 | double tau=FABS(opts[0]), eps1=FABS(opts[1]), eps2=FABS(opts[2]), eps2_sq=opts[2]*opts[2], eps3_sq=opts[3]*opts[3]; |
---|
1209 | double init_p_eL2; |
---|
1210 | int nu=2, nu2, stop, nfev, njev=0, nlss=0; |
---|
1211 | int nobs, nvars; |
---|
1212 | |
---|
1213 | struct fdj_data_x_ fdj_data; |
---|
1214 | void *jac_adata; |
---|
1215 | |
---|
1216 | /* Initialization */ |
---|
1217 | |
---|
1218 | /* block sizes */ |
---|
1219 | Asz=mnp * cnp; Usz=cnp * cnp; |
---|
1220 | esz=mnp; easz=cnp; |
---|
1221 | |
---|
1222 | /* count total number of visible image points */ |
---|
1223 | for(i=nvis=0, jj=n*m; i<jj; ++i) |
---|
1224 | nvis+=vmask[i]; |
---|
1225 | |
---|
1226 | nobs=nvis*mnp; |
---|
1227 | nvars=m*cnp; |
---|
1228 | if(nobs<nvars){ |
---|
1229 | fprintf(stderr, "sba_mot_levmar_x(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n", nobs, nvars); |
---|
1230 | exit(1); |
---|
1231 | } |
---|
1232 | |
---|
1233 | /* allocate work arrays */ |
---|
1234 | jac=(double *)emalloc(nvis*Asz*sizeof(double)); |
---|
1235 | U=(double *)emalloc(m*Usz*sizeof(double)); |
---|
1236 | e=(double *)emalloc(nobs*sizeof(double)); |
---|
1237 | ea=(double *)emalloc(nvars*sizeof(double)); |
---|
1238 | dp=(double *)emalloc(nvars*sizeof(double)); |
---|
1239 | rcidxs=(int *)emalloc(maxnm*sizeof(int)); |
---|
1240 | rcsubs=(int *)emalloc(maxnm*sizeof(int)); |
---|
1241 | |
---|
1242 | |
---|
1243 | hx=(double *)emalloc(nobs*sizeof(double)); |
---|
1244 | diagU=(double *)emalloc(nvars*sizeof(double)); |
---|
1245 | pdp=(double *)emalloc(nvars*sizeof(double)); |
---|
1246 | |
---|
1247 | /* allocate & fill up the idxij structure */ |
---|
1248 | sba_crsm_alloc(&idxij, n, m, nvis); |
---|
1249 | for(i=k=0; i<n; ++i){ |
---|
1250 | idxij.rowptr[i]=k; |
---|
1251 | ii=i*m; |
---|
1252 | for(j=0; j<m; ++j) |
---|
1253 | if(vmask[ii+j]){ |
---|
1254 | idxij.val[k]=k; |
---|
1255 | idxij.colidx[k++]=j; |
---|
1256 | } |
---|
1257 | } |
---|
1258 | idxij.rowptr[n]=nvis; |
---|
1259 | |
---|
1260 | /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */ |
---|
1261 | if(!fjac){ |
---|
1262 | fdj_data.func=func; |
---|
1263 | fdj_data.cnp=cnp; |
---|
1264 | fdj_data.pnp=0; |
---|
1265 | fdj_data.mnp=mnp; |
---|
1266 | fdj_data.hx=hx; |
---|
1267 | fdj_data.hxx=(double *)emalloc(nobs*sizeof(double)); |
---|
1268 | fdj_data.func_rcidxs=(int *)emalloc(2*maxnm*sizeof(int)); |
---|
1269 | fdj_data.func_rcsubs=fdj_data.func_rcidxs+maxnm; |
---|
1270 | fdj_data.adata=adata; |
---|
1271 | |
---|
1272 | fjac=sba_fdjac_x; |
---|
1273 | jac_adata=(void *)&fdj_data; |
---|
1274 | } |
---|
1275 | else{ |
---|
1276 | fdj_data.hxx=NULL; |
---|
1277 | jac_adata=adata; |
---|
1278 | } |
---|
1279 | |
---|
1280 | if(itmax==0){ /* verify jacobian */ |
---|
1281 | sba_mot_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, mcon, cnp, mnp, adata, jac_adata); |
---|
1282 | retval=0; |
---|
1283 | goto freemem_and_return; |
---|
1284 | } |
---|
1285 | |
---|
1286 | /* compute the error vectors e_ij in hx */ |
---|
1287 | (*func)(p, &idxij, rcidxs, rcsubs, hx, adata); nfev=1; |
---|
1288 | /* compute e=x - f(p) and its L2 norm */ |
---|
1289 | for(i=0, p_eL2=0.0; i<nobs; ++i){ |
---|
1290 | e[i]=tmp=x[i]-hx[i]; |
---|
1291 | p_eL2+=tmp*tmp; |
---|
1292 | } |
---|
1293 | |
---|
1294 | if(verbose) printf("initial mot-SBA error %g [%g]\n", p_eL2, p_eL2/nvis); |
---|
1295 | init_p_eL2=p_eL2; |
---|
1296 | |
---|
1297 | for(itno=stop=0; itno<itmax && !stop; ++itno){ |
---|
1298 | /* Note that p, e and ||e||_2 have been updated at the previous iteration */ |
---|
1299 | |
---|
1300 | /* compute derivative submatrices A_ij */ |
---|
1301 | (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); ++njev; |
---|
1302 | |
---|
1303 | /* compute U_j = \sum_i A_ij^T A_ij */ // \Sigma here! |
---|
1304 | /* U_j is symmetric, therefore its computation can be speeded up by |
---|
1305 | * computing only the upper part and then reusing it for the lower one. |
---|
1306 | * Recall that A_ij is mnp x cnp |
---|
1307 | */ |
---|
1308 | /* Also compute ea_j = \sum_i A_ij^T e_ij */ // \Sigma here! |
---|
1309 | /* Recall that e_ij is mnp x 1 |
---|
1310 | */ |
---|
1311 | _dblzero(U, m*Usz); /* clear all U_j */ |
---|
1312 | _dblzero(ea, m*easz); /* clear all ea_j */ |
---|
1313 | for(j=mcon; j<m; ++j){ |
---|
1314 | ptr1=U + j*Usz; // set ptr1 to point to U_j |
---|
1315 | ptr2=ea + j*easz; // set ptr2 to point to ea_j |
---|
1316 | |
---|
1317 | nnz=sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */ |
---|
1318 | for(i=0; i<nnz; ++i){ |
---|
1319 | /* set ptr3 to point to A_ij, actual row number in rcsubs[i] */ |
---|
1320 | ptr3=jac + idxij.val[rcidxs[i]]*Asz; |
---|
1321 | |
---|
1322 | /* compute the UPPER TRIANGULAR PART of A_ij^T A_ij and add it to U_j */ |
---|
1323 | for(ii=0; ii<cnp; ++ii){ |
---|
1324 | for(jj=ii; jj<cnp; ++jj){ |
---|
1325 | for(k=0, sum=0.0; k<mnp; ++k) |
---|
1326 | sum+=ptr3[k*cnp+ii]*ptr3[k*cnp+jj]; |
---|
1327 | ptr1[ii*cnp+jj]+=sum; |
---|
1328 | } |
---|
1329 | |
---|
1330 | /* copy the LOWER TRIANGULAR PART of U_j from the upper one */ |
---|
1331 | for(jj=0; jj<ii; ++jj) |
---|
1332 | ptr1[ii*cnp+jj]=ptr1[jj*cnp+ii]; |
---|
1333 | } |
---|
1334 | |
---|
1335 | ptr4=e + idxij.val[rcidxs[i]]*esz; /* set ptr4 to point to e_ij */ |
---|
1336 | /* compute A_ij^T e_ij and add it to ea_j */ |
---|
1337 | for(ii=0; ii<cnp; ++ii){ |
---|
1338 | for(jj=0, sum=0.0; jj<mnp; ++jj) |
---|
1339 | sum+=ptr3[jj*cnp+ii]*ptr4[jj]; |
---|
1340 | ptr2[ii]+=sum; |
---|
1341 | } |
---|
1342 | } |
---|
1343 | } |
---|
1344 | |
---|
1345 | /* Compute ||J^T e||_inf and ||p||^2 */ |
---|
1346 | for(i=0, p_L2=ea_inf=0.0; i<nvars; ++i){ |
---|
1347 | if(ea_inf < (tmp=FABS(ea[i]))) ea_inf=tmp; |
---|
1348 | p_L2+=p[i]*p[i]; |
---|
1349 | } |
---|
1350 | //p_L2=sqrt(p_L2); |
---|
1351 | |
---|
1352 | /* save diagonal entries so that augmentation can be later canceled. |
---|
1353 | * Diagonal entries are in U_j |
---|
1354 | */ |
---|
1355 | for(j=mcon; j<m; ++j){ |
---|
1356 | ptr1=U + j*Usz; // set ptr1 to point to U_j |
---|
1357 | ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j |
---|
1358 | for(i=0; i<cnp; ++i) |
---|
1359 | ptr2[i]=ptr1[i*cnp+i]; |
---|
1360 | } |
---|
1361 | |
---|
1362 | /* |
---|
1363 | if(!(itno%100)){ |
---|
1364 | printf("Current estimate: "); |
---|
1365 | for(i=0; i<nvars; ++i) |
---|
1366 | printf("%.9g ", p[i]); |
---|
1367 | printf("-- errors %.9g %0.9g\n", ea_inf, p_eL2); |
---|
1368 | } |
---|
1369 | */ |
---|
1370 | |
---|
1371 | /* check for convergence */ |
---|
1372 | if((ea_inf <= eps1)){ |
---|
1373 | dp_L2=0.0; /* no increment for p in this case */ |
---|
1374 | stop=1; |
---|
1375 | break; |
---|
1376 | } |
---|
1377 | |
---|
1378 | /* compute initial damping factor */ |
---|
1379 | if(itno==0){ |
---|
1380 | for(i=mcon*cnp, tmp=DBL_MIN; i<nvars; ++i) |
---|
1381 | if(diagU[i]>tmp) tmp=diagU[i]; /* find max diagonal element */ |
---|
1382 | mu=tau*tmp; |
---|
1383 | } |
---|
1384 | |
---|
1385 | /* determine increment using adaptive damping */ |
---|
1386 | while(1){ |
---|
1387 | /* augment U */ |
---|
1388 | for(j=mcon; j<m; ++j){ |
---|
1389 | ptr1=U + j*Usz; // set ptr1 to point to U_j |
---|
1390 | for(i=0; i<cnp; ++i) |
---|
1391 | ptr1[i*cnp+i]+=mu; |
---|
1392 | } |
---|
1393 | |
---|
1394 | /* solve the linear systems U_j da_j = ea_j to compute the da_j */ |
---|
1395 | _dblzero(dp, mcon*cnp); /* no change for the first mcon camera params */ |
---|
1396 | for(j=nsolved=mcon; j<m; ++j){ |
---|
1397 | ptr1=U + j*Usz; // set ptr1 to point to U_j |
---|
1398 | ptr2=ea + j*easz; // set ptr2 to point to ea_j |
---|
1399 | ptr3=dp + j*cnp; // set ptr3 to point to da_j |
---|
1400 | |
---|
1401 | //nsolved+=sba_Axb_LU(ptr1, ptr2, ptr3, cnp, 0); ++nlss; |
---|
1402 | nsolved+=sba_Axb_Chol(ptr1, ptr2, ptr3, cnp, 0); ++nlss; |
---|
1403 | //nsolved+=sba_Axb_BK(ptr1, ptr2, ptr3, cnp, 0); ++nlss; |
---|
1404 | //nsolved+=sba_Axb_QRnoQ(ptr1, ptr2, ptr3, cnp, 0); ++nlss; |
---|
1405 | //nsolved+=sba_Axb_QR(ptr1, ptr2, ptr3, cnp, 0); ++nlss; |
---|
1406 | //nsolved+=sba_Axb_SVD(ptr1, ptr2, ptr3, cnp, 0); ++nlss; |
---|
1407 | //nsolved+=(sba_Axb_CG(ptr1, ptr2, ptr3, cnp, (3*cnp)/2, 1E-10, SBA_CG_JACOBI, 0) > 0); ++nlss; |
---|
1408 | } |
---|
1409 | |
---|
1410 | if(nsolved==m){ |
---|
1411 | |
---|
1412 | /* parameter vector updates are now in dp */ |
---|
1413 | |
---|
1414 | /* compute p's new estimate and ||dp||^2 */ |
---|
1415 | for(i=0, dp_L2=0.0; i<nvars; ++i){ |
---|
1416 | pdp[i]=p[i] + (tmp=dp[i]); |
---|
1417 | dp_L2+=tmp*tmp; |
---|
1418 | } |
---|
1419 | //dp_L2=sqrt(dp_L2); |
---|
1420 | |
---|
1421 | if(dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */ |
---|
1422 | //if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */ |
---|
1423 | stop=2; |
---|
1424 | break; |
---|
1425 | } |
---|
1426 | |
---|
1427 | if(dp_L2>=(p_L2+eps2)/SBA_EPSILON_SQ){ /* almost singular */ |
---|
1428 | //if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */ |
---|
1429 | stop=4; |
---|
1430 | break; |
---|
1431 | } |
---|
1432 | |
---|
1433 | (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata); ++nfev; /* evaluate function at p + dp */ |
---|
1434 | if(verbose>1) |
---|
1435 | printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs)); |
---|
1436 | for(i=0, pdp_eL2=0.0; i<nobs; ++i){ /* compute ||e(pdp)||_2 */ |
---|
1437 | hx[i]=tmp=x[i]-hx[i]; |
---|
1438 | pdp_eL2+=tmp*tmp; |
---|
1439 | } |
---|
1440 | |
---|
1441 | for(i=0, dL=0.0; i<nvars; ++i) |
---|
1442 | dL+=dp[i]*(mu*dp[i]+ea[i]); |
---|
1443 | |
---|
1444 | dF=p_eL2-pdp_eL2; |
---|
1445 | |
---|
1446 | if(verbose>1) |
---|
1447 | printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu, dL!=0.0? dF/dL : dF/DBL_EPSILON, p_eL2/nvis, pdp_eL2/nvis, p_eL2/pdp_eL2); |
---|
1448 | |
---|
1449 | if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */ |
---|
1450 | tmp=(2.0*dF/dL-1.0); |
---|
1451 | tmp=1.0-tmp*tmp*tmp; |
---|
1452 | mu=mu*( (tmp>=SBA_ONE_THIRD)? tmp : SBA_ONE_THIRD ); |
---|
1453 | nu=2; |
---|
1454 | |
---|
1455 | for(i=0; i<nvars; ++i) /* update p's estimate */ |
---|
1456 | p[i]=pdp[i]; |
---|
1457 | |
---|
1458 | for(i=0; i<nobs; ++i) /* update e and ||e||_2 */ |
---|
1459 | e[i]=hx[i]; |
---|
1460 | p_eL2=pdp_eL2; |
---|
1461 | break; |
---|
1462 | } |
---|
1463 | } /* nsolved==m */ |
---|
1464 | |
---|
1465 | /* if this point is reached, either at least one linear system could not be solved or |
---|
1466 | * the error did not reduce; in any case, the increment must be rejected |
---|
1467 | */ |
---|
1468 | |
---|
1469 | mu*=nu; |
---|
1470 | nu2=nu<<1; // 2*nu; |
---|
1471 | if(nu2<=nu){ /* nu has wrapped around (overflown) */ |
---|
1472 | fprintf(stderr, "Too many failed attempts to increase the damping factor in sba_mot_levmar_x()! Singular hessian matrix?\n"); |
---|
1473 | //exit(1); |
---|
1474 | stop=6; |
---|
1475 | break; |
---|
1476 | } |
---|
1477 | nu=nu2; |
---|
1478 | |
---|
1479 | /* restore U diagonal entries */ |
---|
1480 | for(j=mcon; j<m; ++j){ |
---|
1481 | ptr1=U + j*Usz; // set ptr1 to point to U_j |
---|
1482 | ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j |
---|
1483 | for(i=0; i<cnp; ++i) |
---|
1484 | ptr1[i*cnp+i]=ptr2[i]; |
---|
1485 | } |
---|
1486 | } /* inner loop */ |
---|
1487 | |
---|
1488 | if(p_eL2<=eps3_sq) stop=5; // error is small, force termination of outer loop |
---|
1489 | } |
---|
1490 | |
---|
1491 | if(itno>=itmax) stop=3; |
---|
1492 | |
---|
1493 | /* restore U diagonal entries */ |
---|
1494 | for(j=mcon; j<m; ++j){ |
---|
1495 | ptr1=U + j*Usz; // set ptr1 to point to U_j |
---|
1496 | ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j |
---|
1497 | for(i=0; i<cnp; ++i) |
---|
1498 | ptr1[i*cnp+i]=ptr2[i]; |
---|
1499 | } |
---|
1500 | |
---|
1501 | if(info){ |
---|
1502 | info[0]=init_p_eL2; |
---|
1503 | info[1]=p_eL2; |
---|
1504 | info[2]=ea_inf; |
---|
1505 | info[3]=dp_L2; |
---|
1506 | for(j=mcon, tmp=DBL_MIN; j<m; ++j){ |
---|
1507 | ptr1=U + j*Usz; // set ptr1 to point to U_j |
---|
1508 | for(i=0; i<cnp; ++i) |
---|
1509 | if(tmp<ptr1[i*cnp+i]) tmp=ptr1[i*cnp+i]; |
---|
1510 | } |
---|
1511 | info[4]=mu/tmp; |
---|
1512 | info[5]=itno; |
---|
1513 | info[6]=stop; |
---|
1514 | info[7]=nfev; |
---|
1515 | info[8]=njev; |
---|
1516 | info[9]=nlss; |
---|
1517 | } |
---|
1518 | //sba_print_sol(n, m, p, cnp, 0, x, mnp, &idxij, rcidxs, rcsubs); |
---|
1519 | retval=(stop!=4)? itno : -1; |
---|
1520 | |
---|
1521 | freemem_and_return: /* NOTE: this point is also reached via a goto! */ |
---|
1522 | |
---|
1523 | /* free whatever was allocated */ |
---|
1524 | free(jac); free(U); |
---|
1525 | free(e); free(ea); |
---|
1526 | free(dp); |
---|
1527 | free(rcidxs); free(rcsubs); |
---|
1528 | |
---|
1529 | free(hx); free(diagU); free(pdp); |
---|
1530 | if(fdj_data.hxx){ // cleanup |
---|
1531 | free(fdj_data.hxx); |
---|
1532 | free(fdj_data.func_rcidxs); |
---|
1533 | } |
---|
1534 | |
---|
1535 | sba_crsm_free(&idxij); |
---|
1536 | |
---|
1537 | return retval; |
---|
1538 | } |
---|
1539 | |
---|
1540 | |
---|
1541 | /* Bundle adjustment on structure parameters only |
---|
1542 | * using the sparse Levenberg-Marquardt as described in HZ p. 568 |
---|
1543 | */ |
---|
1544 | |
---|
1545 | int sba_str_levmar_x( |
---|
1546 | const int n, /* number of points */ |
---|
1547 | const int m, /* number of images */ |
---|
1548 | char *vmask, /* visibility mask: vmask[i][j]=1 if point i visible in image j, 0 otherwise. nxm */ |
---|
1549 | double *p, /* initial parameter vector p0: (b1, ..., bn). |
---|
1550 | * bi are the i-th point parameters, * size n*pnp */ |
---|
1551 | const int pnp,/* number of parameters for ONE point; e.g. 3 for Euclidean points */ |
---|
1552 | double *x, /* measurements vector: (x_11^T, .. x_1m^T, ..., x_n1^T, .. x_nm^T)^T where |
---|
1553 | * x_ij is the projection of the i-th point on the j-th image. |
---|
1554 | * NOTE: some of the x_ij might be missing, if point i is not visible in image j; |
---|
1555 | * see vmask[i][j], max. size n*m*mnp |
---|
1556 | */ |
---|
1557 | const int mnp,/* number of parameters for EACH measurement; usually 2 */ |
---|
1558 | void (*func)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *hx, void *adata), |
---|
1559 | /* functional relation describing measurements. Given a parameter vector p, |
---|
1560 | * computes a prediction of the measurements \hat{x}. p is (n*pnp)x1, |
---|
1561 | * \hat{x} is (n*m*mnp)x1, maximum |
---|
1562 | * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used |
---|
1563 | * as working memory |
---|
1564 | */ |
---|
1565 | void (*fjac)(double *p, struct sba_crsm *idxij, int *rcidxs, int *rcsubs, double *jac, void *adata), |
---|
1566 | /* function to evaluate the sparse jacobian dX/dp. |
---|
1567 | * The Jacobian is returned in jac as |
---|
1568 | * (dx_11/db_1, ..., dx_1m/db_1, ..., dx_n1/db_n, ..., dx_nm/db_n), or (using HZ's notation), |
---|
1569 | * jac=(B_11, ..., B_1m, ..., B_n1, ..., B_nm) |
---|
1570 | * Notice that depending on idxij, some of the B_ij might be missing. |
---|
1571 | * Note also that B_ij are mnp x pnp matrices and they |
---|
1572 | * should be stored in jac in row-major order. |
---|
1573 | * rcidxs, rcsubs are max(m, n) x 1, allocated by the caller and can be used |
---|
1574 | * as working memory |
---|
1575 | * |
---|
1576 | * If NULL, the jacobian is approximated by repetitive func calls and finite |
---|
1577 | * differences. This is computationally inefficient and thus NOT recommended. |
---|
1578 | */ |
---|
1579 | void *adata, /* pointer to possibly additional data, passed uninterpreted to func, fjac */ |
---|
1580 | |
---|
1581 | int itmax, /* I: maximum number of iterations. itmax==0 signals jacobian verification followed by immediate return */ |
---|
1582 | int verbose, /* I: verbosity */ |
---|
1583 | double opts[SBA_OPTSSZ], |
---|
1584 | /* I: minim. options [\mu, \epsilon1, \epsilon2]. Respectively the scale factor for initial \mu, |
---|
1585 | * stopping thresholds for ||J^T e||_inf, ||dp||_2 and ||e||_2 |
---|
1586 | */ |
---|
1587 | double info[SBA_INFOSZ] |
---|
1588 | /* O: information regarding the minimization. Set to NULL if don't care |
---|
1589 | * info[0]=||e||_2 at initial p. |
---|
1590 | * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||dp||_2, mu/max[J^T J]_ii ], all computed at estimated p. |
---|
1591 | * info[5]= # iterations, |
---|
1592 | * info[6]=reason for terminating: 1 - stopped by small gradient J^T e |
---|
1593 | * 2 - stopped by small dp |
---|
1594 | * 3 - stopped by itmax |
---|
1595 | * 4 - singular matrix. Restart from current p with increased mu |
---|
1596 | * 5 - stopped by small ||e||_2 |
---|
1597 | * 6 - too many attempts to increase damping. Restart with increased mu |
---|
1598 | * info[7]= # function evaluations |
---|
1599 | * info[8]= # jacobian evaluations |
---|
1600 | * info[9]= # number of linear systems solved, i.e. number of attempts for reducing error |
---|
1601 | */ |
---|
1602 | ) |
---|
1603 | { |
---|
1604 | register int i, j, ii, jj, k; |
---|
1605 | int nvis, nnz, retval; |
---|
1606 | |
---|
1607 | /* The following are work arrays that are dynamically allocated by sba_str_levmar_x() */ |
---|
1608 | double *jac; /* work array for storing the jacobian, max. size n*m*mnp*pnp */ |
---|
1609 | double *V; /* work array for storing the V_i in the order V_1, ..., V_n, size n*pnp*pnp */ |
---|
1610 | |
---|
1611 | double *e; /* work array for storing the e_ij in the order e_11, ..., e_1m, ..., e_n1, ..., e_nm, |
---|
1612 | max. size n*m*mnp */ |
---|
1613 | double *eb; /* work array for storing the eb_i in the order eb_1, .. eb_n size n*pnp */ |
---|
1614 | |
---|
1615 | double *dp; /* work array for storing the parameter vector updates db_1, ..., db_n, size n*pnp */ |
---|
1616 | |
---|
1617 | /* Of the above arrays, jac, e, are sparse and |
---|
1618 | * V, eb, dp are dense. Sparse arrays are indexed through |
---|
1619 | * idxij (see below), that is with the same mechanism as the input |
---|
1620 | * measurements vector x |
---|
1621 | */ |
---|
1622 | |
---|
1623 | /* submatrices sizes */ |
---|
1624 | int Bsz, Vsz, |
---|
1625 | esz, ebsz; |
---|
1626 | |
---|
1627 | register double *ptr1, *ptr2, *ptr3, *ptr4, sum; |
---|
1628 | struct sba_crsm idxij; /* sparse matrix containing the location of x_ij in x. This is also the location |
---|
1629 | * of B_ij in jac, etc. |
---|
1630 | * This matrix can be thought as a map from a sparse set of pairs (i, j) to a continuous |
---|
1631 | * index k and it is used to efficiently lookup the memory locations where the non-zero |
---|
1632 | * blocks of a sparse matrix/vector are stored |
---|
1633 | */ |
---|
1634 | int maxnm=(n>=m)? n:m, /* max. of (n, m) */ |
---|
1635 | *rcidxs, /* work array for the indexes corresponding to the nonzero elements of a single row or |
---|
1636 | column in a sparse matrix, size max(n, m) */ |
---|
1637 | *rcsubs; /* work array for the subscripts of nonzero elements in a single row or column of a |
---|
1638 | sparse matrix, size max(n, m) */ |
---|
1639 | |
---|
1640 | /* The following variables are needed by the LM algorithm */ |
---|
1641 | register int itno; /* iteration counter */ |
---|
1642 | int nsolved; |
---|
1643 | /* temporary work arrays that are dynamically allocated */ |
---|
1644 | double *hx, /* \hat{x}_i, max. size m*n*mnp */ |
---|
1645 | *diagV, /* diagonals of V_i, size n*pnp */ |
---|
1646 | *pdp; /* p + dp, size n*pnp */ |
---|
1647 | |
---|
1648 | register double mu, /* damping constant */ |
---|
1649 | tmp; /* mainly used in matrix & vector multiplications */ |
---|
1650 | double p_eL2, eb_inf, pdp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+dp)||_2 */ |
---|
1651 | double p_L2, dp_L2=DBL_MAX, dF, dL; |
---|
1652 | double tau=FABS(opts[0]), eps1=FABS(opts[1]), eps2=FABS(opts[2]), eps2_sq=opts[2]*opts[2], eps3_sq=opts[3]*opts[3]; |
---|
1653 | double init_p_eL2; |
---|
1654 | int nu=2, nu2, stop, nfev, njev=0, nlss=0; |
---|
1655 | int nobs, nvars; |
---|
1656 | |
---|
1657 | struct fdj_data_x_ fdj_data; |
---|
1658 | void *jac_adata; |
---|
1659 | |
---|
1660 | /* Initialization */ |
---|
1661 | |
---|
1662 | /* block sizes */ |
---|
1663 | Bsz=mnp * pnp; Vsz=pnp * pnp; |
---|
1664 | esz=mnp; ebsz=pnp; |
---|
1665 | |
---|
1666 | /* count total number of visible image points */ |
---|
1667 | for(i=nvis=0, jj=n*m; i<jj; ++i) |
---|
1668 | nvis+=vmask[i]; |
---|
1669 | |
---|
1670 | nobs=nvis*mnp; |
---|
1671 | nvars=n*pnp; |
---|
1672 | if(nobs<nvars){ |
---|
1673 | fprintf(stderr, "sba_str_levmar_x(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n", nobs, nvars); |
---|
1674 | exit(1); |
---|
1675 | } |
---|
1676 | |
---|
1677 | /* allocate work arrays */ |
---|
1678 | jac=(double *)emalloc(nvis*Bsz*sizeof(double)); |
---|
1679 | V=(double *)emalloc(n*Vsz*sizeof(double)); |
---|
1680 | e=(double *)emalloc(nobs*sizeof(double)); |
---|
1681 | eb=(double *)emalloc(nvars*sizeof(double)); |
---|
1682 | dp=(double *)emalloc(nvars*sizeof(double)); |
---|
1683 | rcidxs=(int *)emalloc(maxnm*sizeof(int)); |
---|
1684 | rcsubs=(int *)emalloc(maxnm*sizeof(int)); |
---|
1685 | |
---|
1686 | |
---|
1687 | hx=(double *)emalloc(nobs*sizeof(double)); |
---|
1688 | diagV=(double *)emalloc(nvars*sizeof(double)); |
---|
1689 | pdp=(double *)emalloc(nvars*sizeof(double)); |
---|
1690 | |
---|
1691 | /* allocate & fill up the idxij structure */ |
---|
1692 | sba_crsm_alloc(&idxij, n, m, nvis); |
---|
1693 | for(i=k=0; i<n; ++i){ |
---|
1694 | idxij.rowptr[i]=k; |
---|
1695 | ii=i*m; |
---|
1696 | for(j=0; j<m; ++j) |
---|
1697 | if(vmask[ii+j]){ |
---|
1698 | idxij.val[k]=k; |
---|
1699 | idxij.colidx[k++]=j; |
---|
1700 | } |
---|
1701 | } |
---|
1702 | idxij.rowptr[n]=nvis; |
---|
1703 | |
---|
1704 | /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */ |
---|
1705 | if(!fjac){ |
---|
1706 | fdj_data.func=func; |
---|
1707 | fdj_data.cnp=0; |
---|
1708 | fdj_data.pnp=pnp; |
---|
1709 | fdj_data.mnp=mnp; |
---|
1710 | fdj_data.hx=hx; |
---|
1711 | fdj_data.hxx=(double *)emalloc(nobs*sizeof(double)); |
---|
1712 | fdj_data.func_rcidxs=(int *)emalloc(2*maxnm*sizeof(int)); |
---|
1713 | fdj_data.func_rcsubs=fdj_data.func_rcidxs+maxnm; |
---|
1714 | fdj_data.adata=adata; |
---|
1715 | |
---|
1716 | fjac=sba_fdjac_x; |
---|
1717 | jac_adata=(void *)&fdj_data; |
---|
1718 | } |
---|
1719 | else{ |
---|
1720 | fdj_data.hxx=NULL; |
---|
1721 | jac_adata=adata; |
---|
1722 | } |
---|
1723 | |
---|
1724 | if(itmax==0){ /* verify jacobian */ |
---|
1725 | sba_str_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, pnp, mnp, adata, jac_adata); |
---|
1726 | retval=0; |
---|
1727 | goto freemem_and_return; |
---|
1728 | } |
---|
1729 | |
---|
1730 | /* compute the error vectors e_ij in hx */ |
---|
1731 | (*func)(p, &idxij, rcidxs, rcsubs, hx, adata); nfev=1; |
---|
1732 | /* compute e=x - f(p) and its L2 norm */ |
---|
1733 | for(i=0, p_eL2=0.0; i<nobs; ++i){ |
---|
1734 | e[i]=tmp=x[i]-hx[i]; |
---|
1735 | p_eL2+=tmp*tmp; |
---|
1736 | } |
---|
1737 | |
---|
1738 | if(verbose) printf("initial str-SBA error %g [%g]\n", p_eL2, p_eL2/nvis); |
---|
1739 | init_p_eL2=p_eL2; |
---|
1740 | |
---|
1741 | for(itno=stop=0; itno<itmax && !stop; ++itno){ |
---|
1742 | /* Note that p, e and ||e||_2 have been updated at the previous iteration */ |
---|
1743 | |
---|
1744 | /* compute derivative submatrices B_ij */ |
---|
1745 | (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); ++njev; |
---|
1746 | |
---|
1747 | /* compute V_i = \sum_j B_ij^T B_ij */ // \Sigma here! |
---|
1748 | /* V_i is symmetric, therefore its computation can be speeded up by |
---|
1749 | * computing only the upper part and then reusing it for the lower one. |
---|
1750 | * Recall that B_ij is mnp x pnp |
---|
1751 | */ |
---|
1752 | /* Also compute eb_i = \sum_j B_ij^T e_ij */ // \Sigma here! |
---|
1753 | /* Recall that e_ij is mnp x 1 |
---|
1754 | */ |
---|
1755 | _dblzero(V, n*Vsz); /* clear all V_i */ |
---|
1756 | _dblzero(eb, n*ebsz); /* clear all eb_i */ |
---|
1757 | for(i=0; i<n; ++i){ |
---|
1758 | ptr1=V + i*Vsz; // set ptr1 to point to V_i |
---|
1759 | ptr2=eb + i*ebsz; // set ptr2 to point to eb_i |
---|
1760 | |
---|
1761 | nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */ |
---|
1762 | for(j=0; j<nnz; ++j){ |
---|
1763 | /* set ptr3 to point to B_ij, actual column number in rcsubs[j] */ |
---|
1764 | ptr3=jac + idxij.val[rcidxs[j]]*Bsz; |
---|
1765 | |
---|
1766 | /* compute the UPPER TRIANGULAR PART of B_ij^T B_ij and add it to V_i */ |
---|
1767 | for(ii=0; ii<pnp; ++ii){ |
---|
1768 | for(jj=ii; jj<pnp; ++jj){ |
---|
1769 | for(k=0, sum=0.0; k<mnp; ++k) |
---|
1770 | sum+=ptr3[k*pnp+ii]*ptr3[k*pnp+jj]; |
---|
1771 | ptr1[ii*pnp+jj]+=sum; |
---|
1772 | } |
---|
1773 | |
---|
1774 | /* copy the LOWER TRIANGULAR PART of V_i from the upper one */ |
---|
1775 | for(jj=0; jj<ii; ++jj) |
---|
1776 | ptr1[ii*pnp+jj]=ptr1[jj*pnp+ii]; |
---|
1777 | } |
---|
1778 | |
---|
1779 | ptr4=e + idxij.val[rcidxs[j]]*esz; /* set ptr4 to point to e_ij */ |
---|
1780 | /* compute B_ij^T e_ij and add it to eb_i */ |
---|
1781 | for(ii=0; ii<pnp; ++ii){ |
---|
1782 | for(jj=0, sum=0.0; jj<mnp; ++jj) |
---|
1783 | sum+=ptr3[jj*pnp+ii]*ptr4[jj]; |
---|
1784 | ptr2[ii]+=sum; |
---|
1785 | } |
---|
1786 | } |
---|
1787 | } |
---|
1788 | |
---|
1789 | /* Compute ||J^T e||_inf and ||p||^2 */ |
---|
1790 | for(i=0, p_L2=eb_inf=0.0; i<nvars; ++i){ |
---|
1791 | if(eb_inf < (tmp=FABS(eb[i]))) eb_inf=tmp; |
---|
1792 | p_L2+=p[i]*p[i]; |
---|
1793 | } |
---|
1794 | //p_L2=sqrt(p_L2); |
---|
1795 | |
---|
1796 | /* save diagonal entries so that augmentation can be later canceled. |
---|
1797 | * Diagonal entries are in V_i |
---|
1798 | */ |
---|
1799 | for(i=0; i<n; ++i){ |
---|
1800 | ptr1=V + i*Vsz; // set ptr1 to point to V_i |
---|
1801 | ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i |
---|
1802 | for(j=0; j<pnp; ++j) |
---|
1803 | ptr2[j]=ptr1[j*pnp+j]; |
---|
1804 | } |
---|
1805 | |
---|
1806 | /* |
---|
1807 | if(!(itno%100)){ |
---|
1808 | printf("Current estimate: "); |
---|
1809 | for(i=0; i<nvars; ++i) |
---|
1810 | printf("%.9g ", p[i]); |
---|
1811 | printf("-- errors %.9g %0.9g\n", eb_inf, p_eL2); |
---|
1812 | } |
---|
1813 | */ |
---|
1814 | |
---|
1815 | /* check for convergence */ |
---|
1816 | if((eb_inf <= eps1)){ |
---|
1817 | dp_L2=0.0; /* no increment for p in this case */ |
---|
1818 | stop=1; |
---|
1819 | break; |
---|
1820 | } |
---|
1821 | |
---|
1822 | /* compute initial damping factor */ |
---|
1823 | if(itno==0){ |
---|
1824 | for(i=0, tmp=DBL_MIN; i<nvars; ++i) |
---|
1825 | if(diagV[i]>tmp) tmp=diagV[i]; /* find max diagonal element */ |
---|
1826 | mu=tau*tmp; |
---|
1827 | } |
---|
1828 | |
---|
1829 | /* determine increment using adaptive damping */ |
---|
1830 | while(1){ |
---|
1831 | /* augment V */ |
---|
1832 | for(i=0; i<n; ++i){ |
---|
1833 | ptr1=V + i*Vsz; // set ptr1 to point to V_i |
---|
1834 | for(j=0; j<pnp; ++j) |
---|
1835 | ptr1[j*pnp+j]+=mu; |
---|
1836 | } |
---|
1837 | |
---|
1838 | /* solve the linear systems V*_i db_i = eb_i to compute the db_i */ |
---|
1839 | for(i=nsolved=0; i<n; ++i){ |
---|
1840 | ptr1=V + i*Vsz; // set ptr1 to point to V_i |
---|
1841 | ptr2=eb + i*ebsz; // set ptr2 to point to eb_i |
---|
1842 | ptr3=dp + i*pnp; // set ptr3 to point to db_i |
---|
1843 | |
---|
1844 | //nsolved+=sba_Axb_LU(ptr1, ptr2, ptr3, pnp, 0); ++nlss; |
---|
1845 | nsolved+=sba_Axb_Chol(ptr1, ptr2, ptr3, pnp, 0); ++nlss; |
---|
1846 | //nsolved+=sba_Axb_BK(ptr1, ptr2, ptr3, pnp, 0); ++nlss; |
---|
1847 | //nsolved+=sba_Axb_QRnoQ(ptr1, ptr2, ptr3, pnp, 0); ++nlss; |
---|
1848 | //nsolved+=sba_Axb_QR(ptr1, ptr2, ptr3, pnp, 0); ++nlss; |
---|
1849 | //nsolved+=sba_Axb_SVD(ptr1, ptr2, ptr3, pnp, 0); ++nlss; |
---|
1850 | //nsolved+=(sba_Axb_CG(ptr1, ptr2, ptr3, pnp, (3*pnp)/2, 1E-10, SBA_CG_JACOBI, 0) > 0); ++nlss; |
---|
1851 | } |
---|
1852 | |
---|
1853 | if(nsolved==n){ |
---|
1854 | |
---|
1855 | /* parameter vector updates are now in dp */ |
---|
1856 | |
---|
1857 | /* compute p's new estimate and ||dp||^2 */ |
---|
1858 | for(i=0, dp_L2=0.0; i<nvars; ++i){ |
---|
1859 | pdp[i]=p[i] + (tmp=dp[i]); |
---|
1860 | dp_L2+=tmp*tmp; |
---|
1861 | } |
---|
1862 | //dp_L2=sqrt(dp_L2); |
---|
1863 | |
---|
1864 | if(dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */ |
---|
1865 | //if(dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */ |
---|
1866 | stop=2; |
---|
1867 | break; |
---|
1868 | } |
---|
1869 | |
---|
1870 | if(dp_L2>=(p_L2+eps2)/SBA_EPSILON_SQ){ /* almost singular */ |
---|
1871 | //if(dp_L2>=(p_L2+eps2)/SBA_EPSILON){ /* almost singular */ |
---|
1872 | stop=4; |
---|
1873 | break; |
---|
1874 | } |
---|
1875 | |
---|
1876 | (*func)(pdp, &idxij, rcidxs, rcsubs, hx, adata); ++nfev; /* evaluate function at p + dp */ |
---|
1877 | if(verbose>1) |
---|
1878 | printf("mean reprojection error %g\n", sba_mean_repr_error(n, mnp, x, hx, &idxij, rcidxs, rcsubs)); |
---|
1879 | for(i=0, pdp_eL2=0.0; i<nobs; ++i){ /* compute ||e(pdp)||_2 */ |
---|
1880 | hx[i]=tmp=x[i]-hx[i]; |
---|
1881 | pdp_eL2+=tmp*tmp; |
---|
1882 | } |
---|
1883 | |
---|
1884 | for(i=0, dL=0.0; i<nvars; ++i) |
---|
1885 | dL+=dp[i]*(mu*dp[i]+eb[i]); |
---|
1886 | |
---|
1887 | dF=p_eL2-pdp_eL2; |
---|
1888 | |
---|
1889 | if(verbose>1) |
---|
1890 | printf("\ndamping term %8g, gain ratio %8g, errors %8g / %8g = %g\n", mu, dL!=0.0? dF/dL : dF/DBL_EPSILON, p_eL2/nvis, pdp_eL2/nvis, p_eL2/pdp_eL2); |
---|
1891 | |
---|
1892 | if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */ |
---|
1893 | tmp=(2.0*dF/dL-1.0); |
---|
1894 | tmp=1.0-tmp*tmp*tmp; |
---|
1895 | mu=mu*( (tmp>=SBA_ONE_THIRD)? tmp : SBA_ONE_THIRD ); |
---|
1896 | nu=2; |
---|
1897 | |
---|
1898 | for(i=0; i<nvars; ++i) /* update p's estimate */ |
---|
1899 | p[i]=pdp[i]; |
---|
1900 | |
---|
1901 | for(i=0; i<nobs; ++i) /* update e and ||e||_2 */ |
---|
1902 | e[i]=hx[i]; |
---|
1903 | p_eL2=pdp_eL2; |
---|
1904 | break; |
---|
1905 | } |
---|
1906 | } /* nsolved==n */ |
---|
1907 | |
---|
1908 | /* if this point is reached, either at least one linear system could not be solved or |
---|
1909 | * the error did not reduce; in any case, the increment must be rejected |
---|
1910 | */ |
---|
1911 | |
---|
1912 | mu*=nu; |
---|
1913 | nu2=nu<<1; // 2*nu; |
---|
1914 | if(nu2<=nu){ /* nu has wrapped around (overflown) */ |
---|
1915 | fprintf(stderr, "Too many failed attempts to increase the damping factor in sba_str_levmar_x()! Singular hessian matrix?\n"); |
---|
1916 | //exit(1); |
---|
1917 | stop=6; |
---|
1918 | break; |
---|
1919 | } |
---|
1920 | nu=nu2; |
---|
1921 | |
---|
1922 | /* restore V diagonal entries */ |
---|
1923 | for(i=0; i<n; ++i){ |
---|
1924 | ptr1=V + i*Vsz; // set ptr1 to point to V_i |
---|
1925 | ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i |
---|
1926 | for(j=0; j<pnp; ++j) |
---|
1927 | ptr1[j*pnp+j]=ptr2[j]; |
---|
1928 | } |
---|
1929 | } /* inner loop */ |
---|
1930 | |
---|
1931 | if(p_eL2<=eps3_sq) stop=5; // error is small, force termination of outer loop |
---|
1932 | } |
---|
1933 | |
---|
1934 | if(itno>=itmax) stop=3; |
---|
1935 | |
---|
1936 | /* restore V diagonal entries */ |
---|
1937 | for(i=0; i<n; ++i){ |
---|
1938 | ptr1=V + i*Vsz; // set ptr1 to point to V_i |
---|
1939 | ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i |
---|
1940 | for(j=0; j<pnp; ++j) |
---|
1941 | ptr1[j*pnp+j]=ptr2[j]; |
---|
1942 | } |
---|
1943 | |
---|
1944 | if(info){ |
---|
1945 | info[0]=init_p_eL2; |
---|
1946 | info[1]=p_eL2; |
---|
1947 | info[2]=eb_inf; |
---|
1948 | info[3]=dp_L2; |
---|
1949 | for(i=0; i<n; ++i){ |
---|
1950 | ptr1=V + i*Vsz; // set ptr1 to point to V_i |
---|
1951 | for(j=0; j<pnp; ++j) |
---|
1952 | if(tmp<ptr1[j*pnp+j]) tmp=ptr1[j*pnp+j]; |
---|
1953 | } |
---|
1954 | info[4]=mu/tmp; |
---|
1955 | info[5]=itno; |
---|
1956 | info[6]=stop; |
---|
1957 | info[7]=nfev; |
---|
1958 | info[8]=njev; |
---|
1959 | info[9]=nlss; |
---|
1960 | } |
---|
1961 | //sba_print_sol(n, m, p, 0, pnp, x, mnp, &idxij, rcidxs, rcsubs); |
---|
1962 | retval=(stop!=4)? itno : -1; |
---|
1963 | |
---|
1964 | freemem_and_return: /* NOTE: this point is also reached via a goto! */ |
---|
1965 | |
---|
1966 | /* free whatever was allocated */ |
---|
1967 | free(jac); free(V); |
---|
1968 | free(e); free(eb); |
---|
1969 | free(dp); |
---|
1970 | free(rcidxs); free(rcsubs); |
---|
1971 | |
---|
1972 | free(hx); free(diagV); free(pdp); |
---|
1973 | if(fdj_data.hxx){ // cleanup |
---|
1974 | free(fdj_data.hxx); |
---|
1975 | free(fdj_data.func_rcidxs); |
---|
1976 | } |
---|
1977 | |
---|
1978 | sba_crsm_free(&idxij); |
---|
1979 | |
---|
1980 | return retval; |
---|
1981 | } |
---|