///////////////////////////////////////////////////////////////////////////////// //// //// Linear algebra operations for the sba package //// Copyright (C) 2004 Manolis Lourakis (lourakis@ics.forth.gr) //// Institute of Computer Science, Foundation for Research & Technology - Hellas //// Heraklion, Crete, Greece. //// //// This program is free software; you can redistribute it and/or modify //// it under the terms of the GNU General Public License as published by //// the Free Software Foundation; either version 2 of the License, or //// (at your option) any later version. //// //// This program is distributed in the hope that it will be useful, //// but WITHOUT ANY WARRANTY; without even the implied warranty of //// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //// GNU General Public License for more details. //// /////////////////////////////////////////////////////////////////////////////////// #include #include #include #include #include /* inline */ #ifdef _MSC_VER #define inline __inline //MSVC #elif !defined(__GNUC__) #define inline //other than MSVC, GCC: define empty #endif #include "sba.h" #ifdef SBA_APPEND_UNDERSCORE_SUFFIX #define F77_FUNC(func) func ## _ #else #define F77_FUNC(func) func #endif /* SBA_APPEND_UNDERSCORE_SUFFIX */ /* declarations of LAPACK routines employed */ /* QR decomposition */ extern int F77_FUNC(dgeqrf)(int *m, int *n, double *a, int *lda, double *tau, double *work, int *lwork, int *info); extern int F77_FUNC(dorgqr)(int *m, int *n, int *k, double *a, int *lda, double *tau, double *work, int *lwork, int *info); /* solution of triangular system */ extern int F77_FUNC(dtrtrs)(char *uplo, char *trans, char *diag, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, int *info); /* cholesky decomposition, matrix inversion */ extern int F77_FUNC(dpotf2)(char *uplo, int *n, double *a, int *lda, int *info); extern int F77_FUNC(dpotrf)(char *uplo, int *n, double *a, int *lda, int *info); /* block version of dpotf2 */ extern int F77_FUNC(dpotri)(char *uplo, int *n, double *a, int *lda, int *info); /* LU decomposition, linear system solution and matrix inversion */ extern int F77_FUNC(dgetrf)(int *m, int *n, double *a, int *lda, int *ipiv, int *info); extern int F77_FUNC(dgetrs)(char *trans, int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb, int *info); extern int F77_FUNC(dgetri)(int *n, double *a, int *lda, int *ipiv, double *work, int *lwork, int *info); /* SVD */ extern int F77_FUNC(dgesvd)(char *jobu, char *jobvt, int *m, int *n, double *a, int *lda, double *s, double *u, int *ldu, double *vt, int *ldvt, double *work, int *lwork, int *info); /* lapack 3.0 routine, faster than dgesvd() */ extern int F77_FUNC(dgesdd)(char *jobz, int *m, int *n, double *a, int *lda, double *s, double *u, int *ldu, double *vt, int *ldvt, double *work, int *lwork, int *iwork, int *info); /* Bunch-Kaufman factorization of a real symmetric matrix A and solution of linear systems */ extern int F77_FUNC(dsytrf)(char *uplo, int *n, double *a, int *lda, int *ipiv, double *work, int *lwork, int *info); extern int F77_FUNC(dsytrs)(char *uplo, int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb, int *info); /* * This function returns the solution of Ax = b * * The function is based on QR decomposition with explicit computation of Q: * If A=Q R with Q orthogonal and R upper triangular, the linear system becomes * Q R x = b or R x = Q^T b. * * A is mxm, b is mx1. Argument iscolmaj specifies whether A is * stored in column or row major order. Note that if iscolmaj==1 * this function modifies A! * * The function returns 0 in case of error, 1 if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. */ int sba_Axb_QR(double *A, double *B, double *x, int m, int iscolmaj) { static double *buf=NULL; static int buf_sz=0; double *a, *qtb, *r, *tau, *work; int a_sz, qtb_sz, r_sz, tau_sz, tot_sz; register int i, j; int info, worksz, nrhs=1; register double sum; /* calculate required memory size */ a_sz=(iscolmaj)? 0 : m*m; qtb_sz=m; r_sz=m*m; /* only the upper triangular part really needed */ tau_sz=m; worksz=3*m; /* this is probably too much */ tot_sz=a_sz + qtb_sz + r_sz + tau_sz + worksz; if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(double *)malloc(buf_sz*sizeof(double)); if(!buf){ fprintf(stderr, "memory allocation in sba_Axb_QR() failed!\n"); exit(1); } } if(!iscolmaj){ a=buf; /* store A (column major!) into a */ for(i=0; ibuf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(double *)malloc(buf_sz*sizeof(double)); if(!buf){ fprintf(stderr, "memory allocation in sba_Axb_QRnoQ() failed!\n"); exit(1); } } if(!iscolmaj){ a=buf; /* store A (column major!) into a */ for(i=0; ibuf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(double *)malloc(buf_sz*sizeof(double)); if(!buf){ fprintf(stderr, "memory allocation in sba_Axb_Chol() failed!\n"); exit(1); } } if(!iscolmaj){ a=buf; b=a+a_sz; /* store A (column major!) into a anb B into b */ for(i=0; ibuf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(double *)malloc(buf_sz); if(!buf){ fprintf(stderr, "memory allocation in sba_Axb_LU() failed!\n"); exit(1); } } ipiv=(int *)buf; if(!iscolmaj){ a=(double *)(ipiv + ipiv_sz); b=a+a_sz; /* store A (column major!) into a and B into b */ for(i=0; ibuf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(double *)malloc(buf_sz); if(!buf){ fprintf(stderr, "memory allocation in sba_Axb_SVD() failed!\n"); exit(1); } } iwork=(int *)buf; if(!iscolmaj){ a=(double *)(iwork+iworksz); /* store A (column major!) into a */ for(i=0; i0.0; eps*=0.5) ; eps*=2.0; } /* compute the pseudoinverse in a */ memset(a, 0, m*m*sizeof(double)); /* initialize to zero */ for(rank=0, thresh=eps*s[0]; rankthresh; rank++){ one_over_denom=1.0/s[rank]; for(j=0; jbuf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(double *)malloc(buf_sz); if(!buf){ fprintf(stderr, "memory allocation in sba_Axb_BK() failed!\n"); exit(1); } } ipiv=(int *)buf; if(!iscolmaj){ a=(double *)(ipiv + ipiv_sz); b=a+a_sz; work=b+b_sz; /* store A (column major!) into a and B into b */ for(i=0; ibuf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(double *)malloc(buf_sz); if(!buf){ fprintf(stderr, "memory allocation in sba_mat_invert_LU() failed!\n"); exit(1); } } ipiv=(int *)buf; a=(double *)(ipiv + ipiv_sz); work=a+a_sz; /* store A (column major!) into a */ for(i=0; ibuf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(double *)malloc(buf_sz*sizeof(double)); if(!buf){ fprintf(stderr, "memory allocation in sba_mat_invert_Chol() failed!\n"); exit(1); } } a=(double *)buf; /* store A (column major!) into a */ for(i=0; i0 then * the starting point is taken to be zero. Argument prec selects the desired * preconditioning method as follows: * 0: no preconditioning * 1: jacobi (diagonal) preconditioning * 2: SSOR preconditioning * Argument iscolmaj specifies whether A is stored in column or row major order. * * The function returns 0 in case of error, * the number of iterations performed if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. */ int sba_Axb_CG(double *A, double *B, double *x, int m, int niter, double eps, int prec, int iscolmaj) { static double *buf=NULL; static int buf_sz=0; register int i, j; register double *aim; int iter, a_sz, res_sz, d_sz, q_sz, s_sz, wk_sz, z_sz, tot_sz; double *a, *res, *d, *q, *s, *wk, *z; double delta0, deltaold, deltanew, alpha, beta, eps_sq=eps*eps; register double sum; int rec_res; /* calculate required memory size */ a_sz=(iscolmaj)? m*m : 0; res_sz=m; d_sz=m; q_sz=m; if(prec!=SBA_CG_NOPREC){ s_sz=m; wk_sz=m; z_sz=(prec==SBA_CG_SSOR)? m : 0; } else s_sz=wk_sz=z_sz=0; tot_sz=a_sz+res_sz+d_sz+q_sz+s_sz+wk_sz+z_sz; if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(double *)malloc(buf_sz*sizeof(double)); if(!buf){ fprintf(stderr, "memory allocation request failed in sba_Axb_CG()\n"); exit(1); } } if(iscolmaj){ a=buf; /* store A (row major!) into a */ for(i=0; iDBL_EPSILON || -sum<-DBL_EPSILON) // != 0.0 wk[i]=1.0/sum; else wk[i]=1.0/DBL_EPSILON; } } else{ s=res; wk=z=NULL; } if(niter>0) for(i=0; i=0; --i){ for(j=i+1, sum=0.0, aim=a+i*m; jeps_sq*delta0 && iter<=niter; ++iter){ for(i=0; i=0; --i){ for(j=i+1, sum=0.0, aim=a+i*m; j