Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

sba_lapack.c @ 37

Last change on this file since 37 was 37, checked in by (none), 14 years ago
Added original make3d
File size: 35.3 KB

Rev	Line
[37]	1	/////////////////////////////////////////////////////////////////////////////////
	2	////
	3	//// Linear algebra operations for the sba package
	4	//// Copyright (C) 2004 Manolis Lourakis (lourakis@ics.forth.gr)
	5	//// Institute of Computer Science, Foundation for Research & Technology - Hellas
	6	//// Heraklion, Crete, Greece.
	7	////
	8	//// This program is free software; you can redistribute it and/or modify
	9	//// it under the terms of the GNU General Public License as published by
	10	//// the Free Software Foundation; either version 2 of the License, or
	11	//// (at your option) any later version.
	12	////
	13	//// This program is distributed in the hope that it will be useful,
	14	//// but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	//// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	//// GNU General Public License for more details.
	17	////
	18	///////////////////////////////////////////////////////////////////////////////////
	19
	20	#include <stdio.h>
	21	#include <stdlib.h>
	22	#include <string.h>
	23	#include <math.h>
	24	#include <float.h>
	25
	26	/* inline */
	27	#ifdef _MSC_VER
	28	#define inline __inline //MSVC
	29	#elif !defined(__GNUC__)
	30	#define inline //other than MSVC, GCC: define empty
	31	#endif
	32
	33	#include "sba.h"
	34
	35	#ifdef SBA_APPEND_UNDERSCORE_SUFFIX
	36	#define F77_FUNC(func) func ## _
	37	#else
	38	#define F77_FUNC(func) func
	39	#endif /* SBA_APPEND_UNDERSCORE_SUFFIX */
	40
	41
	42	/* declarations of LAPACK routines employed */
	43
	44	/* QR decomposition */
	45	extern int F77_FUNC(dgeqrf)(int m, int n, double a, int lda, double tau, double work, int lwork, int info);
	46	extern int F77_FUNC(dorgqr)(int m, int n, int k, double a, int lda, double tau, double work, int lwork, int *info);
	47
	48	/* solution of triangular system */
	49	extern int F77_FUNC(dtrtrs)(char uplo, char trans, char diag, int n, int nrhs, double a, int lda, double b, int ldb, int info);
	50
	51	/* cholesky decomposition, matrix inversion */
	52	extern int F77_FUNC(dpotf2)(char uplo, int n, double a, int lda, int *info);
	53	extern int F77_FUNC(dpotrf)(char uplo, int n, double a, int lda, int info); / block version of dpotf2 */
	54	extern int F77_FUNC(dpotri)(char uplo, int n, double a, int lda, int *info);
	55
	56	/* LU decomposition, linear system solution and matrix inversion */
	57	extern int F77_FUNC(dgetrf)(int m, int n, double a, int lda, int ipiv, int info);
	58	extern int F77_FUNC(dgetrs)(char trans, int n, int nrhs, double a, int lda, int ipiv, double b, int ldb, int *info);
	59	extern int F77_FUNC(dgetri)(int n, double a, int lda, int ipiv, double work, int lwork, int *info);
	60
	61	/* SVD */
	62	extern int F77_FUNC(dgesvd)(char jobu, char jobvt, int m, int n,
	63	double a, int lda, double s, double u, int *ldu,
	64	double vt, int ldvt, double work, int lwork,
	65	int *info);
	66
	67	/* lapack 3.0 routine, faster than dgesvd() */
	68	extern int F77_FUNC(dgesdd)(char jobz, int m, int n, double a, int *lda,
	69	double s, double u, int ldu, double vt, int *ldvt,
	70	double work, int lwork, int iwork, int info);
	71
	72
	73	/* Bunch-Kaufman factorization of a real symmetric matrix A and solution of linear systems */
	74	extern int F77_FUNC(dsytrf)(char uplo, int n, double a, int lda, int ipiv, double work, int lwork, int info);
	75	extern int F77_FUNC(dsytrs)(char uplo, int n, int nrhs, double a, int lda, int ipiv, double b, int ldb, int *info);
	76
	77
	78	/*
	79	* This function returns the solution of Ax = b
	80	*
	81	* The function is based on QR decomposition with explicit computation of Q:
	82	* If A=Q R with Q orthogonal and R upper triangular, the linear system becomes
	83	* Q R x = b or R x = Q^T b.
	84	*
	85	* A is mxm, b is mx1. Argument iscolmaj specifies whether A is
	86	* stored in column or row major order. Note that if iscolmaj==1
	87	* this function modifies A!
	88	*
	89	* The function returns 0 in case of error, 1 if successfull
	90	*
	91	* This function is often called repetitively to solve problems of identical
	92	* dimensions. To avoid repetitive malloc's and free's, allocated memory is
	93	* retained between calls and free'd-malloc'ed when not of the appropriate size.
	94	*/
	95	int sba_Axb_QR(double A, double B, double *x, int m, int iscolmaj)
	96	{
	97	static double *buf=NULL;
	98	static int buf_sz=0;
	99
	100	double a, qtb, r, tau, *work;
	101	int a_sz, qtb_sz, r_sz, tau_sz, tot_sz;
	102	register int i, j;
	103	int info, worksz, nrhs=1;
	104	register double sum;
	105
	106	/* calculate required memory size */
	107	a_sz=(iscolmaj)? 0 : m*m;
	108	qtb_sz=m;
	109	r_sz=mm; / only the upper triangular part really needed */
	110	tau_sz=m;
	111	worksz=3m; / this is probably too much */
	112	tot_sz=a_sz + qtb_sz + r_sz + tau_sz + worksz;
	113
	114	if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
	115	if(buf) free(buf); /* free previously allocated memory */
	116
	117	buf_sz=tot_sz;
	118	buf=(double )malloc(buf_szsizeof(double));
	119	if(!buf){
	120	fprintf(stderr, "memory allocation in sba_Axb_QR() failed!\n");
	121	exit(1);
	122	}
	123	}
	124
	125	if(!iscolmaj){
	126	a=buf;
	127	/* store A (column major!) into a */
	128	for(i=0; i<m; i++)
	129	for(j=0; j<m; j++)
	130	a[i+jm]=A[im+j];
	131	}
	132	else a=A; /* no copying required */
	133
	134	qtb=buf+a_sz;
	135	r=qtb+qtb_sz;
	136	tau=r+r_sz;
	137	work=tau+tau_sz;
	138
	139	/* QR decomposition of A */
	140	F77_FUNC(dgeqrf)((int )&m, (int )&m, a, (int )&m, tau, work, (int )&worksz, (int *)&info);
	141	/* error treatment */
	142	if(info!=0){
	143	if(info<0){
	144	fprintf(stderr, "LAPACK error: illegal value for argument %d of dgeqrf in sba_Axb_QR()\n", -info);
	145	exit(1);
	146	}
	147	else{
	148	fprintf(stderr, "Unknown LAPACK error %d for dgeqrf in sba_Axb_QR()\n", info);
	149	return 0;
	150	}
	151	}
	152
	153	/* R is now stored in the upper triangular part of a; copy it in r so that dorgqr() below won't destroy it */
	154	for(i=0; i<r_sz; i++)
	155	r[i]=a[i];
	156
	157	/* compute Q using the elementary reflectors computed by the above decomposition */
	158	F77_FUNC(dorgqr)((int )&m, (int )&m, (int )&m, a, (int )&m, tau, work, (int )&worksz, (int )&info);
	159	if(info!=0){
	160	if(info<0){
	161	fprintf(stderr, "LAPACK error: illegal value for argument %d of dorgqr in sba_Axb_QR()\n", -info);
	162	exit(1);
	163	}
	164	else{
	165	fprintf(stderr, "Unknown LAPACK error (%d) in sba_Axb_QR()\n", info);
	166	return 0;
	167	}
	168	}
	169
	170	/* Q is now in a; compute Q^T b in qtb */
	171	for(i=0; i<m; i++){
	172	for(j=0, sum=0.0; j<m; j++)
	173	sum+=a[im+j]B[j];
	174	qtb[i]=sum;
	175	}
	176
	177	/* solve the linear system R x = Q^t b */
	178	F77_FUNC(dtrtrs)("U", "N", "N", (int )&m, (int )&nrhs, r, (int )&m, qtb, (int )&m, &info);
	179	/* error treatment */
	180	if(info!=0){
	181	if(info<0){
	182	fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_QR()\n", -info);
	183	exit(1);
	184	}
	185	else{
	186	fprintf(stderr, "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_QR()\n", info);
	187	return 0;
	188	}
	189	}
	190
	191	/* copy the result in x */
	192	for(i=0; i<m; i++)
	193	x[i]=qtb[i];
	194
	195	return 1;
	196	}
	197
	198	/*
	199	* This function returns the solution of Ax = b
	200	*
	201	* The function is based on QR decomposition without computation of Q:
	202	* If A=Q R with Q orthogonal and R upper triangular, the linear system becomes
	203	* (A^T A) x = A^T b or (R^T Q^T Q R) x = A^T b or (R^T R) x = A^T b.
	204	* This amounts to solving R^T y = A^T b for y and then R x = y for x
	205	* Note that Q does not need to be explicitly computed
	206	*
	207	* A is mxm, b is mx1. Argument iscolmaj specifies whether A is
	208	* stored in column or row major order. Note that if iscolmaj==1
	209	* this function modifies A!
	210	*
	211	* The function returns 0 in case of error, 1 if successfull
	212	*
	213	* This function is often called repetitively to solve problems of identical
	214	* dimensions. To avoid repetitive malloc's and free's, allocated memory is
	215	* retained between calls and free'd-malloc'ed when not of the appropriate size.
	216	*/
	217	int sba_Axb_QRnoQ(double A, double B, double *x, int m, int iscolmaj)
	218	{
	219	static double *buf=NULL;
	220	static int buf_sz=0;
	221
	222	double a, atb, tau, work;
	223	int a_sz, atb_sz, tau_sz, tot_sz;
	224	register int i, j;
	225	int info, worksz, nrhs=1;
	226	register double sum;
	227
	228	/* calculate required memory size */
	229	a_sz=(iscolmaj)? 0 : m*m;
	230	atb_sz=m;
	231	tau_sz=m;
	232	worksz=3m; / this is probably too much */
	233	tot_sz=a_sz + atb_sz + tau_sz + worksz;
	234
	235	if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
	236	if(buf) free(buf); /* free previously allocated memory */
	237
	238	buf_sz=tot_sz;
	239	buf=(double )malloc(buf_szsizeof(double));
	240	if(!buf){
	241	fprintf(stderr, "memory allocation in sba_Axb_QRnoQ() failed!\n");
	242	exit(1);
	243	}
	244	}
	245
	246	if(!iscolmaj){
	247	a=buf;
	248	/* store A (column major!) into a */
	249	for(i=0; i<m; i++)
	250	for(j=0; j<m; j++)
	251	a[i+jm]=A[im+j];
	252	}
	253	else a=A; /* no copying required */
	254
	255	atb=buf+a_sz;
	256	tau=atb+atb_sz;
	257	work=tau+tau_sz;
	258
	259	/* compute A^T b in atb */
	260	for(i=0; i<m; i++){
	261	for(j=0, sum=0.0; j<m; j++)
	262	sum+=a[im+j]B[j];
	263	atb[i]=sum;
	264	}
	265
	266	/* QR decomposition of A */
	267	F77_FUNC(dgeqrf)((int )&m, (int )&m, a, (int )&m, tau, work, (int )&worksz, (int *)&info);
	268	/* error treatment */
	269	if(info!=0){
	270	if(info<0){
	271	fprintf(stderr, "LAPACK error: illegal value for argument %d of dgeqrf in sba_Axb_QRnoQ()\n", -info);
	272	exit(1);
	273	}
	274	else{
	275	fprintf(stderr, "Unknown LAPACK error %d for dgeqrf in sba_Axb_QRnoQ()\n", info);
	276	return 0;
	277	}
	278	}
	279
	280	/* R is stored in the upper triangular part of a */
	281
	282	/* solve the linear system R^T y = A^t b */
	283	F77_FUNC(dtrtrs)("U", "T", "N", (int )&m, (int )&nrhs, a, (int )&m, atb, (int )&m, &info);
	284	/* error treatment */
	285	if(info!=0){
	286	if(info<0){
	287	fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_QRnoQ()\n", -info);
	288	exit(1);
	289	}
	290	else{
	291	fprintf(stderr, "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_QRnoQ()\n", info);
	292	return 0;
	293	}
	294	}
	295
	296	/* solve the linear system R x = y */
	297	F77_FUNC(dtrtrs)("U", "N", "N", (int )&m, (int )&nrhs, a, (int )&m, atb, (int )&m, &info);
	298	/* error treatment */
	299	if(info!=0){
	300	if(info<0){
	301	fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_QRnoQ()\n", -info);
	302	exit(1);
	303	}
	304	else{
	305	fprintf(stderr, "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_QRnoQ()\n", info);
	306	return 0;
	307	}
	308	}
	309
	310	/* copy the result in x */
	311	for(i=0; i<m; i++)
	312	x[i]=atb[i];
	313
	314	return 1;
	315	}
	316
	317	/*
	318	* This function returns the solution of Ax=b
	319	*
	320	* The function assumes that A is symmetric & positive definite and employs
	321	* the Cholesky decomposition:
	322	* If A=U^T U with U upper triangular, the system to be solved becomes
	323	* (U^T U) x = b
	324	* This amount to solving U^T y = b for y and then U x = y for x
	325	*
	326	* A is mxm, b is mx1. Argument iscolmaj specifies whether A is
	327	* stored in column or row major order. Note that if iscolmaj==1
	328	* this function modifies A and B!
	329	*
	330	* The function returns 0 in case of error, 1 if successfull
	331	*
	332	* This function is often called repetitively to solve problems of identical
	333	* dimensions. To avoid repetitive malloc's and free's, allocated memory is
	334	* retained between calls and free'd-malloc'ed when not of the appropriate size.
	335	*/
	336	int sba_Axb_Chol(double A, double B, double *x, int m, int iscolmaj)
	337	{
	338	static double *buf=NULL;
	339	static int buf_sz=0;
	340
	341	double a, b;
	342	int a_sz, b_sz, tot_sz;
	343	register int i, j;
	344	int info, nrhs=1;
	345
	346	/* calculate required memory size */
	347	a_sz=(iscolmaj)? 0 : m*m;
	348	b_sz=(iscolmaj)? 0 : m;
	349	tot_sz=a_sz + b_sz;
	350
	351	if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
	352	if(buf) free(buf); /* free previously allocated memory */
	353
	354	buf_sz=tot_sz;
	355	buf=(double )malloc(buf_szsizeof(double));
	356	if(!buf){
	357	fprintf(stderr, "memory allocation in sba_Axb_Chol() failed!\n");
	358	exit(1);
	359	}
	360	}
	361
	362	if(!iscolmaj){
	363	a=buf;
	364	b=a+a_sz;
	365
	366	/* store A (column major!) into a anb B into b */
	367	for(i=0; i<m; i++){
	368	for(j=0; j<m; j++)
	369	a[i+jm]=A[im+j];
	370
	371	b[i]=B[i];
	372	}
	373	}
	374	else{ /* no copying is necessary */
	375	a=A;
	376	b=B;
	377	}
	378
	379	/* Cholesky decomposition of A */
	380	//F77_FUNC(dpotf2)("U", (int )&m, a, (int )&m, (int *)&info);
	381	F77_FUNC(dpotrf)("U", (int )&m, a, (int )&m, (int *)&info);
	382	/* error treatment */
	383	if(info!=0){
	384	if(info<0){
	385	fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotf2/dpotrf in sba_Axb_Chol()\n", -info);
	386	exit(1);
	387	}
	388	else{
	389	fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization could not be completed for dpotf2/dpotrf in sba_Axb_Chol()\n", info);
	390	return 0;
	391	}
	392	}
	393
	394	/* solve the linear system U^T y = b */
	395	F77_FUNC(dtrtrs)("U", "T", "N", (int )&m, (int )&nrhs, a, (int )&m, b, (int )&m, &info);
	396	/* error treatment */
	397	if(info!=0){
	398	if(info<0){
	399	fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_Chol()\n", -info);
	400	exit(1);
	401	}
	402	else{
	403	fprintf(stderr, "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_Chol()\n", info);
	404	return 0;
	405	}
	406	}
	407
	408	/* solve the linear system U x = y */
	409	F77_FUNC(dtrtrs)("U", "N", "N", (int )&m, (int )&nrhs, a, (int )&m, b, (int )&m, &info);
	410	/* error treatment */
	411	if(info!=0){
	412	if(info<0){
	413	fprintf(stderr, "LAPACK error: illegal value for argument %d of dtrtrs in sba_Axb_Chol()\n", -info);
	414	exit(1);
	415	}
	416	else{
	417	fprintf(stderr, "LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in sba_Axb_Chol()\n", info);
	418	return 0;
	419	}
	420	}
	421
	422	/* copy the result in x */
	423	for(i=0; i<m; i++)
	424	x[i]=b[i];
	425
	426	return 1;
	427	}
	428
	429	/*
	430	* This function returns the solution of Ax = b
	431	*
	432	* The function employs LU decomposition:
	433	* If A=L U with L lower and U upper triangular, then the original system
	434	* amounts to solving
	435	* L y = b, U x = y
	436	*
	437	* A is mxm, b is mx1. Argument iscolmaj specifies whether A is
	438	* stored in column or row major order. Note that if iscolmaj==1
	439	* this function modifies A and B!
	440	*
	441	* The function returns 0 in case of error,
	442	* 1 if successfull
	443	*
	444	* This function is often called repetitively to solve problems of identical
	445	* dimensions. To avoid repetitive malloc's and free's, allocated memory is
	446	* retained between calls and free'd-malloc'ed when not of the appropriate size.
	447	*/
	448	int sba_Axb_LU(double A, double B, double *x, int m, int iscolmaj)
	449	{
	450	static double *buf=NULL;
	451	static int buf_sz=0;
	452
	453	int a_sz, ipiv_sz, b_sz, tot_sz;
	454	register int i, j;
	455	int info, *ipiv, nrhs=1;
	456	double a, b;
	457
	458	/* calculate required memory size */
	459	ipiv_sz=m;
	460	a_sz=(iscolmaj)? 0 : m*m;
	461	b_sz=(iscolmaj)? 0 : m;
	462	tot_sz=ipiv_szsizeof(int) + (a_sz + b_sz)sizeof(double);
	463
	464	if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
	465	if(buf) free(buf); /* free previously allocated memory */
	466
	467	buf_sz=tot_sz;
	468	buf=(double *)malloc(buf_sz);
	469	if(!buf){
	470	fprintf(stderr, "memory allocation in sba_Axb_LU() failed!\n");
	471	exit(1);
	472	}
	473	}
	474
	475	ipiv=(int *)buf;
	476	if(!iscolmaj){
	477	a=(double *)(ipiv + ipiv_sz);
	478	b=a+a_sz;
	479
	480	/* store A (column major!) into a and B into b */
	481	for(i=0; i<m; i++){
	482	for(j=0; j<m; j++)
	483	a[i+jm]=A[im+j];
	484
	485	b[i]=B[i];
	486	}
	487	}
	488	else{ /* no copying is necessary */
	489	a=A;
	490	b=B;
	491	}
	492
	493	/* LU decomposition for A */
	494	F77_FUNC(dgetrf)((int )&m, (int )&m, a, (int )&m, ipiv, (int )&info);
	495	if(info!=0){
	496	if(info<0){
	497	fprintf(stderr, "argument %d of dgetrf illegal in sba_Axb_LU()\n", -info);
	498	exit(1);
	499	}
	500	else{
	501	fprintf(stderr, "singular matrix A for dgetrf in sba_Axb_LU()\n");
	502	return 0;
	503	}
	504	}
	505
	506	/* solve the system with the computed LU */
	507	F77_FUNC(dgetrs)("N", (int )&m, (int )&nrhs, a, (int )&m, ipiv, b, (int )&m, (int *)&info);
	508	if(info!=0){
	509	if(info<0){
	510	fprintf(stderr, "argument %d of dgetrs illegal in sba_Axb_LU()\n", -info);
	511	exit(1);
	512	}
	513	else{
	514	fprintf(stderr, "unknown error for dgetrs in sba_Axb_LU()\n");
	515	return 0;
	516	}
	517	}
	518
	519	/* copy the result in x */
	520	for(i=0; i<m; i++){
	521	x[i]=b[i];
	522	}
	523
	524	return 1;
	525	}
	526
	527	/*
	528	* This function returns the solution of Ax = b
	529	*
	530	* The function is based on SVD decomposition:
	531	* If A=U D V^T with U, V orthogonal and D diagonal, the linear system becomes
	532	* (U D V^T) x = b or x=V D^{-1} U^T b
	533	* Note that V D^{-1} U^T is the pseudoinverse A^+
	534	*
	535	* A is mxm, b is mx1. Argument iscolmaj specifies whether A is
	536	* stored in column or row major order. Note that if iscolmaj==1
	537	* this function modifies A!
	538	*
	539	* The function returns 0 in case of error, 1 if successfull
	540	*
	541	* This function is often called repetitively to solve problems of identical
	542	* dimensions. To avoid repetitive malloc's and free's, allocated memory is
	543	* retained between calls and free'd-malloc'ed when not of the appropriate size.
	544	*/
	545	int sba_Axb_SVD(double A, double B, double *x, int m, int iscolmaj)
	546	{
	547	static double *buf=NULL;
	548	static int buf_sz=0;
	549	static double eps=-1.0;
	550
	551	register int i, j;
	552	double a, u, s, vt, *work;
	553	int a_sz, u_sz, s_sz, vt_sz, tot_sz;
	554	double thresh, one_over_denom;
	555	register double sum;
	556	int info, rank, worksz, *iwork, iworksz;
	557
	558	/* calculate required memory size */
	559	worksz=-1; // workspace query. Keep in mind that dgesdd requires more memory than dgesvd
	560	/* note that optimal work size is returned in thresh */
	561	F77_FUNC(dgesdd)("A", (int )&m, (int )&m, NULL, (int )&m, NULL, NULL, (int )&m, NULL, (int *)&m,
	562	(double )&thresh, (int )&worksz, NULL, &info);
	563	/* F77_FUNC(dgesvd)("A", "A", (int )&m, (int )&m, NULL, (int )&m, NULL, NULL, (int )&m, NULL, (int *)&m,
	564	(double )&thresh, (int )&worksz, &info); */
	565	worksz=(int)thresh;
	566	iworksz=8*m;
	567	a_sz=(!iscolmaj)? m*m : 0;
	568	u_sz=mm; s_sz=m; vt_sz=mm;
	569
	570	tot_sz=iworkszsizeof(int) + (a_sz + u_sz + s_sz + vt_sz + worksz)sizeof(double);
	571
	572	if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
	573	if(buf) free(buf); /* free previously allocated memory */
	574
	575	buf_sz=tot_sz;
	576	buf=(double *)malloc(buf_sz);
	577	if(!buf){
	578	fprintf(stderr, "memory allocation in sba_Axb_SVD() failed!\n");
	579	exit(1);
	580	}
	581	}
	582
	583	iwork=(int *)buf;
	584	if(!iscolmaj){
	585	a=(double *)(iwork+iworksz);
	586	/* store A (column major!) into a */
	587	for(i=0; i<m; i++)
	588	for(j=0; j<m; j++)
	589	a[i+jm]=A[im+j];
	590	}
	591	else{
	592	a=A; /* no copying required */
	593	}
	594
	595	u=((double *)(iwork+iworksz)) + a_sz;
	596	s=u+u_sz;
	597	vt=s+s_sz;
	598	work=vt+vt_sz;
	599
	600	/* SVD decomposition of A */
	601	F77_FUNC(dgesdd)("A", (int )&m, (int )&m, a, (int )&m, s, u, (int )&m, vt, (int )&m, work, (int )&worksz, iwork, &info);
	602	//F77_FUNC(dgesvd)("A", "A", (int )&m, (int )&m, a, (int )&m, s, u, (int )&m, vt, (int )&m, work, (int )&worksz, &info);
	603
	604	/* error treatment */
	605	if(info!=0){
	606	if(info<0){
	607	fprintf(stderr, "LAPACK error: illegal value for argument %d of dgesdd/dgesvd in sba_Axb_SVD()\n", -info);
	608	exit(1);
	609	}
	610	else{
	611	fprintf(stderr, "LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in sba_Axb_SVD() [info=%d]\n", info);
	612
	613	return 0;
	614	}
	615	}
	616
	617	if(eps<0.0){
	618	double aux;
	619
	620	/* compute machine epsilon. DBL_EPSILON should do also */
	621	for(eps=1.0; aux=eps+1.0, aux-1.0>0.0; eps*=0.5)
	622	;
	623	eps*=2.0;
	624	}
	625
	626	/* compute the pseudoinverse in a */
	627	memset(a, 0, mmsizeof(double)); /* initialize to zero */
	628	for(rank=0, thresh=eps*s[0]; rank<m && s[rank]>thresh; rank++){
	629	one_over_denom=1.0/s[rank];
	630
	631	for(j=0; j<m; j++)
	632	for(i=0; i<m; i++)
	633	a[im+j]+=vt[rank+im]u[j+rankm]*one_over_denom;
	634	}
	635
	636	/* compute A^+ b in x */
	637	for(i=0; i<m; i++){
	638	for(j=0, sum=0.0; j<m; j++)
	639	sum+=a[im+j]B[j];
	640	x[i]=sum;
	641	}
	642
	643	return 1;
	644	}
	645
	646	/*
	647	* This function returns the solution of Ax = b for a real symmetric matrix A
	648	*
	649	* The function is based on Bunch-Kaufman factorization:
	650	* A is factored as UDU^T where U is upper triangular and
	651	* D symmetric and block diagonal
	652	*
	653	* A is mxm, b is mx1. Argument iscolmaj specifies whether A is
	654	* stored in column or row major order. Note that if iscolmaj==1
	655	* this function modifies A and B!
	656	*
	657	* The function returns 0 in case of error,
	658	* 1 if successfull
	659	*
	660	* This function is often called repetitively to solve problems of identical
	661	* dimensions. To avoid repetitive malloc's and free's, allocated memory is
	662	* retained between calls and free'd-malloc'ed when not of the appropriate size.
	663	*/
	664	int sba_Axb_BK(double A, double B, double *x, int m, int iscolmaj)
	665	{
	666	static double *buf=NULL;
	667	static int buf_sz=0;
	668
	669	int a_sz, ipiv_sz, b_sz, work_sz, tot_sz;
	670	register int i, j;
	671	int info, *ipiv, nrhs=1;
	672	double a, b, *work;
	673
	674	/* calculate required memory size */
	675	ipiv_sz=m;
	676	a_sz=(iscolmaj)? 0 : m*m;
	677	b_sz=(iscolmaj)? 0 : m;
	678	work_sz=16m; / this is probably too much */
	679	tot_sz=ipiv_szsizeof(int) + (a_sz + b_sz + work_sz)sizeof(double);
	680
	681	if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
	682	if(buf) free(buf); /* free previously allocated memory */
	683
	684	buf_sz=tot_sz;
	685	buf=(double *)malloc(buf_sz);
	686	if(!buf){
	687	fprintf(stderr, "memory allocation in sba_Axb_BK() failed!\n");
	688	exit(1);
	689	}
	690	}
	691
	692	ipiv=(int *)buf;
	693	if(!iscolmaj){
	694	a=(double *)(ipiv + ipiv_sz);
	695	b=a+a_sz;
	696	work=b+b_sz;
	697
	698	/* store A (column major!) into a and B into b */
	699	for(i=0; i<m; i++){
	700	for(j=0; j<m; j++)
	701	a[i+jm]=A[im+j];
	702
	703	b[i]=B[i];
	704	}
	705	}
	706	else{ /* no copying is necessary */
	707	a=A;
	708	b=B;
	709	work=(double *)(ipiv + ipiv_sz);
	710	}
	711
	712	/* factorization for A */
	713	F77_FUNC(dsytrf)("U", (int )&m, a, (int )&m, ipiv, work, (int )&work_sz, (int )&info);
	714	if(info!=0){
	715	if(info<0){
	716	fprintf(stderr, "argument %d of dsytrf illegal in sba_Axb_BK()\n", -info);
	717	exit(1);
	718	}
	719	else{
	720	fprintf(stderr, "singular block diagonal matrix D for dsytrf in sba_Axb_BK() [D(%d, %d) is zero]\n", info, info);
	721	return 0;
	722	}
	723	}
	724
	725	/* solve the system with the computed factorization */
	726	F77_FUNC(dsytrs)("U", (int )&m, (int )&nrhs, a, (int )&m, ipiv, b, (int )&m, (int *)&info);
	727	if(info!=0){
	728	if(info<0){
	729	fprintf(stderr, "argument %d of dsytrs illegal in sba_Axb_BK()\n", -info);
	730	exit(1);
	731	}
	732	else{
	733	fprintf(stderr, "unknown error for dsytrs in sba_Axb_BK()\n");
	734	return 0;
	735	}
	736	}
	737
	738	/* copy the result in x */
	739	for(i=0; i<m; i++){
	740	x[i]=b[i];
	741	}
	742
	743	return 1;
	744	}
	745
	746	/*
	747	* This function computes the inverse of a square matrix A into B
	748	* using LU decomposition
	749	*
	750	* The function returns 0 in case of error (e.g. A is singular),
	751	* 1 if successfull
	752	*
	753	* This function is often called repetitively to solve problems of identical
	754	* dimensions. To avoid repetitive malloc's and free's, allocated memory is
	755	* retained between calls and free'd-malloc'ed when not of the appropriate size.
	756	*/
	757	int sba_mat_invert_LU(double A, double B, int m)
	758	{
	759	static double *buf=NULL;
	760	static int buf_sz=0;
	761
	762	int a_sz, ipiv_sz, work_sz, tot_sz;
	763	register int i, j;
	764	int info, *ipiv;
	765	double a, work;
	766
	767	/* calculate required memory size */
	768	ipiv_sz=m;
	769	a_sz=m*m;
	770	work_sz=16m; / this is probably too much */
	771	tot_sz=ipiv_szsizeof(int) + (a_sz + work_sz)sizeof(double);
	772
	773	if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
	774	if(buf) free(buf); /* free previously allocated memory */
	775
	776	buf_sz=tot_sz;
	777	buf=(double *)malloc(buf_sz);
	778	if(!buf){
	779	fprintf(stderr, "memory allocation in sba_mat_invert_LU() failed!\n");
	780	exit(1);
	781	}
	782	}
	783
	784	ipiv=(int *)buf;
	785	a=(double *)(ipiv + ipiv_sz);
	786	work=a+a_sz;
	787
	788	/* store A (column major!) into a */
	789	for(i=0; i<m; i++)
	790	for(j=0; j<m; j++)
	791	a[i+jm]=A[im+j];
	792
	793	/* LU decomposition for A */
	794	F77_FUNC(dgetrf)((int )&m, (int )&m, a, (int )&m, ipiv, (int )&info);
	795	if(info!=0){
	796	if(info<0){
	797	fprintf(stderr, "argument %d of dgetrf illegal in sba_mat_invert_LU()\n", -info);
	798	exit(1);
	799	}
	800	else{
	801	fprintf(stderr, "singular matrix A for dgetrf in sba_mat_invert_LU()\n");
	802	return 0;
	803	}
	804	}
	805
	806	/* (A)^{-1} from LU */
	807	F77_FUNC(dgetri)((int )&m, a, (int )&m, ipiv, work, (int )&work_sz, (int )&info);
	808	if(info!=0){
	809	if(info<0){
	810	fprintf(stderr, "argument %d of dgetri illegal in sba_mat_invert_LU()\n", -info);
	811	exit(1);
	812	}
	813	else{
	814	fprintf(stderr, "singular matrix A for dgetri in sba_mat_invert_LU()\n");
	815	return 0;
	816	}
	817	}
	818
	819	/* store (A)^{-1} in B */
	820	for(i=0; i<m; i++)
	821	for(j=0; j<m; j++)
	822	B[im+j]=a[i+jm];
	823
	824	return 1;
	825	}
	826
	827	/*
	828	* This function computes the inverse of a square symmetric positive definite
	829	* matrix A into B using Cholesky factorization
	830	*
	831	* The function returns 0 in case of error (e.g. A is not positive definite or singular),
	832	* 1 if successfull
	833	*
	834	* This function is often called repetitively to solve problems of identical
	835	* dimensions. To avoid repetitive malloc's and free's, allocated memory is
	836	* retained between calls and free'd-malloc'ed when not of the appropriate size.
	837	*/
	838	int sba_mat_invert_Chol(double A, double B, int m)
	839	{
	840	static double *buf=NULL;
	841	static int buf_sz=0;
	842
	843	int a_sz, tot_sz;
	844	register int i, j;
	845	int info;
	846	double *a;
	847
	848	/* calculate required memory size */
	849	a_sz=m*m;
	850	tot_sz=a_sz;
	851
	852	if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
	853	if(buf) free(buf); /* free previously allocated memory */
	854
	855	buf_sz=tot_sz;
	856	buf=(double )malloc(buf_szsizeof(double));
	857	if(!buf){
	858	fprintf(stderr, "memory allocation in sba_mat_invert_Chol() failed!\n");
	859	exit(1);
	860	}
	861	}
	862
	863	a=(double *)buf;
	864
	865	/* store A (column major!) into a */
	866	for(i=0; i<m; i++)
	867	for(j=0; j<m; j++)
	868	a[i+jm]=A[im+j];
	869
	870	/* Cholesky factorization for A */
	871	F77_FUNC(dpotrf)("L", (int )&m, a, (int )&m, (int *)&info);
	872	/* error treatment */
	873	if(info!=0){
	874	if(info<0){
	875	fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotrf in sba_mat_invert_Chol()\n", -info);
	876	exit(1);
	877	}
	878	else{
	879	fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization could not be completed for dpotrf in sba_mat_invert_Chol()\n", info);
	880	return 0;
	881	}
	882	}
	883
	884	/* (A)^{-1} from Cholesky */
	885	F77_FUNC(dpotri)("L", (int )&m, a, (int )&m, (int *)&info);
	886	if(info!=0){
	887	if(info<0){
	888	fprintf(stderr, "argument %d of dpotri illegal in sba_mat_invert_Chol()\n", -info);
	889	exit(1);
	890	}
	891	else{
	892	fprintf(stderr, "the (%d, %d) element of the factor U or L is zero, singular matrix A for dpotri in sba_mat_invert_Chol()\n", info, info);
	893	return 0;
	894	}
	895	}
	896
	897	/* store (A)^{-1} in B. The lower triangle of the symmetric A^{-1} is in the lower triangle of a */
	898	for(i=0; i<m; i++)
	899	for(j=0; j<=i; j++)
	900	B[im+j]=B[jm+i]=a[i+j*m];
	901
	902	return 1;
	903	}
	904
	905
	906	#define __CG_LINALG_BLOCKSIZE 8
	907
	908	/* Dot product of two vectors x and y using loop unrolling and blocking.
	909	* see http://www.abarnett.demon.co.uk/tutorial.html
	910	*/
	911
	912	inline static double dprod(int n, double x, double y)
	913	{
	914	register int i, j1, j2, j3, j4, j5, j6, j7;
	915	int blockn;
	916	register double sum0=0.0, sum1=0.0, sum2=0.0, sum3=0.0,
	917	sum4=0.0, sum5=0.0, sum6=0.0, sum7=0.0;
	918
	919	/* n may not be divisible by __CG_LINALG_BLOCKSIZE,
	920	* go as near as we can first, then tidy up.
	921	*/
	922	blockn = (n / __CG_LINALG_BLOCKSIZE) * __CG_LINALG_BLOCKSIZE;
	923
	924	/* unroll the loop in blocks of `__CG_LINALG_BLOCKSIZE' */
	925	for(i=0; i<blockn; i+=__CG_LINALG_BLOCKSIZE){
	926	sum0+=x[i]*y[i];
	927	j1=i+1; sum1+=x[j1]*y[j1];
	928	j2=i+2; sum2+=x[j2]*y[j2];
	929	j3=i+3; sum3+=x[j3]*y[j3];
	930	j4=i+4; sum4+=x[j4]*y[j4];
	931	j5=i+5; sum5+=x[j5]*y[j5];
	932	j6=i+6; sum6+=x[j6]*y[j6];
	933	j7=i+7; sum7+=x[j7]*y[j7];
	934	}
	935
	936	/*
	937	* There may be some left to do.
	938	* This could be done as a simple for() loop,
	939	* but a switch is faster (and more interesting)
	940	*/
	941
	942	if(i<n){
	943	/* Jump into the case at the place that will allow
	944	* us to finish off the appropriate number of items.
	945	*/
	946
	947	switch(n - i){
	948	case 7 : sum0+=x[i]*y[i]; ++i;
	949	case 6 : sum1+=x[i]*y[i]; ++i;
	950	case 5 : sum2+=x[i]*y[i]; ++i;
	951	case 4 : sum3+=x[i]*y[i]; ++i;
	952	case 3 : sum4+=x[i]*y[i]; ++i;
	953	case 2 : sum5+=x[i]*y[i]; ++i;
	954	case 1 : sum6+=x[i]*y[i]; ++i;
	955	}
	956	}
	957
	958	return sum0+sum1+sum2+sum3+sum4+sum5+sum6+sum7;
	959	}
	960
	961
	962	/* Compute z=x+a*y for two vectors x and y and a scalar a; z can be one of x, y.
	963	* Similarly to the dot product routine, this one uses loop unrolling and blocking
	964	*/
	965
	966	inline static void daxpy(int n, double z, double x, double a, double *y)
	967	{
	968	register int i, j1, j2, j3, j4, j5, j6, j7;
	969	int blockn;
	970
	971	/* n may not be divisible by __CG_LINALG_BLOCKSIZE,
	972	* go as near as we can first, then tidy up.
	973	*/
	974	blockn = (n / __CG_LINALG_BLOCKSIZE) * __CG_LINALG_BLOCKSIZE;
	975
	976	/* unroll the loop in blocks of `__CG_LINALG_BLOCKSIZE' */
	977	for(i=0; i<blockn; i+=__CG_LINALG_BLOCKSIZE){
	978	z[i]=x[i]+a*y[i];
	979	j1=i+1; z[j1]=x[j1]+a*y[j1];
	980	j2=i+2; z[j2]=x[j2]+a*y[j2];
	981	j3=i+3; z[j3]=x[j3]+a*y[j3];
	982	j4=i+4; z[j4]=x[j4]+a*y[j4];
	983	j5=i+5; z[j5]=x[j5]+a*y[j5];
	984	j6=i+6; z[j6]=x[j6]+a*y[j6];
	985	j7=i+7; z[j7]=x[j7]+a*y[j7];
	986	}
	987
	988	/*
	989	* There may be some left to do.
	990	* This could be done as a simple for() loop,
	991	* but a switch is faster (and more interesting)
	992	*/
	993
	994	if(i<n){
	995	/* Jump into the case at the place that will allow
	996	* us to finish off the appropriate number of items.
	997	*/
	998
	999	switch(n - i){
	1000	case 7 : z[i]=x[i]+a*y[i]; ++i;
	1001	case 6 : z[i]=x[i]+a*y[i]; ++i;
	1002	case 5 : z[i]=x[i]+a*y[i]; ++i;
	1003	case 4 : z[i]=x[i]+a*y[i]; ++i;
	1004	case 3 : z[i]=x[i]+a*y[i]; ++i;
	1005	case 2 : z[i]=x[i]+a*y[i]; ++i;
	1006	case 1 : z[i]=x[i]+a*y[i]; ++i;
	1007	}
	1008	}
	1009	}
	1010
	1011	/*
	1012	* This function returns the solution of Ax = b where A is posititive definite,
	1013	* based on the conjugate gradients method; see "An intro to the CG method" by J.R. Shewchuk, p. 50-51
	1014	*
	1015	* A is mxm, b, x are is mx1. Argument niter specifies the maximum number of
	1016	* iterations and eps is the desired solution accuracy. niter<0 signals that
	1017	* x contains a valid initial approximation to the solution; if niter>0 then
	1018	* the starting point is taken to be zero. Argument prec selects the desired
	1019	* preconditioning method as follows:
	1020	* 0: no preconditioning
	1021	* 1: jacobi (diagonal) preconditioning
	1022	* 2: SSOR preconditioning
	1023	* Argument iscolmaj specifies whether A is stored in column or row major order.
	1024	*
	1025	* The function returns 0 in case of error,
	1026	* the number of iterations performed if successfull
	1027	*
	1028	* This function is often called repetitively to solve problems of identical
	1029	* dimensions. To avoid repetitive malloc's and free's, allocated memory is
	1030	* retained between calls and free'd-malloc'ed when not of the appropriate size.
	1031	*/
	1032	int sba_Axb_CG(double A, double B, double *x, int m, int niter, double eps, int prec, int iscolmaj)
	1033	{
	1034	static double *buf=NULL;
	1035	static int buf_sz=0;
	1036
	1037	register int i, j;
	1038	register double *aim;
	1039	int iter, a_sz, res_sz, d_sz, q_sz, s_sz, wk_sz, z_sz, tot_sz;
	1040	double a, res, d, q, s, wk, *z;
	1041	double delta0, deltaold, deltanew, alpha, beta, eps_sq=eps*eps;
	1042	register double sum;
	1043	int rec_res;
	1044
	1045	/* calculate required memory size */
	1046	a_sz=(iscolmaj)? m*m : 0;
	1047	res_sz=m; d_sz=m; q_sz=m;
	1048	if(prec!=SBA_CG_NOPREC){
	1049	s_sz=m; wk_sz=m;
	1050	z_sz=(prec==SBA_CG_SSOR)? m : 0;
	1051	}
	1052	else
	1053	s_sz=wk_sz=z_sz=0;
	1054
	1055	tot_sz=a_sz+res_sz+d_sz+q_sz+s_sz+wk_sz+z_sz;
	1056
	1057	if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
	1058	if(buf) free(buf); /* free previously allocated memory */
	1059
	1060	buf_sz=tot_sz;
	1061	buf=(double )malloc(buf_szsizeof(double));
	1062	if(!buf){
	1063	fprintf(stderr, "memory allocation request failed in sba_Axb_CG()\n");
	1064	exit(1);
	1065	}
	1066	}
	1067
	1068	if(iscolmaj){
	1069	a=buf;
	1070	/* store A (row major!) into a */
	1071	for(i=0; i<m; ++i)
	1072	for(j=0, aim=a+i*m; j<m; ++j)
	1073	aim[j]=A[i+j*m];
	1074	}
	1075	else a=A; /* no copying required */
	1076
	1077	res=buf+a_sz;
	1078	d=res+res_sz;
	1079	q=d+d_sz;
	1080	if(prec!=SBA_CG_NOPREC){
	1081	s=q+q_sz;
	1082	wk=s+s_sz;
	1083	z=(prec==SBA_CG_SSOR)? wk+wk_sz : NULL;
	1084
	1085	for(i=0; i<m; ++i){ // compute jacobi (i.e. diagonal) preconditioners and save them in wk
	1086	sum=a[i*m+i];
	1087	if(sum>DBL_EPSILON \|\| -sum<-DBL_EPSILON) // != 0.0
	1088	wk[i]=1.0/sum;
	1089	else
	1090	wk[i]=1.0/DBL_EPSILON;
	1091	}
	1092	}
	1093	else{
	1094	s=res;
	1095	wk=z=NULL;
	1096	}
	1097
	1098	if(niter>0)
	1099	for(i=0; i<m; ++i){ // clear solution and initialize residual vector: res <-- B
	1100	x[i]=0.0;
	1101	res[i]=B[i];
	1102	}
	1103	else{
	1104	niter=-niter;
	1105
	1106	for(i=0; i<m; ++i){ // initialize residual vector: res <-- B - A*x
	1107	for(j=0, aim=a+i*m, sum=0.0; j<m; ++j)
	1108	sum+=aim[j]*x[j];
	1109	res[i]=B[i]-sum;
	1110	}
	1111	}
	1112
	1113	switch(prec){
	1114	case SBA_CG_NOPREC:
	1115	for(i=0, deltanew=0.0; i<m; ++i){
	1116	d[i]=res[i];
	1117	deltanew+=res[i]*res[i];
	1118	}
	1119	break;
	1120	case SBA_CG_JACOBI: // jacobi preconditioning
	1121	for(i=0, deltanew=0.0; i<m; ++i){
	1122	d[i]=res[i]*wk[i];
	1123	deltanew+=res[i]*d[i];
	1124	}
	1125	break;
	1126	case SBA_CG_SSOR: // SSOR preconditioning; see the "templates" book, fig. 3.2, p. 44
	1127	for(i=0; i<m; ++i){
	1128	for(j=0, sum=0.0, aim=a+i*m; j<i; ++j)
	1129	sum+=aim[j]*z[j];
	1130	z[i]=wk[i]*(res[i]-sum);
	1131	}
	1132
	1133	for(i=m-1; i>=0; --i){
	1134	for(j=i+1, sum=0.0, aim=a+i*m; j<m; ++j)
	1135	sum+=aim[j]*d[j];
	1136	d[i]=z[i]-wk[i]*sum;
	1137	}
	1138	deltanew=dprod(m, res, d);
	1139	break;
	1140	default:
	1141	fprintf(stderr, "unknown preconditioning option %d in sba_Axb_CG\n", prec);
	1142	exit(1);
	1143	}
	1144
	1145	delta0=deltanew;
	1146
	1147	for(iter=1; deltanew>eps_sq*delta0 && iter<=niter; ++iter){
	1148	for(i=0; i<m; ++i){ // q <-- A d
	1149	aim=a+i*m;
	1150	/***
	1151	for(j=0, sum=0.0; j<m; ++j)
	1152	sum+=aim[j]*d[j];
	1153	***/
	1154	q[i]=dprod(m, aim, d); //sum;
	1155	}
	1156
	1157	/***
	1158	for(i=0, sum=0.0; i<m; ++i)
	1159	sum+=d[i]*q[i];
	1160	***/
	1161	alpha=deltanew/dprod(m, d, q); // deltanew/sum;
	1162
	1163	/***
	1164	for(i=0; i<m; ++i)
	1165	x[i]+=alpha*d[i];
	1166	***/
	1167	daxpy(m, x, x, alpha, d);
	1168
	1169	if(!(iter%50)){
	1170	for(i=0; i<m; ++i){ // accurate computation of the residual vector
	1171	aim=a+i*m;
	1172	/***
	1173	for(j=0, sum=0.0; j<m; ++j)
	1174	sum+=aim[j]*x[j];
	1175	***/
	1176	res[i]=B[i]-dprod(m, aim, x); //B[i]-sum;
	1177	}
	1178	rec_res=0;
	1179	}
	1180	else{
	1181	/***
	1182	for(i=0; i<m; ++i) // approximate computation of the residual vector
	1183	res[i]-=alpha*q[i];
	1184	***/
	1185	daxpy(m, res, res, -alpha, q);
	1186	rec_res=1;
	1187	}
	1188
	1189	if(prec){
	1190	switch(prec){
	1191	case SBA_CG_JACOBI: // jacobi
	1192	for(i=0; i<m; ++i)
	1193	s[i]=res[i]*wk[i];
	1194	break;
	1195	case SBA_CG_SSOR: // SSOR
	1196	for(i=0; i<m; ++i){
	1197	for(j=0, sum=0.0, aim=a+i*m; j<i; ++j)
	1198	sum+=aim[j]*z[j];
	1199	z[i]=wk[i]*(res[i]-sum);
	1200	}
	1201
	1202	for(i=m-1; i>=0; --i){
	1203	for(j=i+1, sum=0.0, aim=a+i*m; j<m; ++j)
	1204	sum+=aim[j]*s[j];
	1205	s[i]=z[i]-wk[i]*sum;
	1206	}
	1207	break;
	1208	}
	1209	}
	1210
	1211	deltaold=deltanew;
	1212	/***
	1213	for(i=0, sum=0.0; i<m; ++i)
	1214	sum+=res[i]*s[i];
	1215	***/
	1216	deltanew=dprod(m, res, s); //sum;
	1217
	1218	/* make sure that we get around small delta that are due to
	1219	* accumulated floating point roundoff errors
	1220	*/
	1221	if(rec_res && deltanew<=eps_sq*delta0){
	1222	/* analytically recompute delta */
	1223	for(i=0; i<m; ++i){
	1224	for(j=0, aim=a+i*m, sum=0.0; j<m; ++j)
	1225	sum+=aim[j]*x[j];
	1226	res[i]=B[i]-sum;
	1227	}
	1228	deltanew=dprod(m, res, s);
	1229	}
	1230
	1231	beta=deltanew/deltaold;
	1232
	1233	/***
	1234	for(i=0; i<m; ++i)
	1235	d[i]=s[i]+beta*d[i];
	1236	***/
	1237	daxpy(m, d, s, beta, d);
	1238	}
	1239
	1240	return iter;
	1241	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: proiecte/pmake3d/make3d_original/Make3dSingleImageStanford_version0.1/third_party/sba-1.3/sba_lapack.c @ 37

Download in other formats: