Version 0.1
00001 /* lsqr.c 00002 $Revision: 273 $ $Date: 2006-09-04 15:59:04 -0700 (Mon, 04 Sep 2006) $ 00003 */ 00012 #include <stdlib.h> 00013 #include <string.h> 00014 #include <stdio.h> 00015 #include <stdbool.h> 00016 #include <math.h> 00017 00018 #ifdef __APPLE__ 00019 #include <vecLib/vecLib.h> 00020 #else 00021 #include <cblas.h> 00022 #endif 00023 00024 #define ZERO 0.0 00025 #define ONE 1.0 00026 00027 // --------------------------------------------------------------------- 00028 // d2norm returns sqrt( a**2 + b**2 ) with precautions 00029 // to avoid overflow. 00030 // 00031 // 21 Mar 1990: First version. 00032 // --------------------------------------------------------------------- 00033 static double 00034 d2norm( const double a, const double b ) 00035 { 00036 double scale; 00037 const double zero = 0.0; 00038 00039 scale = fabs( a ) + fabs( b ); 00040 if (scale == zero) 00041 return zero; 00042 else 00043 return scale * sqrt( (a/scale)*(a/scale) + (b/scale)*(b/scale) ); 00044 } 00045 00046 static void 00047 dload( const int n, const double alpha, double x[] ) 00048 { 00049 int i; 00050 for (i = 0; i < n; i++) x[i] = alpha; 00051 return; 00052 } 00053 00054 // --------------------------------------------------------------------- 00055 // LSQR 00056 // --------------------------------------------------------------------- 00057 void lsqr( 00058 int m, 00059 int n, 00060 void (*aprod)(int mode, int m, int n, double x[], double y[], 00061 void *UsrWrk), 00062 double damp, 00063 void *UsrWrk, 00064 double u[], // len = m 00065 double v[], // len = n 00066 double w[], // len = n 00067 double x[], // len = n 00068 double se[], // len at least n. May be NULL. 00069 double atol, 00070 double btol, 00071 double conlim, 00072 int itnlim, 00073 FILE *nout, 00074 // The remaining variables are output only. 00075 int *istop_out, 00076 int *itn_out, 00077 double *anorm_out, 00078 double *acond_out, 00079 double *rnorm_out, 00080 double *arnorm_out, 00081 double *xnorm_out 00082 ) 00083 { 00084 // ------------------------------------------------------------------ 00085 // 00086 // LSQR finds a solution x to the following problems: 00087 // 00088 // 1. Unsymmetric equations -- solve A*x = b 00089 // 00090 // 2. Linear least squares -- solve A*x = b 00091 // in the least-squares sense 00092 // 00093 // 3. Damped least squares -- solve ( A )*x = ( b ) 00094 // ( damp*I ) ( 0 ) 00095 // in the least-squares sense 00096 // 00097 // where A is a matrix with m rows and n columns, b is an 00098 // m-vector, and damp is a scalar. (All quantities are real.) 00099 // The matrix A is intended to be large and sparse. It is accessed 00100 // by means of subroutine calls of the form 00101 // 00102 // aprod ( mode, m, n, x, y, UsrWrk ) 00103 // 00104 // which must perform the following functions: 00105 // 00106 // If mode = 1, compute y = y + A*x. 00107 // If mode = 2, compute x = x + A(transpose)*y. 00108 // 00109 // The vectors x and y are input parameters in both cases. 00110 // If mode = 1, y should be altered without changing x. 00111 // If mode = 2, x should be altered without changing y. 00112 // The parameter UsrWrk may be used for workspace as described 00113 // below. 00114 // 00115 // The rhs vector b is input via u, and subsequently overwritten. 00116 // 00117 // 00118 // Note: LSQR uses an iterative method to approximate the solution. 00119 // The number of iterations required to reach a certain accuracy 00120 // depends strongly on the scaling of the problem. Poor scaling of 00121 // the rows or columns of A should therefore be avoided where 00122 // possible. 00123 // 00124 // For example, in problem 1 the solution is unaltered by 00125 // row-scaling. If a row of A is very small or large compared to 00126 // the other rows of A, the corresponding row of ( A b ) should be 00127 // scaled up or down. 00128 // 00129 // In problems 1 and 2, the solution x is easily recovered 00130 // following column-scaling. Unless better information is known, 00131 // the nonzero columns of A should be scaled so that they all have 00132 // the same Euclidean norm (e.g., 1.0). 00133 // 00134 // In problem 3, there is no freedom to re-scale if damp is 00135 // nonzero. However, the value of damp should be assigned only 00136 // after attention has been paid to the scaling of A. 00137 // 00138 // The parameter damp is intended to help regularize 00139 // ill-conditioned systems, by preventing the true solution from 00140 // being very large. Another aid to regularization is provided by 00141 // the parameter acond, which may be used to terminate iterations 00142 // before the computed solution becomes very large. 00143 // 00144 // Note that x is not an input parameter. 00145 // If some initial estimate x0 is known and if damp = 0, 00146 // one could proceed as follows: 00147 // 00148 // 1. Compute a residual vector r0 = b - A*x0. 00149 // 2. Use LSQR to solve the system A*dx = r0. 00150 // 3. Add the correction dx to obtain a final solution x = x0 + dx. 00151 // 00152 // This requires that x0 be available before and after the call 00153 // to LSQR. To judge the benefits, suppose LSQR takes k1 iterations 00154 // to solve A*x = b and k2 iterations to solve A*dx = r0. 00155 // If x0 is "good", norm(r0) will be smaller than norm(b). 00156 // If the same stopping tolerances atol and btol are used for each 00157 // system, k1 and k2 will be similar, but the final solution x0 + dx 00158 // should be more accurate. The only way to reduce the total work 00159 // is to use a larger stopping tolerance for the second system. 00160 // If some value btol is suitable for A*x = b, the larger value 00161 // btol*norm(b)/norm(r0) should be suitable for A*dx = r0. 00162 // 00163 // Preconditioning is another way to reduce the number of iterations. 00164 // If it is possible to solve a related system M*x = b efficiently, 00165 // where M approximates A in some helpful way 00166 // (e.g. M - A has low rank or its elements are small relative to 00167 // those of A), LSQR may converge more rapidly on the system 00168 // A*M(inverse)*z = b, 00169 // after which x can be recovered by solving M*x = z. 00170 // 00171 // NOTE: If A is symmetric, LSQR should not be used! 00172 // Alternatives are the symmetric conjugate-gradient method (cg) 00173 // and/or SYMMLQ. 00174 // SYMMLQ is an implementation of symmetric cg that applies to 00175 // any symmetric A and will converge more rapidly than LSQR. 00176 // If A is positive definite, there are other implementations of 00177 // symmetric cg that require slightly less work per iteration 00178 // than SYMMLQ (but will take the same number of iterations). 00179 // 00180 // 00181 // Notation 00182 // -------- 00183 // 00184 // The following quantities are used in discussing the subroutine 00185 // parameters: 00186 // 00187 // Abar = ( A ), bbar = ( b ) 00188 // ( damp*I ) ( 0 ) 00189 // 00190 // r = b - A*x, rbar = bbar - Abar*x 00191 // 00192 // rnorm = sqrt( norm(r)**2 + damp**2 * norm(x)**2 ) 00193 // = norm( rbar ) 00194 // 00195 // relpr = the relative precision of floating-point arithmetic 00196 // on the machine being used. On most machines, 00197 // relpr is about 1.0e-7 and 1.0d-16 in single and double 00198 // precision respectively. 00199 // 00200 // LSQR minimizes the function rnorm with respect to x. 00201 // 00202 // 00203 // Parameters 00204 // ---------- 00205 // 00206 // m input m, the number of rows in A. 00207 // 00208 // n input n, the number of columns in A. 00209 // 00210 // aprod external See above. 00211 // 00212 // damp input The damping parameter for problem 3 above. 00213 // (damp should be 0.0 for problems 1 and 2.) 00214 // If the system A*x = b is incompatible, values 00215 // of damp in the range 0 to sqrt(relpr)*norm(A) 00216 // will probably have a negligible effect. 00217 // Larger values of damp will tend to decrease 00218 // the norm of x and reduce the number of 00219 // iterations required by LSQR. 00220 // 00221 // The work per iteration and the storage needed 00222 // by LSQR are the same for all values of damp. 00223 // 00224 // rw workspace Transit pointer to user's workspace. 00225 // Note: LSQR does not explicitly use this 00226 // parameter, but passes it to subroutine aprod for 00227 // possible use as workspace. 00228 // 00229 // u(m) input The rhs vector b. Beware that u is 00230 // over-written by LSQR. 00231 // 00232 // v(n) workspace 00233 // 00234 // w(n) workspace 00235 // 00236 // x(n) output Returns the computed solution x. 00237 // 00238 // se(*) output If m .gt. n or damp .gt. 0, the system is 00239 // (maybe) overdetermined and the standard errors may be 00240 // useful. (See the first LSQR reference.) 00241 // Otherwise (m .le. n and damp = 0) they do not 00242 // mean much. Some time and storage can be saved 00243 // by setting se = NULL. In that case, se will 00244 // not be touched. 00245 // 00246 // If se is not NULL, then the dimension of se must 00247 // be n or more. se(1:n) then returns standard error 00248 // estimates for the components of x. 00249 // For each i, se(i) is set to the value 00250 // rnorm * sqrt( sigma(i,i) / t ), 00251 // where sigma(i,i) is an estimate of the i-th 00252 // diagonal of the inverse of Abar(transpose)*Abar 00253 // and t = 1 if m .le. n, 00254 // t = m - n if m .gt. n and damp = 0, 00255 // t = m if damp .ne. 0. 00256 // 00257 // atol input An estimate of the relative error in the data 00258 // defining the matrix A. For example, 00259 // if A is accurate to about 6 digits, set 00260 // atol = 1.0e-6 . 00261 // 00262 // btol input An estimate of the relative error in the data 00263 // defining the rhs vector b. For example, 00264 // if b is accurate to about 6 digits, set 00265 // btol = 1.0e-6 . 00266 // 00267 // conlim input An upper limit on cond(Abar), the apparent 00268 // condition number of the matrix Abar. 00269 // Iterations will be terminated if a computed 00270 // estimate of cond(Abar) exceeds conlim. 00271 // This is intended to prevent certain small or 00272 // zero singular values of A or Abar from 00273 // coming into effect and causing unwanted growth 00274 // in the computed solution. 00275 // 00276 // conlim and damp may be used separately or 00277 // together to regularize ill-conditioned systems. 00278 // 00279 // Normally, conlim should be in the range 00280 // 1000 to 1/relpr. 00281 // Suggested value: 00282 // conlim = 1/(100*relpr) for compatible systems, 00283 // conlim = 1/(10*sqrt(relpr)) for least squares. 00284 // 00285 // Note: If the user is not concerned about the parameters 00286 // atol, btol and conlim, any or all of them may be set 00287 // to zero. The effect will be the same as the values 00288 // relpr, relpr and 1/relpr respectively. 00289 // 00290 // itnlim input An upper limit on the number of iterations. 00291 // Suggested value: 00292 // itnlim = n/2 for well-conditioned systems 00293 // with clustered singular values, 00294 // itnlim = 4*n otherwise. 00295 // 00296 // nout input File number for printed output. If positive, 00297 // a summary will be printed on file nout. 00298 // 00299 // istop output An integer giving the reason for termination: 00300 // 00301 // 0 x = 0 is the exact solution. 00302 // No iterations were performed. 00303 // 00304 // 1 The equations A*x = b are probably 00305 // compatible. Norm(A*x - b) is sufficiently 00306 // small, given the values of atol and btol. 00307 // 00308 // 2 damp is zero. The system A*x = b is probably 00309 // not compatible. A least-squares solution has 00310 // been obtained that is sufficiently accurate, 00311 // given the value of atol. 00312 // 00313 // 3 damp is nonzero. A damped least-squares 00314 // solution has been obtained that is sufficiently 00315 // accurate, given the value of atol. 00316 // 00317 // 4 An estimate of cond(Abar) has exceeded 00318 // conlim. The system A*x = b appears to be 00319 // ill-conditioned. Otherwise, there could be an 00320 // error in subroutine aprod. 00321 // 00322 // 5 The iteration limit itnlim was reached. 00323 // 00324 // itn output The number of iterations performed. 00325 // 00326 // anorm output An estimate of the Frobenius norm of Abar. 00327 // This is the square-root of the sum of squares 00328 // of the elements of Abar. 00329 // If damp is small and if the columns of A 00330 // have all been scaled to have length 1.0, 00331 // anorm should increase to roughly sqrt(n). 00332 // A radically different value for anorm may 00333 // indicate an error in subroutine aprod (there 00334 // may be an inconsistency between modes 1 and 2). 00335 // 00336 // acond output An estimate of cond(Abar), the condition 00337 // number of Abar. A very high value of acond 00338 // may again indicate an error in aprod. 00339 // 00340 // rnorm output An estimate of the final value of norm(rbar), 00341 // the function being minimized (see notation 00342 // above). This will be small if A*x = b has 00343 // a solution. 00344 // 00345 // arnorm output An estimate of the final value of 00346 // norm( Abar(transpose)*rbar ), the norm of 00347 // the residual for the usual normal equations. 00348 // This should be small in all cases. (arnorm 00349 // will often be smaller than the true value 00350 // computed from the output vector x.) 00351 // 00352 // xnorm output An estimate of the norm of the final 00353 // solution vector x. 00354 // 00355 // 00356 // Subroutines and functions used 00357 // ------------------------------ 00358 // 00359 // USER aprod 00360 // CBLAS dcopy, dnrm2, dscal (see Lawson et al. below) 00361 // 00362 // 00363 // References 00364 // ---------- 00365 // 00366 // C.C. Paige and M.A. Saunders, LSQR: An algorithm for sparse 00367 // linear equations and sparse least squares, 00368 // ACM Transactions on Mathematical Software 8, 1 (March 1982), 00369 // pp. 43-71. 00370 // 00371 // C.C. Paige and M.A. Saunders, Algorithm 583, LSQR: Sparse 00372 // linear equations and least-squares problems, 00373 // ACM Transactions on Mathematical Software 8, 2 (June 1982), 00374 // pp. 195-209. 00375 // 00376 // C.L. Lawson, R.J. Hanson, D.R. Kincaid and F.T. Krogh, 00377 // Basic linear algebra subprograms for Fortran usage, 00378 // ACM Transactions on Mathematical Software 5, 3 (Sept 1979), 00379 // pp. 308-323 and 324-325. 00380 // ------------------------------------------------------------------ 00381 // 00382 // 00383 // LSQR development: 00384 // 22 Feb 1982: LSQR sent to ACM TOMS to become Algorithm 583. 00385 // 15 Sep 1985: Final F66 version. LSQR sent to "misc" in netlib. 00386 // 13 Oct 1987: Bug (Robert Davies, DSIR). Have to delete 00387 // if ( (one + dabs(t)) .le. one ) GO TO 200 00388 // from loop 200. The test was an attempt to reduce 00389 // underflows, but caused w(i) not to be updated. 00390 // 17 Mar 1989: First F77 version. 00391 // 04 May 1989: Bug (David Gay, AT&T). When the second beta is zero, 00392 // rnorm = 0 and 00393 // test2 = arnorm / (anorm * rnorm) overflows. 00394 // Fixed by testing for rnorm = 0. 00395 // 05 May 1989: Sent to "misc" in netlib. 00396 // 14 Mar 1990: Bug (John Tomlin via IBM OSL testing). 00397 // Setting rhbar2 = rhobar**2 + dampsq can give zero 00398 // if rhobar underflows and damp = 0. 00399 // Fixed by testing for damp = 0 specially. 00400 // 15 Mar 1990: Converted to lower case. 00401 // 21 Mar 1990: d2norm introduced to avoid overflow in numerous 00402 // items like c = sqrt( a**2 + b**2 ). 00403 // 04 Sep 1991: wantse added as an argument to LSQR, to make 00404 // standard errors optional. This saves storage and 00405 // time when se(*) is not wanted. 00406 // 13 Feb 1992: istop now returns a value in [1,5], not [1,7]. 00407 // 1, 2 or 3 means that x solves one of the problems 00408 // Ax = b, min norm(Ax - b) or damped least squares. 00409 // 4 means the limit on cond(A) was reached. 00410 // 5 means the limit on iterations was reached. 00411 // 07 Dec 1994: Keep track of dxmax = max_k norm( phi_k * d_k ). 00412 // So far, this is just printed at the end. 00413 // A large value (relative to norm(x)) indicates 00414 // significant cancellation in forming 00415 // x = D*f = sum( phi_k * d_k ). 00416 // A large column of D need NOT be serious if the 00417 // corresponding phi_k is small. 00418 // 27 Dec 1994: Include estimate of alfa_opt in iteration log. 00419 // alfa_opt is the optimal scale factor for the 00420 // residual in the "augmented system", as described by 00421 // A. Bjorck (1992), 00422 // Pivoting and stability in the augmented system method, 00423 // in D. F. Griffiths and G. A. Watson (eds.), 00424 // "Numerical Analysis 1991", 00425 // Proceedings of the 14th Dundee Conference, 00426 // Pitman Research Notes in Mathematics 260, 00427 // Longman Scientific and Technical, Harlow, Essex, 1992. 00428 // 14 Apr 2006: "Line-by-line" conversion to ISO C by 00429 // Michael P. Friedlander. 00430 // 00431 // 00432 // Michael A. Saunders mike@sol-michael.stanford.edu 00433 // Dept of Operations Research na.Msaunders@na-net.ornl.gov 00434 // Stanford University 00435 // Stanford, CA 94305-4022 (415) 723-1875 00436 //----------------------------------------------------------------------- 00437 00438 // Local copies of output variables. Output vars are assigned at exit. 00439 int 00440 istop = 0, 00441 itn = 0; 00442 double 00443 anorm = ZERO, 00444 acond = ZERO, 00445 rnorm = ZERO, 00446 arnorm = ZERO, 00447 xnorm = ZERO; 00448 00449 // Local variables 00450 00451 const bool 00452 extra = false, // true for extra printing below. 00453 damped = damp > ZERO, 00454 wantse = se != NULL; 00455 int 00456 i, maxdx, nconv, nstop; 00457 double 00458 alfopt, alpha, arnorm0, beta, bnorm, 00459 cs, cs1, cs2, ctol, 00460 delta, dknorm, dnorm, dxk, dxmax, 00461 gamma, gambar, phi, phibar, psi, 00462 res2, rho, rhobar, rhbar1, 00463 rhs, rtol, sn, sn1, sn2, 00464 t, tau, temp, test1, test2, test3, 00465 theta, t1, t2, t3, xnorm1, z, zbar; 00466 char 00467 enter[] = "Enter LSQR. ", 00468 exit[] = "Exit LSQR. ", 00469 msg[6][100] = 00470 { 00471 {"The exact solution is x = 0"}, 00472 {"A solution to Ax = b was found, given atol, btol"}, 00473 {"A least-squares solution was found, given atol"}, 00474 {"A damped least-squares solution was found, given atol"}, 00475 {"Cond(Abar) seems to be too large, given conlim"}, 00476 {"The iteration limit was reached"} 00477 }; 00478 //----------------------------------------------------------------------- 00479 00480 // Format strings. 00481 char fmt_1000[] = 00482 " %s Least-squares solution of Ax = b\n" 00483 " The matrix A has %7d rows and %7d columns\n" 00484 " damp = %-22.2e wantse = %10i\n" 00485 " atol = %-22.2e conlim = %10.2e\n" 00486 " btol = %-22.2e itnlim = %10d\n\n"; 00487 char fmt_1200[] = 00488 " Itn x(1) Function" 00489 " Compatible LS Norm A Cond A\n"; 00490 char fmt_1300[] = 00491 " Itn x(1) Function" 00492 " Compatible LS Norm Abar Cond Abar\n"; 00493 char fmt_1400[] = 00494 " phi dknorm dxk alfa_opt\n"; 00495 char fmt_1500_extra[] = 00496 " %6d %16.9e %16.9e %9.2e %9.2e %8.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n"; 00497 char fmt_1500[] = 00498 " %6d %16.9e %16.9e %9.2e %9.2e %8.1e %8.1e\n"; 00499 char fmt_1550[] = 00500 " %6d %16.9e %16.9e %9.2e %9.2e\n"; 00501 char fmt_1600[] = 00502 "\n"; 00503 char fmt_2000[] = 00504 "\n" 00505 " %s istop = %-10d itn = %-10d\n" 00506 " %s anorm = %11.5e acond = %11.5e\n" 00507 " %s vnorm = %11.5e xnorm = %11.5e\n" 00508 " %s rnorm = %11.5e arnorm = %11.5e\n"; 00509 char fmt_2100[] = 00510 " %s max dx = %7.1e occured at itn %-9d\n" 00511 " %s = %7.1e*xnorm\n"; 00512 char fmt_3000[] = 00513 " %s %s\n"; 00514 00515 // Initialize. 00516 00517 if (nout != NULL) 00518 fprintf(nout, fmt_1000, 00519 enter, m, n, damp, wantse, 00520 atol, conlim, btol, itnlim); 00521 00522 itn = 0; 00523 istop = 0; 00524 nstop = 0; 00525 maxdx = 0; 00526 ctol = ZERO; 00527 if (conlim > ZERO) ctol = ONE / conlim; 00528 anorm = ZERO; 00529 acond = ZERO; 00530 dnorm = ZERO; 00531 dxmax = ZERO; 00532 res2 = ZERO; 00533 psi = ZERO; 00534 xnorm = ZERO; 00535 xnorm1 = ZERO; 00536 cs2 = - ONE; 00537 sn2 = ZERO; 00538 z = ZERO; 00539 00540 // ------------------------------------------------------------------ 00541 // Set up the first vectors u and v for the bidiagonalization. 00542 // These satisfy beta*u = b, alpha*v = A(transpose)*u. 00543 // ------------------------------------------------------------------ 00544 dload( n, 0.0, v ); 00545 dload( n, 0.0, x ); 00546 00547 if ( wantse ) 00548 dload( n, 0.0, se ); 00549 00550 alpha = ZERO; 00551 beta = cblas_dnrm2 ( m, u, 1 ); 00552 00553 if (beta > ZERO) { 00554 cblas_dscal ( m, (ONE / beta), u, 1 ); 00555 aprod ( 2, m, n, v, u, UsrWrk ); 00556 alpha = cblas_dnrm2 ( n, v, 1 ); 00557 } 00558 00559 if (alpha > ZERO) { 00560 cblas_dscal ( n, (ONE / alpha), v, 1 ); 00561 cblas_dcopy ( n, v, 1, w, 1 ); 00562 } 00563 00564 arnorm = arnorm0 = alpha * beta; 00565 if (arnorm == ZERO) goto goto_800; 00566 00567 rhobar = alpha; 00568 phibar = beta; 00569 bnorm = beta; 00570 rnorm = beta; 00571 00572 if (nout != NULL) { 00573 if ( damped ) 00574 fprintf(nout, fmt_1300); 00575 else 00576 fprintf(nout, fmt_1200); 00577 00578 test1 = ONE; 00579 test2 = alpha / beta; 00580 00581 if ( extra ) 00582 fprintf(nout, fmt_1400); 00583 00584 fprintf(nout, fmt_1550, itn, x[0], rnorm, test1, test2); 00585 fprintf(nout, fmt_1600); 00586 } 00587 00588 00589 // ================================================================== 00590 // Main iteration loop. 00591 // ================================================================== 00592 while (1) { 00593 itn = itn + 1; 00594 00595 // ------------------------------------------------------------------ 00596 // Perform the next step of the bidiagonalization to obtain the 00597 // next beta, u, alpha, v. These satisfy the relations 00598 // beta*u = A*v - alpha*u, 00599 // alpha*v = A(transpose)*u - beta*v. 00600 // ------------------------------------------------------------------ 00601 cblas_dscal ( m, (- alpha), u, 1 ); 00602 aprod ( 1, m, n, v, u, UsrWrk ); 00603 beta = cblas_dnrm2 ( m, u, 1 ); 00604 00605 // Accumulate anorm = || Bk || 00606 // = sqrt( sum of alpha**2 + beta**2 + damp**2 ). 00607 00608 temp = d2norm( alpha, beta ); 00609 temp = d2norm( temp , damp ); 00610 anorm = d2norm( anorm, temp ); 00611 00612 if (beta > ZERO) { 00613 cblas_dscal ( m, (ONE / beta), u, 1 ); 00614 cblas_dscal ( n, (- beta), v, 1 ); 00615 aprod ( 2, m, n, v, u, UsrWrk ); 00616 alpha = cblas_dnrm2 ( n, v, 1 ); 00617 if (alpha > ZERO) { 00618 cblas_dscal ( n, (ONE / alpha), v, 1 ); 00619 } 00620 } 00621 00622 // ------------------------------------------------------------------ 00623 // Use a plane rotation to eliminate the damping parameter. 00624 // This alters the diagonal (rhobar) of the lower-bidiagonal matrix. 00625 // ------------------------------------------------------------------ 00626 rhbar1 = rhobar; 00627 if ( damped ) { 00628 rhbar1 = d2norm( rhobar, damp ); 00629 cs1 = rhobar / rhbar1; 00630 sn1 = damp / rhbar1; 00631 psi = sn1 * phibar; 00632 phibar = cs1 * phibar; 00633 } 00634 00635 // ------------------------------------------------------------------ 00636 // Use a plane rotation to eliminate the subdiagonal element (beta) 00637 // of the lower-bidiagonal matrix, giving an upper-bidiagonal matrix. 00638 // ------------------------------------------------------------------ 00639 rho = d2norm( rhbar1, beta ); 00640 cs = rhbar1 / rho; 00641 sn = beta / rho; 00642 theta = sn * alpha; 00643 rhobar = - cs * alpha; 00644 phi = cs * phibar; 00645 phibar = sn * phibar; 00646 tau = sn * phi; 00647 00648 // ------------------------------------------------------------------ 00649 // Update x, w and (perhaps) the standard error estimates. 00650 // ------------------------------------------------------------------ 00651 t1 = phi / rho; 00652 t2 = - theta / rho; 00653 t3 = ONE / rho; 00654 dknorm = ZERO; 00655 00656 if ( wantse ) { 00657 for (i = 0; i < n; i++) { 00658 t = w[i]; 00659 x[i] = t1*t + x[i]; 00660 w[i] = t2*t + v[i]; 00661 t = (t3*t)*(t3*t); 00662 se[i] = t + se[i]; 00663 dknorm = t + dknorm; 00664 } 00665 } 00666 else { 00667 for (i = 0; i < n; i++) { 00668 t = w[i]; 00669 x[i] = t1*t + x[i]; 00670 w[i] = t2*t + v[i]; 00671 dknorm = (t3*t)*(t3*t) + dknorm; 00672 } 00673 } 00674 00675 // ------------------------------------------------------------------ 00676 // Monitor the norm of d_k, the update to x. 00677 // dknorm = norm( d_k ) 00678 // dnorm = norm( D_k ), where D_k = (d_1, d_2, ..., d_k ) 00679 // dxk = norm( phi_k d_k ), where new x = x_k + phi_k d_k. 00680 // ------------------------------------------------------------------ 00681 dknorm = sqrt( dknorm ); 00682 dnorm = d2norm( dnorm, dknorm ); 00683 dxk = fabs( phi * dknorm ); 00684 if (dxmax < dxk ) { 00685 dxmax = dxk; 00686 maxdx = itn; 00687 } 00688 00689 // ------------------------------------------------------------------ 00690 // Use a plane rotation on the right to eliminate the 00691 // super-diagonal element (theta) of the upper-bidiagonal matrix. 00692 // Then use the result to estimate norm(x). 00693 // ------------------------------------------------------------------ 00694 delta = sn2 * rho; 00695 gambar = - cs2 * rho; 00696 rhs = phi - delta * z; 00697 zbar = rhs / gambar; 00698 xnorm = d2norm( xnorm1, zbar ); 00699 gamma = d2norm( gambar, theta ); 00700 cs2 = gambar / gamma; 00701 sn2 = theta / gamma; 00702 z = rhs / gamma; 00703 xnorm1 = d2norm( xnorm1, z ); 00704 00705 // ------------------------------------------------------------------ 00706 // Test for convergence. 00707 // First, estimate the norm and condition of the matrix Abar, 00708 // and the norms of rbar and Abar(transpose)*rbar. 00709 // ------------------------------------------------------------------ 00710 acond = anorm * dnorm; 00711 res2 = d2norm( res2 , psi ); 00712 rnorm = d2norm( res2 , phibar ); 00713 arnorm = alpha * fabs( tau ); 00714 00715 // Now use these norms to estimate certain other quantities, 00716 // some of which will be small near a solution. 00717 00718 alfopt = sqrt( rnorm / (dnorm * xnorm) ); 00719 test1 = rnorm / bnorm; 00720 test2 = ZERO; 00721 // if (rnorm > ZERO) test2 = arnorm / (anorm * rnorm); 00722 if (arnorm0 > ZERO) test2 = arnorm / arnorm0; 00723 test3 = ONE / acond; 00724 t1 = test1 / (ONE + anorm * xnorm / bnorm); 00725 rtol = btol + atol * anorm * xnorm / bnorm; 00726 00727 // The following tests guard against extremely small values of 00728 // atol, btol or ctol. (The user may have set any or all of 00729 // the parameters atol, btol, conlim to zero.) 00730 // The effect is equivalent to the normal tests using 00731 // atol = relpr, btol = relpr, conlim = 1/relpr. 00732 00733 t3 = ONE + test3; 00734 t2 = ONE + test2; 00735 t1 = ONE + t1; 00736 if (itn >= itnlim) istop = 5; 00737 if (t3 <= ONE ) istop = 4; 00738 if (t2 <= ONE ) istop = 2; 00739 if (t1 <= ONE ) istop = 1; 00740 00741 // Allow for tolerances set by the user. 00742 00743 if (test3 <= ctol) istop = 4; 00744 if (test2 <= atol) istop = 2; 00745 // if (test1 <= rtol) istop = 1; 00746 00747 // ------------------------------------------------------------------ 00748 // See if it is time to print something. 00749 // ------------------------------------------------------------------ 00750 if (nout == NULL ) goto goto_600; 00751 if (n <= 40 ) goto goto_400; 00752 if (itn <= 10 ) goto goto_400; 00753 if (itn >= itnlim-10) goto goto_400; 00754 if (itn % 10 == 0 ) goto goto_400; 00755 if (test3 <= 2.0*ctol) goto goto_400; 00756 if (test2 <= 10.0*atol) goto goto_400; 00757 if (test1 <= 10.0*rtol) goto goto_400; 00758 if (istop != 0 ) goto goto_400; 00759 goto goto_600; 00760 00761 // Print a line for this iteration. 00762 // "extra" is for experimental purposes. 00763 00764 goto_400: 00765 if ( extra ) { 00766 fprintf(nout, fmt_1500_extra, 00767 itn, x[0], rnorm, test1, test2, anorm, 00768 acond, phi, dknorm, dxk, alfopt); 00769 } 00770 else { 00771 fprintf(nout, fmt_1500, 00772 itn, x[0], rnorm, test1, test2, anorm, acond); 00773 } 00774 if (itn % 10 == 0) fprintf(nout, fmt_1600); 00775 00776 // ------------------------------------------------------------------ 00777 // Stop if appropriate. 00778 // The convergence criteria are required to be met on nconv 00779 // consecutive iterations, where nconv is set below. 00780 // Suggested value: nconv = 1, 2 or 3. 00781 // ------------------------------------------------------------------ 00782 goto_600: 00783 if (istop == 0) { 00784 nstop = 0; 00785 } 00786 else { 00787 nconv = 1; 00788 nstop = nstop + 1; 00789 if (nstop < nconv && itn < itnlim) istop = 0; 00790 } 00791 00792 if (istop != 0) break; 00793 00794 } 00795 // ================================================================== 00796 // End of iteration loop. 00797 // ================================================================== 00798 00799 // Finish off the standard error estimates. 00800 00801 if ( wantse ) { 00802 t = ONE; 00803 if (m > n) t = m - n; 00804 if ( damped ) t = m; 00805 t = rnorm / sqrt( t ); 00806 00807 for (i = 0; i < n; i++) 00808 se[i] = t * sqrt( se[i] ); 00809 00810 } 00811 00812 // Decide if istop = 2 or 3. 00813 // Print the stopping condition. 00814 goto_800: 00815 if (damped && istop == 2) istop = 3; 00816 if (nout != NULL) { 00817 fprintf(nout, fmt_2000, 00818 exit, istop, itn, 00819 exit, anorm, acond, 00820 exit, bnorm, xnorm, 00821 exit, rnorm, arnorm); 00822 fprintf(nout, fmt_2100, 00823 exit, dxmax, maxdx, 00824 exit, dxmax/(xnorm + 1.0e-20)); 00825 fprintf(nout, fmt_3000, 00826 exit, msg[istop]); 00827 } 00828 00829 // Assign output variables from local copies. 00830 *istop_out = istop; 00831 *itn_out = itn; 00832 *anorm_out = anorm; 00833 *acond_out = acond; 00834 *rnorm_out = rnorm; 00835 *arnorm_out = test2; 00836 *xnorm_out = xnorm; 00837 00838 return; 00839 } 00840 00841
Generated on Sun Mar 4 22:50:03 2007 by Doxygen 1.5.1