Welcome to BCLS!

00001 /* lsqr.c
00002    $Revision: 273 $ $Date: 2006-09-04 15:59:04 -0700 (Mon, 04 Sep 2006) $
00003 */
00012 #include <stdlib.h>
00013 #include <string.h>
00014 #include <stdio.h>
00015 #include <stdbool.h>
00016 #include <math.h>
00017 
00018 #ifdef __APPLE__
00019   #include <vecLib/vecLib.h>
00020 #else
00021   #include <cblas.h>
00022 #endif
00023 
00024 #define ZERO   0.0
00025 #define ONE    1.0
00026 
00027 // ---------------------------------------------------------------------
00028 // d2norm  returns  sqrt( a**2 + b**2 )  with precautions
00029 // to avoid overflow.
00030 //
00031 // 21 Mar 1990: First version.
00032 // ---------------------------------------------------------------------
00033 static double
00034 d2norm( const double a, const double b )
00035 {
00036     double scale;
00037     const double zero = 0.0;
00038 
00039     scale  = fabs( a ) + fabs( b );
00040     if (scale == zero)
00041         return zero;
00042     else
00043         return scale * sqrt( (a/scale)*(a/scale) + (b/scale)*(b/scale) );
00044 }
00045 
00046 static void
00047 dload( const int n, const double alpha, double x[] )
00048 {    
00049     int i;
00050     for (i = 0; i < n; i++) x[i] = alpha;
00051     return;
00052 }
00053 
00054 // ---------------------------------------------------------------------
00055 // LSQR
00056 // ---------------------------------------------------------------------
00057 void lsqr( 
00058           int m,
00059           int n,
00060           void (*aprod)(int mode, int m, int n, double x[], double y[],
00061                         void *UsrWrk),
00062           double damp,
00063           void   *UsrWrk,
00064           double u[],     // len = m
00065           double v[],     // len = n
00066           double w[],     // len = n
00067           double x[],     // len = n
00068           double se[],    // len at least n.  May be NULL.
00069           double atol,
00070           double btol,
00071           double conlim,
00072           int    itnlim,
00073           FILE   *nout,
00074           // The remaining variables are output only.
00075           int    *istop_out,
00076           int    *itn_out,
00077           double *anorm_out,
00078           double *acond_out,
00079           double *rnorm_out,
00080           double *arnorm_out,
00081           double *xnorm_out
00082          )
00083 {
00084 //     ------------------------------------------------------------------
00085 //
00086 //     LSQR  finds a solution x to the following problems:
00087 //
00088 //     1. Unsymmetric equations --    solve  A*x = b
00089 //
00090 //     2. Linear least squares  --    solve  A*x = b
00091 //                                    in the least-squares sense
00092 //
00093 //     3. Damped least squares  --    solve  (   A    )*x = ( b )
00094 //                                           ( damp*I )     ( 0 )
00095 //                                    in the least-squares sense
00096 //
00097 //     where A is a matrix with m rows and n columns, b is an
00098 //     m-vector, and damp is a scalar.  (All quantities are real.)
00099 //     The matrix A is intended to be large and sparse.  It is accessed
00100 //     by means of subroutine calls of the form
00101 //
00102 //                aprod ( mode, m, n, x, y, UsrWrk )
00103 //
00104 //     which must perform the following functions:
00105 //
00106 //                If mode = 1, compute  y = y + A*x.
00107 //                If mode = 2, compute  x = x + A(transpose)*y.
00108 //
00109 //     The vectors x and y are input parameters in both cases.
00110 //     If  mode = 1,  y should be altered without changing x.
00111 //     If  mode = 2,  x should be altered without changing y.
00112 //     The parameter UsrWrk may be used for workspace as described
00113 //     below.
00114 //
00115 //     The rhs vector b is input via u, and subsequently overwritten.
00116 //
00117 //
00118 //     Note:  LSQR uses an iterative method to approximate the solution.
00119 //     The number of iterations required to reach a certain accuracy
00120 //     depends strongly on the scaling of the problem.  Poor scaling of
00121 //     the rows or columns of A should therefore be avoided where
00122 //     possible.
00123 //
00124 //     For example, in problem 1 the solution is unaltered by
00125 //     row-scaling.  If a row of A is very small or large compared to
00126 //     the other rows of A, the corresponding row of ( A  b ) should be
00127 //     scaled up or down.
00128 //
00129 //     In problems 1 and 2, the solution x is easily recovered
00130 //     following column-scaling.  Unless better information is known,
00131 //     the nonzero columns of A should be scaled so that they all have
00132 //     the same Euclidean norm (e.g., 1.0).
00133 //
00134 //     In problem 3, there is no freedom to re-scale if damp is
00135 //     nonzero.  However, the value of damp should be assigned only
00136 //     after attention has been paid to the scaling of A.
00137 //
00138 //     The parameter damp is intended to help regularize
00139 //     ill-conditioned systems, by preventing the true solution from
00140 //     being very large.  Another aid to regularization is provided by
00141 //     the parameter acond, which may be used to terminate iterations
00142 //     before the computed solution becomes very large.
00143 //
00144 //     Note that x is not an input parameter.
00145 //     If some initial estimate x0 is known and if damp = 0,
00146 //     one could proceed as follows:
00147 //
00148 //       1. Compute a residual vector     r0 = b - A*x0.
00149 //       2. Use LSQR to solve the system  A*dx = r0.
00150 //       3. Add the correction dx to obtain a final solution x = x0 + dx.
00151 //
00152 //     This requires that x0 be available before and after the call
00153 //     to LSQR.  To judge the benefits, suppose LSQR takes k1 iterations
00154 //     to solve A*x = b and k2 iterations to solve A*dx = r0.
00155 //     If x0 is "good", norm(r0) will be smaller than norm(b).
00156 //     If the same stopping tolerances atol and btol are used for each
00157 //     system, k1 and k2 will be similar, but the final solution x0 + dx
00158 //     should be more accurate.  The only way to reduce the total work
00159 //     is to use a larger stopping tolerance for the second system.
00160 //     If some value btol is suitable for A*x = b, the larger value
00161 //     btol*norm(b)/norm(r0)  should be suitable for A*dx = r0.
00162 //
00163 //     Preconditioning is another way to reduce the number of iterations.
00164 //     If it is possible to solve a related system M*x = b efficiently,
00165 //     where M approximates A in some helpful way
00166 //     (e.g. M - A has low rank or its elements are small relative to
00167 //     those of A), LSQR may converge more rapidly on the system
00168 //           A*M(inverse)*z = b,
00169 //     after which x can be recovered by solving M*x = z.
00170 //
00171 //     NOTE: If A is symmetric, LSQR should not be used!
00172 //     Alternatives are the symmetric conjugate-gradient method (cg)
00173 //     and/or SYMMLQ.
00174 //     SYMMLQ is an implementation of symmetric cg that applies to
00175 //     any symmetric A and will converge more rapidly than LSQR.
00176 //     If A is positive definite, there are other implementations of
00177 //     symmetric cg that require slightly less work per iteration
00178 //     than SYMMLQ (but will take the same number of iterations).
00179 //
00180 //
00181 //     Notation
00182 //     --------
00183 //
00184 //     The following quantities are used in discussing the subroutine
00185 //     parameters:
00186 //
00187 //     Abar   =  (   A    ),          bbar  =  ( b )
00188 //               ( damp*I )                    ( 0 )
00189 //
00190 //     r      =  b  -  A*x,           rbar  =  bbar  -  Abar*x
00191 //
00192 //     rnorm  =  sqrt( norm(r)**2  +  damp**2 * norm(x)**2 )
00193 //            =  norm( rbar )
00194 //
00195 //     relpr  =  the relative precision of floating-point arithmetic
00196 //               on the machine being used.  On most machines,
00197 //               relpr is about 1.0e-7 and 1.0d-16 in single and double
00198 //               precision respectively.
00199 //
00200 //     LSQR  minimizes the function rnorm with respect to x.
00201 //
00202 //
00203 //     Parameters
00204 //     ----------
00205 //
00206 //     m       input      m, the number of rows in A.
00207 //
00208 //     n       input      n, the number of columns in A.
00209 //
00210 //     aprod   external   See above.
00211 //
00212 //     damp    input      The damping parameter for problem 3 above.
00213 //                        (damp should be 0.0 for problems 1 and 2.)
00214 //                        If the system A*x = b is incompatible, values
00215 //                        of damp in the range 0 to sqrt(relpr)*norm(A)
00216 //                        will probably have a negligible effect.
00217 //                        Larger values of damp will tend to decrease
00218 //                        the norm of x and reduce the number of 
00219 //                        iterations required by LSQR.
00220 //
00221 //                        The work per iteration and the storage needed
00222 //                        by LSQR are the same for all values of damp.
00223 //
00224 //     rw      workspace  Transit pointer to user's workspace.
00225 //                        Note:  LSQR  does not explicitly use this
00226 //                        parameter, but passes it to subroutine aprod for
00227 //                        possible use as workspace.
00228 //
00229 //     u(m)    input      The rhs vector b.  Beware that u is
00230 //                        over-written by LSQR.
00231 //
00232 //     v(n)    workspace
00233 //
00234 //     w(n)    workspace
00235 //
00236 //     x(n)    output     Returns the computed solution x.
00237 //
00238 //     se(*)   output     If m .gt. n  or  damp .gt. 0,  the system is
00239 //             (maybe)    overdetermined and the standard errors may be
00240 //                        useful.  (See the first LSQR reference.)
00241 //                        Otherwise (m .le. n  and  damp = 0) they do not
00242 //                        mean much.  Some time and storage can be saved
00243 //                        by setting  se = NULL.  In that case, se will
00244 //                        not be touched.
00245 //
00246 //                        If se is not NULL, then the dimension of se must
00247 //                        be n or more.  se(1:n) then returns standard error
00248 //                        estimates for the components of x.
00249 //                        For each i, se(i) is set to the value
00250 //                           rnorm * sqrt( sigma(i,i) / t ),
00251 //                        where sigma(i,i) is an estimate of the i-th
00252 //                        diagonal of the inverse of Abar(transpose)*Abar
00253 //                        and  t = 1      if  m .le. n,
00254 //                             t = m - n  if  m .gt. n  and  damp = 0,
00255 //                             t = m      if  damp .ne. 0.
00256 //
00257 //     atol    input      An estimate of the relative error in the data
00258 //                        defining the matrix A.  For example,
00259 //                        if A is accurate to about 6 digits, set
00260 //                        atol = 1.0e-6 .
00261 //
00262 //     btol    input      An estimate of the relative error in the data
00263 //                        defining the rhs vector b.  For example,
00264 //                        if b is accurate to about 6 digits, set
00265 //                        btol = 1.0e-6 .
00266 //
00267 //     conlim  input      An upper limit on cond(Abar), the apparent
00268 //                        condition number of the matrix Abar.
00269 //                        Iterations will be terminated if a computed
00270 //                        estimate of cond(Abar) exceeds conlim.
00271 //                        This is intended to prevent certain small or
00272 //                        zero singular values of A or Abar from
00273 //                        coming into effect and causing unwanted growth
00274 //                        in the computed solution.
00275 //
00276 //                        conlim and damp may be used separately or
00277 //                        together to regularize ill-conditioned systems.
00278 //
00279 //                        Normally, conlim should be in the range
00280 //                        1000 to 1/relpr.
00281 //                        Suggested value:
00282 //                        conlim = 1/(100*relpr)  for compatible systems,
00283 //                        conlim = 1/(10*sqrt(relpr)) for least squares.
00284 //
00285 //             Note:  If the user is not concerned about the parameters
00286 //             atol, btol and conlim, any or all of them may be set
00287 //             to zero.  The effect will be the same as the values
00288 //             relpr, relpr and 1/relpr respectively.
00289 //
00290 //     itnlim  input      An upper limit on the number of iterations.
00291 //                        Suggested value:
00292 //                        itnlim = n/2   for well-conditioned systems
00293 //                                       with clustered singular values,
00294 //                        itnlim = 4*n   otherwise.
00295 //
00296 //     nout    input      File number for printed output.  If positive,
00297 //                        a summary will be printed on file nout.
00298 //
00299 //     istop   output     An integer giving the reason for termination:
00300 //
00301 //                0       x = 0  is the exact solution.
00302 //                        No iterations were performed.
00303 //
00304 //                1       The equations A*x = b are probably
00305 //                        compatible.  Norm(A*x - b) is sufficiently
00306 //                        small, given the values of atol and btol.
00307 //
00308 //                2       damp is zero.  The system A*x = b is probably
00309 //                        not compatible.  A least-squares solution has
00310 //                        been obtained that is sufficiently accurate,
00311 //                        given the value of atol.
00312 //
00313 //                3       damp is nonzero.  A damped least-squares
00314 //                        solution has been obtained that is sufficiently
00315 //                        accurate, given the value of atol.
00316 //
00317 //                4       An estimate of cond(Abar) has exceeded
00318 //                        conlim.  The system A*x = b appears to be
00319 //                        ill-conditioned.  Otherwise, there could be an
00320 //                        error in subroutine aprod.
00321 //
00322 //                5       The iteration limit itnlim was reached.
00323 //
00324 //     itn     output     The number of iterations performed.
00325 //
00326 //     anorm   output     An estimate of the Frobenius norm of  Abar.
00327 //                        This is the square-root of the sum of squares
00328 //                        of the elements of Abar.
00329 //                        If damp is small and if the columns of A
00330 //                        have all been scaled to have length 1.0,
00331 //                        anorm should increase to roughly sqrt(n).
00332 //                        A radically different value for anorm may
00333 //                        indicate an error in subroutine aprod (there
00334 //                        may be an inconsistency between modes 1 and 2).
00335 //
00336 //     acond   output     An estimate of cond(Abar), the condition
00337 //                        number of Abar.  A very high value of acond
00338 //                        may again indicate an error in aprod.
00339 //
00340 //     rnorm   output     An estimate of the final value of norm(rbar),
00341 //                        the function being minimized (see notation
00342 //                        above).  This will be small if A*x = b has
00343 //                        a solution.
00344 //
00345 //     arnorm  output     An estimate of the final value of
00346 //                        norm( Abar(transpose)*rbar ), the norm of
00347 //                        the residual for the usual normal equations.
00348 //                        This should be small in all cases.  (arnorm
00349 //                        will often be smaller than the true value
00350 //                        computed from the output vector x.)
00351 //
00352 //     xnorm   output     An estimate of the norm of the final
00353 //                        solution vector x.
00354 //
00355 //
00356 //     Subroutines and functions used              
00357 //     ------------------------------
00358 //
00359 //     USER               aprod
00360 //     CBLAS              dcopy, dnrm2, dscal (see Lawson et al. below)
00361 //
00362 //
00363 //     References
00364 //     ----------
00365 //
00366 //     C.C. Paige and M.A. Saunders,  LSQR: An algorithm for sparse
00367 //          linear equations and sparse least squares,
00368 //          ACM Transactions on Mathematical Software 8, 1 (March 1982),
00369 //          pp. 43-71.
00370 //
00371 //     C.C. Paige and M.A. Saunders,  Algorithm 583, LSQR: Sparse
00372 //          linear equations and least-squares problems,
00373 //          ACM Transactions on Mathematical Software 8, 2 (June 1982),
00374 //          pp. 195-209.
00375 //
00376 //     C.L. Lawson, R.J. Hanson, D.R. Kincaid and F.T. Krogh,
00377 //          Basic linear algebra subprograms for Fortran usage,
00378 //          ACM Transactions on Mathematical Software 5, 3 (Sept 1979),
00379 //          pp. 308-323 and 324-325.
00380 //     ------------------------------------------------------------------
00381 //
00382 //
00383 //     LSQR development:
00384 //     22 Feb 1982: LSQR sent to ACM TOMS to become Algorithm 583.
00385 //     15 Sep 1985: Final F66 version.  LSQR sent to "misc" in netlib.
00386 //     13 Oct 1987: Bug (Robert Davies, DSIR).  Have to delete
00387 //                     if ( (one + dabs(t)) .le. one ) GO TO 200
00388 //                  from loop 200.  The test was an attempt to reduce
00389 //                  underflows, but caused w(i) not to be updated.
00390 //     17 Mar 1989: First F77 version.
00391 //     04 May 1989: Bug (David Gay, AT&T).  When the second beta is zero,
00392 //                  rnorm = 0 and
00393 //                  test2 = arnorm / (anorm * rnorm) overflows.
00394 //                  Fixed by testing for rnorm = 0.
00395 //     05 May 1989: Sent to "misc" in netlib.
00396 //     14 Mar 1990: Bug (John Tomlin via IBM OSL testing).
00397 //                  Setting rhbar2 = rhobar**2 + dampsq can give zero
00398 //                  if rhobar underflows and damp = 0.
00399 //                  Fixed by testing for damp = 0 specially.
00400 //     15 Mar 1990: Converted to lower case.
00401 //     21 Mar 1990: d2norm introduced to avoid overflow in numerous
00402 //                  items like  c = sqrt( a**2 + b**2 ).
00403 //     04 Sep 1991: wantse added as an argument to LSQR, to make
00404 //                  standard errors optional.  This saves storage and
00405 //                  time when se(*) is not wanted.
00406 //     13 Feb 1992: istop now returns a value in [1,5], not [1,7].
00407 //                  1, 2 or 3 means that x solves one of the problems
00408 //                  Ax = b,  min norm(Ax - b)  or  damped least squares.
00409 //                  4 means the limit on cond(A) was reached.
00410 //                  5 means the limit on iterations was reached.
00411 //     07 Dec 1994: Keep track of dxmax = max_k norm( phi_k * d_k ).
00412 //                  So far, this is just printed at the end.
00413 //                  A large value (relative to norm(x)) indicates
00414 //                  significant cancellation in forming
00415 //                  x  =  D*f  =  sum( phi_k * d_k ).
00416 //                  A large column of D need NOT be serious if the
00417 //                  corresponding phi_k is small.
00418 //     27 Dec 1994: Include estimate of alfa_opt in iteration log.
00419 //                  alfa_opt is the optimal scale factor for the
00420 //                  residual in the "augmented system", as described by
00421 //                  A. Bjorck (1992),
00422 //                  Pivoting and stability in the augmented system method,
00423 //                  in D. F. Griffiths and G. A. Watson (eds.),
00424 //                  "Numerical Analysis 1991",
00425 //                  Proceedings of the 14th Dundee Conference,
00426 //                  Pitman Research Notes in Mathematics 260,
00427 //                  Longman Scientific and Technical, Harlow, Essex, 1992.
00428 //     14 Apr 2006: "Line-by-line" conversion to ISO C by
00429 //                  Michael P. Friedlander.
00430 //
00431 //
00432 //     Michael A. Saunders                  mike@sol-michael.stanford.edu
00433 //     Dept of Operations Research          na.Msaunders@na-net.ornl.gov
00434 //     Stanford University
00435 //     Stanford, CA 94305-4022              (415) 723-1875
00436 //-----------------------------------------------------------------------
00437 
00438 //  Local copies of output variables.  Output vars are assigned at exit.
00439     int
00440         istop  = 0,
00441         itn    = 0;
00442     double
00443         anorm  = ZERO,
00444         acond  = ZERO,
00445         rnorm  = ZERO,
00446         arnorm = ZERO,
00447         xnorm  = ZERO;
00448 
00449 //  Local variables
00450 
00451     const bool
00452         extra  = false,       // true for extra printing below.
00453         damped = damp > ZERO,
00454         wantse = se != NULL;
00455     int
00456         i, maxdx, nconv, nstop;
00457     double
00458         alfopt, alpha, arnorm0, beta, bnorm,
00459         cs, cs1, cs2, ctol,
00460         delta, dknorm, dnorm, dxk, dxmax,
00461         gamma, gambar, phi, phibar, psi,
00462         res2, rho, rhobar, rhbar1,
00463         rhs, rtol, sn, sn1, sn2,
00464         t, tau, temp, test1, test2, test3,
00465         theta, t1, t2, t3, xnorm1, z, zbar;
00466     char
00467         enter[] = "Enter LSQR.  ",
00468         exit[]  = "Exit  LSQR.  ",
00469         msg[6][100] =
00470         {
00471             {"The exact solution is  x = 0"},
00472             {"A solution to Ax = b was found, given atol, btol"},
00473             {"A least-squares solution was found, given atol"},
00474             {"A damped least-squares solution was found, given atol"},
00475             {"Cond(Abar) seems to be too large, given conlim"},
00476             {"The iteration limit was reached"}
00477         };
00478 //-----------------------------------------------------------------------
00479 
00480 //  Format strings.
00481     char fmt_1000[] = 
00482         " %s        Least-squares solution of  Ax = b\n"
00483         " The matrix  A  has %7d rows  and %7d columns\n"
00484         " damp   = %-22.2e    wantse = %10i\n"
00485         " atol   = %-22.2e    conlim = %10.2e\n"
00486         " btol   = %-22.2e    itnlim = %10d\n\n";
00487     char fmt_1200[] =
00488         "    Itn       x(1)           Function"
00489         "     Compatible    LS      Norm A   Cond A\n";
00490     char fmt_1300[] =
00491         "    Itn       x(1)           Function"
00492         "     Compatible    LS      Norm Abar   Cond Abar\n";
00493     char fmt_1400[] =
00494         "     phi    dknorm  dxk  alfa_opt\n";
00495     char fmt_1500_extra[] =
00496         " %6d %16.9e %16.9e %9.2e %9.2e %8.1e %8.1e %8.1e %7.1e %7.1e %7.1e\n";
00497     char fmt_1500[] =
00498         " %6d %16.9e %16.9e %9.2e %9.2e %8.1e %8.1e\n";
00499     char fmt_1550[] =
00500         " %6d %16.9e %16.9e %9.2e %9.2e\n";
00501     char fmt_1600[] = 
00502         "\n";
00503     char fmt_2000[] =
00504         "\n"
00505         " %s       istop  = %-10d      itn    = %-10d\n"
00506         " %s       anorm  = %11.5e     acond  = %11.5e\n"
00507         " %s       vnorm  = %11.5e     xnorm  = %11.5e\n"
00508         " %s       rnorm  = %11.5e     arnorm = %11.5e\n";
00509     char fmt_2100[] =
00510         " %s       max dx = %7.1e occured at itn %-9d\n"
00511         " %s              = %7.1e*xnorm\n";
00512     char fmt_3000[] =
00513         " %s       %s\n";
00514 
00515 //  Initialize.
00516 
00517     if (nout != NULL)
00518         fprintf(nout, fmt_1000,
00519                 enter, m, n, damp, wantse,
00520                 atol, conlim, btol, itnlim);
00521 
00522     itn    =   0;
00523     istop  =   0;
00524     nstop  =   0;
00525     maxdx  =   0;
00526     ctol   =   ZERO;
00527     if (conlim > ZERO) ctol = ONE / conlim;
00528     anorm  =   ZERO;
00529     acond  =   ZERO;
00530     dnorm  =   ZERO;
00531     dxmax  =   ZERO;
00532     res2   =   ZERO;
00533     psi    =   ZERO;
00534     xnorm  =   ZERO;
00535     xnorm1 =   ZERO;
00536     cs2    = - ONE;
00537     sn2    =   ZERO;
00538     z      =   ZERO;
00539 
00540 //  ------------------------------------------------------------------
00541 //  Set up the first vectors u and v for the bidiagonalization.
00542 //  These satisfy  beta*u = b,  alpha*v = A(transpose)*u.
00543 //  ------------------------------------------------------------------
00544     dload( n, 0.0, v );
00545     dload( n, 0.0, x );
00546 
00547     if ( wantse )
00548         dload( n, 0.0, se );
00549     
00550     alpha  =   ZERO;
00551     beta   =   cblas_dnrm2 ( m, u, 1 );
00552 
00553     if (beta > ZERO) {
00554         cblas_dscal ( m, (ONE / beta), u, 1 );
00555         aprod ( 2, m, n, v, u, UsrWrk );
00556         alpha  =   cblas_dnrm2 ( n, v, 1 );
00557     }
00558 
00559     if (alpha > ZERO) {
00560         cblas_dscal ( n, (ONE / alpha), v, 1 );
00561         cblas_dcopy ( n, v, 1, w, 1 );
00562     }
00563 
00564     arnorm = arnorm0 = alpha * beta;
00565     if (arnorm == ZERO) goto goto_800;
00566     
00567     rhobar =   alpha;
00568     phibar =   beta;
00569     bnorm  =   beta;
00570     rnorm  =   beta;
00571 
00572     if (nout != NULL) {
00573         if ( damped ) 
00574             fprintf(nout, fmt_1300);
00575         else
00576             fprintf(nout, fmt_1200);
00577 
00578         test1  = ONE;
00579         test2  = alpha / beta;
00580         
00581         if ( extra ) 
00582             fprintf(nout, fmt_1400);
00583 
00584         fprintf(nout, fmt_1550, itn, x[0], rnorm, test1, test2);
00585         fprintf(nout, fmt_1600);
00586     }
00587 
00588 
00589 //  ==================================================================
00590 //  Main iteration loop.
00591 //  ==================================================================
00592     while (1) {
00593         itn    = itn + 1;
00594         
00595 //      ------------------------------------------------------------------
00596 //      Perform the next step of the bidiagonalization to obtain the
00597 //      next  beta, u, alpha, v.  These satisfy the relations
00598 //                 beta*u  =  A*v  -  alpha*u,
00599 //                alpha*v  =  A(transpose)*u  -  beta*v.
00600 //      ------------------------------------------------------------------
00601         cblas_dscal ( m, (- alpha), u, 1 );
00602         aprod ( 1, m, n, v, u, UsrWrk );
00603         beta   =   cblas_dnrm2 ( m, u, 1 );
00604 
00605 //      Accumulate  anorm = || Bk ||
00606 //                        =  sqrt( sum of  alpha**2 + beta**2 + damp**2 ).
00607 
00608         temp   =   d2norm( alpha, beta );
00609         temp   =   d2norm( temp , damp );
00610         anorm  =   d2norm( anorm, temp );
00611 
00612         if (beta > ZERO) {
00613             cblas_dscal ( m, (ONE / beta), u, 1 );
00614             cblas_dscal ( n, (- beta), v, 1 );
00615             aprod ( 2, m, n, v, u, UsrWrk );
00616             alpha  =   cblas_dnrm2 ( n, v, 1 );
00617             if (alpha > ZERO) {
00618                 cblas_dscal ( n, (ONE / alpha), v, 1 );
00619             }
00620         }
00621 
00622 //      ------------------------------------------------------------------
00623 //      Use a plane rotation to eliminate the damping parameter.
00624 //      This alters the diagonal (rhobar) of the lower-bidiagonal matrix.
00625 //      ------------------------------------------------------------------
00626         rhbar1 = rhobar;
00627         if ( damped ) {
00628             rhbar1 = d2norm( rhobar, damp );
00629             cs1    = rhobar / rhbar1;
00630             sn1    = damp   / rhbar1;
00631             psi    = sn1 * phibar;
00632             phibar = cs1 * phibar;
00633         }
00634 
00635 //      ------------------------------------------------------------------
00636 //      Use a plane rotation to eliminate the subdiagonal element (beta)
00637 //      of the lower-bidiagonal matrix, giving an upper-bidiagonal matrix.
00638 //      ------------------------------------------------------------------
00639         rho    =   d2norm( rhbar1, beta );
00640         cs     =   rhbar1 / rho;
00641         sn     =   beta   / rho;
00642         theta  =   sn * alpha;
00643         rhobar = - cs * alpha;
00644         phi    =   cs * phibar;
00645         phibar =   sn * phibar;
00646         tau    =   sn * phi;
00647 
00648 //      ------------------------------------------------------------------
00649 //      Update  x, w  and (perhaps) the standard error estimates.
00650 //      ------------------------------------------------------------------
00651         t1     =   phi   / rho;
00652         t2     = - theta / rho;
00653         t3     =   ONE   / rho;
00654         dknorm =   ZERO;
00655 
00656         if ( wantse ) {
00657             for (i = 0; i < n; i++) {
00658                 t      =  w[i];
00659                 x[i]   =  t1*t  +  x[i];
00660                 w[i]   =  t2*t  +  v[i];
00661                 t      = (t3*t)*(t3*t);
00662                 se[i]  =  t     +  se[i];
00663                 dknorm =  t     +  dknorm;
00664             }
00665         }
00666         else {
00667             for (i = 0; i < n; i++) {
00668                 t      =  w[i];
00669                 x[i]   =  t1*t  +  x[i];
00670                 w[i]   =  t2*t  +  v[i];
00671                 dknorm = (t3*t)*(t3*t)  +  dknorm;
00672             }
00673         }
00674 
00675 //      ------------------------------------------------------------------
00676 //      Monitor the norm of d_k, the update to x.
00677 //      dknorm = norm( d_k )
00678 //      dnorm  = norm( D_k ),        where   D_k = (d_1, d_2, ..., d_k )
00679 //      dxk    = norm( phi_k d_k ),  where new x = x_k + phi_k d_k.
00680 //      ------------------------------------------------------------------
00681         dknorm = sqrt( dknorm );
00682         dnorm  = d2norm( dnorm, dknorm );
00683         dxk    = fabs( phi * dknorm );
00684         if (dxmax < dxk ) {
00685             dxmax   =  dxk;
00686             maxdx   =  itn;
00687         }
00688 
00689 //      ------------------------------------------------------------------
00690 //      Use a plane rotation on the right to eliminate the
00691 //      super-diagonal element (theta) of the upper-bidiagonal matrix.
00692 //      Then use the result to estimate  norm(x).
00693 //      ------------------------------------------------------------------
00694         delta  =   sn2 * rho;
00695         gambar = - cs2 * rho;
00696         rhs    =   phi    - delta * z;
00697         zbar   =   rhs    / gambar;
00698         xnorm  =   d2norm( xnorm1, zbar  );
00699         gamma  =   d2norm( gambar, theta );
00700         cs2    =   gambar / gamma;
00701         sn2    =   theta  / gamma;
00702         z      =   rhs    / gamma;
00703         xnorm1 =   d2norm( xnorm1, z     );
00704 
00705 //      ------------------------------------------------------------------
00706 //      Test for convergence.
00707 //      First, estimate the norm and condition of the matrix  Abar,
00708 //      and the norms of  rbar  and  Abar(transpose)*rbar.
00709 //      ------------------------------------------------------------------
00710         acond  =   anorm * dnorm;
00711         res2   =   d2norm( res2 , psi    );
00712         rnorm  =   d2norm( res2 , phibar );
00713         arnorm =   alpha * fabs( tau );
00714 
00715 //      Now use these norms to estimate certain other quantities,
00716 //      some of which will be small near a solution.
00717 
00718         alfopt =   sqrt( rnorm / (dnorm * xnorm) );
00719         test1  =   rnorm /  bnorm;
00720         test2  =   ZERO;
00721 //      if (rnorm   > ZERO) test2 = arnorm / (anorm * rnorm);
00722         if (arnorm0 > ZERO) test2 = arnorm / arnorm0;
00723         test3  =   ONE   /  acond;
00724         t1     =   test1 / (ONE  +  anorm * xnorm / bnorm);
00725         rtol   =   btol  +  atol *  anorm * xnorm / bnorm;
00726 
00727 //      The following tests guard against extremely small values of
00728 //      atol, btol  or  ctol.  (The user may have set any or all of
00729 //      the parameters  atol, btol, conlim  to zero.)
00730 //      The effect is equivalent to the normal tests using
00731 //      atol = relpr,  btol = relpr,  conlim = 1/relpr.
00732 
00733         t3     =   ONE + test3;
00734         t2     =   ONE + test2;
00735         t1     =   ONE + t1;
00736         if (itn >= itnlim) istop = 5;
00737         if (t3  <= ONE   ) istop = 4;
00738         if (t2  <= ONE   ) istop = 2;
00739         if (t1  <= ONE   ) istop = 1;
00740 
00741 //      Allow for tolerances set by the user.
00742 
00743         if (test3 <= ctol) istop = 4;
00744         if (test2 <= atol) istop = 2;
00745         //        if (test1 <= rtol) istop = 1;
00746 
00747 //      ------------------------------------------------------------------
00748 //      See if it is time to print something.
00749 //      ------------------------------------------------------------------
00750         if (nout  == NULL     ) goto goto_600;
00751         if (n     <= 40       ) goto goto_400;
00752         if (itn   <= 10       ) goto goto_400;
00753         if (itn   >= itnlim-10) goto goto_400;
00754         if (itn % 10 == 0     ) goto goto_400;
00755         if (test3 <=  2.0*ctol) goto goto_400;
00756         if (test2 <= 10.0*atol) goto goto_400;
00757         if (test1 <= 10.0*rtol) goto goto_400;
00758         if (istop != 0        ) goto goto_400;
00759         goto goto_600;
00760 
00761 //      Print a line for this iteration.
00762 //      "extra" is for experimental purposes.
00763 
00764     goto_400:
00765         if ( extra ) {
00766             fprintf(nout, fmt_1500_extra,
00767                     itn, x[0], rnorm, test1, test2, anorm,
00768                     acond, phi, dknorm, dxk, alfopt);
00769         }
00770         else {
00771             fprintf(nout, fmt_1500,
00772                     itn, x[0], rnorm, test1, test2, anorm, acond);
00773         }
00774         if (itn % 10 == 0) fprintf(nout, fmt_1600);
00775 
00776 //      ------------------------------------------------------------------
00777 //      Stop if appropriate.
00778 //      The convergence criteria are required to be met on  nconv
00779 //      consecutive iterations, where  nconv  is set below.
00780 //      Suggested value:  nconv = 1, 2  or  3.
00781 //      ------------------------------------------------------------------
00782     goto_600:
00783         if (istop == 0) {
00784             nstop  = 0;
00785         }
00786         else {
00787             nconv  = 1;
00788             nstop  = nstop + 1;
00789             if (nstop < nconv  &&  itn < itnlim) istop = 0;
00790         }
00791 
00792         if (istop != 0) break;
00793         
00794     }
00795 //  ==================================================================
00796 //  End of iteration loop.
00797 //  ==================================================================
00798 
00799 //  Finish off the standard error estimates.
00800 
00801     if ( wantse ) {
00802         t    =   ONE;
00803         if (m > n)     t = m - n;
00804         if ( damped )  t = m;
00805         t    =   rnorm / sqrt( t );
00806       
00807         for (i = 0; i < n; i++)
00808             se[i]  = t * sqrt( se[i] );
00809         
00810     }
00811 
00812 //  Decide if istop = 2 or 3.
00813 //  Print the stopping condition.
00814  goto_800:
00815     if (damped  &&  istop == 2) istop = 3;
00816     if (nout != NULL) {
00817         fprintf(nout, fmt_2000,
00818                 exit, istop, itn,
00819                 exit, anorm, acond,
00820                 exit, bnorm, xnorm,
00821                 exit, rnorm, arnorm);
00822         fprintf(nout, fmt_2100,
00823                 exit, dxmax, maxdx,
00824                 exit, dxmax/(xnorm + 1.0e-20));
00825         fprintf(nout, fmt_3000,
00826                 exit, msg[istop]);
00827     }
00828 
00829 //  Assign output variables from local copies.
00830     *istop_out  = istop;
00831     *itn_out    = itn;
00832     *anorm_out  = anorm;
00833     *acond_out  = acond;
00834     *rnorm_out  = rnorm;
00835     *arnorm_out = test2;
00836     *xnorm_out  = xnorm;
00837 
00838     return;
00839 }
00840 
00841