655 lines
		
	
	
		
			24 KiB
		
	
	
	
		
			C++
		
	
	
			
		
		
	
	
			655 lines
		
	
	
		
			24 KiB
		
	
	
	
		
			C++
		
	
	
| // =============================================================================
 | |
| // === spqr_front ==============================================================
 | |
| // =============================================================================
 | |
| 
 | |
| /* Given an m-by-n frontal matrix, use Householder reflections to reduce it
 | |
|    to upper trapezoidal form.  Columns 0:npiv-1 are checked against tol.
 | |
| 
 | |
|         0    # x x x x x x
 | |
|         1    # x x x x x x
 | |
|         2    # x x x x x x
 | |
|         3    # x x x x x x
 | |
|         4    - # x x x x x   <- Stair [0] = 4
 | |
|         5      # x x x x x
 | |
|         6      # x x x x x
 | |
|         7      # x x x x x
 | |
|         8      - # x x x x   <- Stair [1] = 8
 | |
|         9        # x x x x
 | |
|        10        # x x x x
 | |
|        11        # x x x x
 | |
|        12        - # x x x   <- Stair [2] = 12
 | |
|        13          # x x x
 | |
|        14          - # x x   <- Stair [3] = 14
 | |
|        15            - # x   <- Stair [4] = 15
 | |
|        16              - #   <- Stair [5] = 16
 | |
|                          -   <- Stair [6] = 17
 | |
| 
 | |
|     Suppose npiv = 3, and no columns fall below tol:
 | |
| 
 | |
|         0    R r r r r r r
 | |
|         1    h R r r r r r
 | |
|         2    h h R r r r r
 | |
|         3    h h h C c c c
 | |
|         4    - h h h C c c   <- Stair [0] = 4
 | |
|         5      h h h h C c
 | |
|         6      h h h h h C
 | |
|         7      h h h h h h
 | |
|         8      - h h h h h   <- Stair [1] = 8
 | |
|         9        h h h h h
 | |
|        10        h h h h h
 | |
|        11        h h h h h
 | |
|        12        - h h h h   <- Stair [2] = 12
 | |
|        13          h h h h
 | |
|        14          - h h h   <- Stair [3] = 14
 | |
|        15            - h h   <- Stair [4] = 15
 | |
|        16              - h   <- Stair [5] = 16
 | |
|                          -   <- Stair [6] = 17
 | |
| 
 | |
|     where r is an entry in the R block, c is an entry in the C (contribution)
 | |
|     block, and h is an entry in the H block (Householder vectors).  Diagonal
 | |
|     entries are capitalized.
 | |
| 
 | |
|     Suppose the 2nd column has a norm <= tol; the result is a "squeezed" R:
 | |
| 
 | |
|         0    R r r r r r r   <- Stair [1] = 0 to denote a dead pivot column
 | |
|         1    h - R r r r r
 | |
|         2    h - h C c c c
 | |
|         3    h - h h C c c
 | |
|         4    - - h h h C c   <- Stair [0] = 4
 | |
|         5      - h h h h C
 | |
|         6      - h h h h h
 | |
|         7      - h h h h h
 | |
|         8      - h h h h h
 | |
|         9        h h h h h
 | |
|        10        h h h h h
 | |
|        11        h h h h h
 | |
|        12        - h h h h   <- Stair [2] = 12
 | |
|        13          h h h h
 | |
|        14          - h h h   <- Stair [3] = 14
 | |
|        15            - h h   <- Stair [4] = 15
 | |
|        16              - h   <- Stair [5] = 16
 | |
|                          -   <- Stair [6] = 17
 | |
| 
 | |
|     where "diagonal" entries are capitalized.  The 2nd H vector is missing
 | |
|     (it is H2 = I, identity).  The 2nd column of R is not deleted.  The
 | |
|     contribution block is always triangular, but the first npiv columns of
 | |
|     the R can become "staggered".  Columns npiv to n-1 in the R block are
 | |
|     always the same length.
 | |
| 
 | |
|     If columns are found "dead", the staircase may be updated.  Stair[k] is
 | |
|     set to zero if k is dead.  Also, Stair[k] is increased, if necessary, to
 | |
|     ensure that R and C reside within the staircase.  For example:
 | |
| 
 | |
|         0 0 0
 | |
|         0 0 x
 | |
| 
 | |
|     with npiv = 2 has a Stair = [ 0 1 2 ] on output, to reflect the C block:
 | |
| 
 | |
|         - C c
 | |
|         - - C
 | |
| 
 | |
|     A tol of zero means that any nonzero norm (however small) is accepted;
 | |
|     only exact zero columns are flagged as dead.  A negative tol means that
 | |
|     the norms are ignored; a column is never flagged dead.  The default tol
 | |
|     is set elsewhere as 20 * (m+1) * eps * max column 2-norm of A.
 | |
| 
 | |
|     LAPACK's dlarf* routines are used to construct and apply the Householder
 | |
|     reflections.  The panel size (block size) is provided as an input
 | |
|     parameter, which defines the number of Householder vectors in a panel.
 | |
|     However, when the front is small (or when the remaining part
 | |
|     of a front is small) the block size is increased to include the entire
 | |
|     front.  "Small" is defined, below, as fronts with fewer than 5000 entries.
 | |
| 
 | |
|     NOTE: this function does not check its inputs.  If the caller runs out of
 | |
|     memory and passes NULL pointers, this function will segfault.
 | |
| */
 | |
| 
 | |
| #include "spqr_subset.hpp"
 | |
| #include "iostream"
 | |
| 
 | |
| #define SMALL 5000
 | |
| #define MINCHUNK 4
 | |
| #define MINCHUNK_RATIO 4
 | |
| 
 | |
| // =============================================================================
 | |
| // === spqr_private_house ======================================================
 | |
| // =============================================================================
 | |
| 
 | |
| // Construct a Householder reflection H = I - tau * v * v' such that H*x is
 | |
| //  reduced to zero except for the first element.  Returns X [0] = the first
 | |
| //  entry of H*x, and X [1:n-1] = the Householder vector V [1:n-1], where
 | |
| //  V [0] = 1.  If X [1:n-1] is zero, then the H=I (tau = 0) is returned,
 | |
| //  and V [1:n-1] is all zero.  In MATLAB (1-based indexing), ignoring the
 | |
| //  rescaling done in dlarfg/zlarfg, this function computes the following:
 | |
| 
 | |
| /*
 | |
|     function [x, tau] = house (x)
 | |
|     n = length (x) ;
 | |
|     beta = norm (x) ;
 | |
|     if (x (1) > 0)
 | |
|         beta = -beta ;
 | |
|     end
 | |
|     tau = (beta - x (1)) / beta ;
 | |
|     x (2:n) = x (2:n) / (x (1) - beta) ;
 | |
|     x (1) = beta ;
 | |
| */
 | |
| 
 | |
| //  Note that for the complex case, the reflection must be applied as H'*x,
 | |
| //  which requires that tau be conjugated in spqr_private_apply1.
 | |
| //
 | |
| //  This function performs about 3*n+2 flops
 | |
| 
 | |
| inline double spqr_private_larfg (Int n, double *X, cholmod_common *cc)
 | |
| {
 | |
|     double tau = 0 ;
 | |
|     BLAS_INT N = n, one = 1 ;
 | |
|     if (CHECK_BLAS_INT && !EQ (N,n))
 | |
|     {
 | |
|         cc->blas_ok = FALSE ;
 | |
|     }
 | |
|     if (!CHECK_BLAS_INT || cc->blas_ok)
 | |
|     {
 | |
|         LAPACK_DLARFG (&N, X, X + 1, &one, &tau) ;
 | |
|     }
 | |
|     return (tau) ;
 | |
| }
 | |
| 
 | |
| 
 | |
| inline Complex spqr_private_larfg (Int n, Complex *X, cholmod_common *cc)
 | |
| {
 | |
|     Complex tau = 0 ;
 | |
|     BLAS_INT N = n, one = 1 ;
 | |
|     if (CHECK_BLAS_INT && !EQ (N,n))
 | |
|     {
 | |
|         cc->blas_ok = FALSE ;
 | |
|     }
 | |
|     if (!CHECK_BLAS_INT || cc->blas_ok)
 | |
|     {
 | |
|         LAPACK_ZLARFG (&N, X, X + 1, &one, &tau) ;
 | |
|     }
 | |
|     return (tau) ;
 | |
| }
 | |
| 
 | |
| 
 | |
| template <typename Entry> Entry spqr_private_house  // returns tau
 | |
| (
 | |
|     // inputs, not modified
 | |
|     Int n,
 | |
| 
 | |
|     // input/output
 | |
|     Entry *X,           // size n
 | |
| 
 | |
|     cholmod_common *cc
 | |
| )
 | |
| {
 | |
|     return (spqr_private_larfg (n, X, cc)) ;
 | |
| }
 | |
| 
 | |
| 
 | |
| // =============================================================================
 | |
| // === spqr_private_apply1 =====================================================
 | |
| // =============================================================================
 | |
| 
 | |
| // Apply a single Householder reflection; C = C - tau * v * v' * C.  The
 | |
| // algorithm used by dlarf is:
 | |
| 
 | |
| /*
 | |
|     function C = apply1 (C, v, tau)
 | |
|     w = C'*v ;
 | |
|     C = C - tau*v*w' ;
 | |
| */
 | |
| 
 | |
| //  For the complex case, we need to apply H', which requires that tau be
 | |
| //  conjugated.
 | |
| //
 | |
| //  If applied to a single column, this function performs 2*n-1 flops to
 | |
| //  compute w, and 2*n+1 to apply it to C, for a total of 4*n flops.
 | |
| 
 | |
| inline void spqr_private_larf (Int m, Int n, double *V, double tau,
 | |
|     double *C, Int ldc, double *W, cholmod_common *cc)
 | |
| {
 | |
|     BLAS_INT M = m, N = n, LDC = ldc, one = 1 ;
 | |
|     char left = 'L' ;
 | |
|     if (CHECK_BLAS_INT && !(EQ (M,m) && EQ (N,n) && EQ (LDC,ldc)))
 | |
|     {
 | |
|         cc->blas_ok = FALSE ;
 | |
|         
 | |
|     }
 | |
|     if (!CHECK_BLAS_INT || cc->blas_ok)
 | |
|     {
 | |
|         LAPACK_DLARF (&left, &M, &N, V, &one, &tau, C, &LDC, W) ;
 | |
|     }
 | |
| }
 | |
| 
 | |
| inline void spqr_private_larf (Int m, Int n, Complex *V, Complex tau,
 | |
|     Complex *C, Int ldc, Complex *W, cholmod_common *cc)
 | |
| {
 | |
|     BLAS_INT M = m, N = n, LDC = ldc, one = 1 ;
 | |
|     char left = 'L' ;
 | |
|     Complex conj_tau = spqr_conj (tau) ;
 | |
|     if (CHECK_BLAS_INT && !(EQ (M,m) && EQ (N,n) && EQ (LDC,ldc)))
 | |
|     {
 | |
|         cc->blas_ok = FALSE ;
 | |
|     }
 | |
|     if (!CHECK_BLAS_INT || cc->blas_ok)
 | |
|     {
 | |
|         LAPACK_ZLARF (&left, &M, &N, V, &one, &conj_tau, C, &LDC, W) ;
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| template <typename Entry> void spqr_private_apply1
 | |
| (
 | |
|     // inputs, not modified
 | |
|     Int m,              // C is m-by-n
 | |
|     Int n,
 | |
|     Int ldc,            // leading dimension of C
 | |
|     Entry *V,           // size m, Householder vector V
 | |
|     Entry tau,          // Householder coefficient
 | |
| 
 | |
|     // input/output
 | |
|     Entry *C,           // size m-by-n
 | |
| 
 | |
|     // workspace, not defined on input or output
 | |
|     Entry *W,           // size n
 | |
| 
 | |
|     cholmod_common *cc
 | |
| )
 | |
| {
 | |
|     Entry vsave ;
 | |
|     if (m <= 0 || n <= 0)
 | |
|     {
 | |
|         return ;        // nothing to do
 | |
|     }
 | |
|     vsave = V [0] ;     // temporarily restore unit diagonal of V
 | |
|     V [0] = 1 ;
 | |
|     spqr_private_larf (m, n, V, tau, C, ldc, W, cc) ;
 | |
|     V [0] = vsave ;     // restore V [0]
 | |
| }
 | |
| 
 | |
| 
 | |
| // =============================================================================
 | |
| // === spqr_front ==============================================================
 | |
| // =============================================================================
 | |
| 
 | |
| // Factorize a front F into a sequence of Householder vectors H, and an upper
 | |
| // trapezoidal matrix R.  R may be a squeezed upper trapezoidal matrix if any
 | |
| // of the leading npiv columns are linearly dependent.  Returns the row index
 | |
| // rank that indicates the first entry in C, which is F (rank,npiv), or 0
 | |
| // on error.
 | |
| 
 | |
| template <typename Entry> Int spqr_front
 | |
| (
 | |
|     // input, not modified
 | |
|     Int m,              // F is m-by-n with leading dimension m
 | |
|     Int n,
 | |
|     Int npiv,           // number of pivot columns
 | |
|     double tol,         // a column is flagged as dead if its norm is <= tol
 | |
|     Int ntol,           // apply tol only to first ntol pivot columns
 | |
|     Int fchunk,         // block size for compact WY Householder reflections,
 | |
|                         // treated as 1 if fchunk <= 1
 | |
| 
 | |
|     // input/output
 | |
|     Entry *F,           // frontal matrix F of size m-by-n
 | |
|     Int *Stair,         // size n, entries F (Stair[k]:m-1, k) are all zero,
 | |
|                         // for each k = 0:n-1, and remain zero on output.
 | |
|     char *Rdead,        // size npiv; all zero on input.  If k is dead,
 | |
|                         // Rdead [k] is set to 1
 | |
| 
 | |
|     // output, not defined on input
 | |
|     Entry *Tau,         // size n, Householder coefficients
 | |
| 
 | |
|     // workspace, undefined on input and output
 | |
|     Entry *W,           // size b*n, where b = min (fchunk,n,m)
 | |
| 
 | |
|     // input/output
 | |
|     double *wscale,
 | |
|     double *wssq,
 | |
| 
 | |
|     cholmod_common *cc  // for cc->hypotenuse function
 | |
| )
 | |
| {
 | |
|     Entry tau ;
 | |
|     double wk ;
 | |
|     Entry *V ;
 | |
|     Int k, t, g, g1, nv, k1, k2, i, t0, vzeros, mleft, nleft, vsize, minchunk,
 | |
|         rank ;
 | |
| 
 | |
|     // NOTE: inputs are not checked for NULL (except if debugging enabled)
 | |
| 
 | |
|     ASSERT (F != NULL) ;
 | |
|     ASSERT (Stair != NULL) ;
 | |
|     ASSERT (Rdead != NULL) ;
 | |
|     ASSERT (Tau != NULL) ;
 | |
|     ASSERT (W != NULL) ;
 | |
|     ASSERT (m >= 0 && n >= 0) ;
 | |
| 
 | |
|     npiv = MAX (0, npiv) ;  // npiv must be between 0 and n
 | |
|     npiv = MIN (n, npiv) ;
 | |
| 
 | |
|     g1 = 0 ;                // row index of first queued-up Householder
 | |
|     k1 = 0 ;                // pending Householders are in F (g1:t, k1:k2-1)
 | |
|     k2 = 0 ;
 | |
|     V = F ;                 // Householder vectors start here
 | |
|     g = 0 ;                 // number of good Householders found
 | |
|     nv = 0 ;                // number of Householder reflections queued up
 | |
|     vzeros = 0 ;            // number of explicit zeros in queued-up H's
 | |
|     t = 0 ;                 // staircase of current column
 | |
|     fchunk = MAX (fchunk, 1) ;
 | |
|     minchunk = MAX (MINCHUNK, fchunk/MINCHUNK_RATIO) ;
 | |
|     rank = MIN (m,npiv) ;  // F (rank,npiv) is the first entry in C.  rank
 | |
|                            // is also the number of rows in the R block,
 | |
|                            // and the number of good pivot columns found.
 | |
| 
 | |
|     ntol = MIN (ntol, npiv) ;   // Note ntol can be negative, which means to
 | |
|                                 // not use tol at all.
 | |
| 
 | |
|     PR (("Front %ld by %ld with %ld pivots\n", m, n, npiv)) ;
 | |
|     for (k = 0 ; k < n ; k++)
 | |
|     {
 | |
| 
 | |
|         // ---------------------------------------------------------------------
 | |
|         // reduce the kth column of F to eliminate all but "diagonal" F (g,k)
 | |
|         // ---------------------------------------------------------------------
 | |
| 
 | |
|         // get the staircase for the kth column, and operate on the "diagonal"
 | |
|         // F (g,k); eliminate F (g+1:t-1, k) to zero
 | |
|         t0 = t ;            // t0 = staircase of column k-1
 | |
|         t = Stair [k] ;     // t = staircase of this column k
 | |
| 
 | |
|         PR (("k %ld g %ld m %ld n %ld npiv %ld\n", k, g, m, n, npiv)) ;
 | |
|         if (g >= m)
 | |
|         {
 | |
|             // F (g,k) is outside the matrix, so we're done.  If this happens
 | |
|             // when k < npiv, then there is no contribution block.
 | |
|             PR (("hit the wall, npiv: %ld k %ld rank %ld\n", npiv, k, rank)) ;
 | |
|             for ( ; k < npiv ; k++)
 | |
|             {
 | |
|                 Rdead [k] = 1 ;
 | |
|                 Stair [k] = 0 ;         // remaining pivot columns all dead
 | |
|                 Tau [k] = 0 ;
 | |
|             }
 | |
|             for ( ; k < n ; k++)
 | |
|             {
 | |
|                 Stair [k] = m ;         // non-pivotal columns
 | |
|                 Tau [k] = 0 ;
 | |
|             }
 | |
|             ASSERT (nv == 0) ;          // there can be no pending updates
 | |
|             return (rank) ;
 | |
|         }
 | |
| 
 | |
|         // if t < g+1, then this column is all zero; fix staircase so that R is
 | |
|         // always inside the staircase.
 | |
|         t = MAX (g+1,t) ;
 | |
|         Stair [k] = t ;
 | |
| 
 | |
|         // ---------------------------------------------------------------------
 | |
|         // If t just grew a lot since the last t, apply H now to all of F
 | |
|         // ---------------------------------------------------------------------
 | |
| 
 | |
|         // vzeros is the number of zero entries in V after including the next
 | |
|         // Householder vector.  If it would exceed 50% of the size of V,
 | |
|         // apply the pending Householder reflections now, but only if
 | |
|         // enough vectors have accumulated.
 | |
| 
 | |
|         vzeros += nv * (t - t0) ;
 | |
|         if (nv >= minchunk)
 | |
|         {
 | |
|             vsize = (nv*(nv+1))/2 + nv*(t-g1-nv) ;
 | |
|             if (vzeros > MAX (16, vsize/2))
 | |
|             {
 | |
|                 // apply pending block of Householder reflections
 | |
|                 PR (("(1) apply k1 %ld k2 %ld\n", k1, k2)) ;
 | |
|                 spqr_larftb (
 | |
|                     0,                          // method 0: Left, Transpose
 | |
|                     t0-g1, n-k2, nv, m, m,
 | |
|                     V,                          // F (g1:t-1, k1:k1+nv-1)
 | |
|                     &Tau [k1],                  // Tau (k1:k-1)
 | |
|                     &F [INDEX (g1,k2,m)],       // F (g1:t-1, k2:n-1)
 | |
|                     W, cc) ;                    // size nv*nv + nv*(n-k2)
 | |
|                 nv = 0 ;        // clear queued-up Householder reflections
 | |
|                 vzeros = 0 ;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         // ---------------------------------------------------------------------
 | |
|         // find a Householder reflection that reduces column k
 | |
|         // ---------------------------------------------------------------------
 | |
| 
 | |
|         tau = spqr_private_house (t-g, &F [INDEX (g,k,m)], cc) ;
 | |
| 
 | |
|         // ---------------------------------------------------------------------
 | |
|         // check to see if the kth column is OK
 | |
|         // ---------------------------------------------------------------------
 | |
| 
 | |
|         if (k < ntol && (wk = spqr_abs (F [INDEX (g,k,m)], cc)) <= tol)
 | |
|         {
 | |
|             // -----------------------------------------------------------------
 | |
|             // norm (F (g:t-1, k)) is too tiny; the kth pivot column is dead
 | |
|             // -----------------------------------------------------------------
 | |
| 
 | |
|             // keep track of the 2-norm of w, the dead column 2-norms
 | |
|             if (wk != 0)
 | |
|             {
 | |
|                 // see also LAPACK's dnrm2 function
 | |
|                 if ((*wscale) == 0)
 | |
|                 {
 | |
|                     // this is the nonzero first entry in w
 | |
|                     (*wssq) = 1 ;
 | |
|                 }
 | |
|                 if ((*wscale) < wk)
 | |
|                 {
 | |
|                     double rr = (*wscale) / wk ;
 | |
|                     (*wssq) = 1 + (*wssq) * rr * rr ;
 | |
|                     (*wscale) = wk ;
 | |
|                 }
 | |
|                 else
 | |
|                 {
 | |
|                     double rr = wk / (*wscale) ;
 | |
|                     (*wssq) += rr * rr ;
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             // zero out F (g:m-1,k) and flag it as dead
 | |
|             for (i = g ; i < m ; i++)
 | |
|             {
 | |
|                 // This is not strictly necessary.  On output, entries outside
 | |
|                 // the staircase are ignored.
 | |
|                 F [INDEX (i,k,m)] = 0 ;
 | |
|             }
 | |
|             Stair [k] = 0 ;
 | |
|             Tau [k] = 0 ;
 | |
|             Rdead [k] = 1 ;
 | |
| 
 | |
|             if (nv > 0)
 | |
|             {
 | |
|                 // apply pending block of Householder reflections
 | |
|                 PR (("(2) apply k1 %ld k2 %ld\n", k1, k2)) ;
 | |
|                 spqr_larftb (
 | |
|                     0,                          // method 0: Left, Transpose
 | |
|                     t0-g1, n-k2, nv, m, m,
 | |
|                     V,                          // F (g1:t-1, k1:k1+nv-1)
 | |
|                     &Tau [k1],                  // Tau (k1:k-1)
 | |
|                     &F [INDEX (g1,k2,m)],       // F (g1:t-1, k2:n-1)
 | |
|                     W, cc) ;                    // size nv*nv + nv*(n-k2)
 | |
|                 nv = 0 ;        // clear queued-up Householder reflections
 | |
|                 vzeros = 0 ;
 | |
|             }
 | |
| 
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             // -----------------------------------------------------------------
 | |
|             // one more good pivot column found
 | |
|             // -----------------------------------------------------------------
 | |
| 
 | |
|             Tau [k] = tau ;             // save the Householder coefficient
 | |
|             if (nv == 0)
 | |
|             {
 | |
|                 // start the queue of pending Householder updates, and define
 | |
|                 // the current panel as k1:k2-1
 | |
|                 g1 = g ;                        // first row of V
 | |
|                 k1 = k ;                        // first column of V
 | |
|                 k2 = MIN (n, k+fchunk) ;        // k2-1 is last col in panel
 | |
|                 V = &F [INDEX (g1,k1,m)] ;      // pending V starts here
 | |
| 
 | |
|                 // check for switch to unblocked code
 | |
|                 mleft = m-g1 ;                  // number of rows left
 | |
|                 nleft = n-k1 ;                  // number of columns left
 | |
|                 if (mleft * (nleft-(fchunk+4)) < SMALL || mleft <= fchunk/2
 | |
|                     || fchunk <= 1)
 | |
|                 {
 | |
|                     // remaining matrix is small; switch to unblocked code by
 | |
|                     // including the rest of the matrix in the panel.  Always
 | |
|                     // use unblocked code if fchunk <= 1.
 | |
|                     k2 = n ;
 | |
|                 }
 | |
|             }
 | |
|             nv++ ;  // one more pending update; V is F (g1:t-1, k1:k1+nv-1)
 | |
| 
 | |
|             // -----------------------------------------------------------------
 | |
|             // keep track of "pure" flops, for performance testing only
 | |
|             // -----------------------------------------------------------------
 | |
| 
 | |
|             // The Householder vector is of length t-g, including the unit
 | |
|             // diagonal, and takes 3*(t-g) flops to compute.  It will be
 | |
|             // applied as a block, but compute the "pure" flops by assuming
 | |
|             // that this single Householder vector is computed and then applied
 | |
|             // just by itself to the rest of the frontal matrix (columns
 | |
|             // k+1:n-1, or n-k-1 columns).  Applying the Householder reflection
 | |
|             // to just one column takes 4*(t-g) flops.  This computation only
 | |
|             // works if TBB is disabled, merely because it uses a global
 | |
|             // variable to keep track of the flop count.  If TBB is used, this
 | |
|             // computation may result in a race condition; it is disabled in
 | |
|             // that case.
 | |
| 
 | |
|             FLOP_COUNT ((t-g) * (3 + 4 * (n-k-1))) ;
 | |
| 
 | |
|             // -----------------------------------------------------------------
 | |
|             // apply the kth Householder reflection to the current panel
 | |
|             // -----------------------------------------------------------------
 | |
| 
 | |
|             // F (g:t-1, k+1:k2-1) -= v * tau * v' * F (g:t-1, k+1:k2-1), where
 | |
|             // v is stored in F (g:t-1,k).  This applies just one reflection
 | |
|             // to the current panel.
 | |
|             PR (("apply 1: k %ld\n", k)) ;
 | |
|             spqr_private_apply1 (t-g, k2-k-1, m, &F [INDEX (g,k,m)], tau,
 | |
|                 &F [INDEX (g,k+1,m)], W, cc) ;
 | |
| 
 | |
|             g++ ;   // one more pivot found
 | |
| 
 | |
|             // -----------------------------------------------------------------
 | |
|             // apply the Householder reflections if end of panel reached
 | |
|             // -----------------------------------------------------------------
 | |
| 
 | |
|             if (k == k2-1 || g == m)            // or if last pivot is found
 | |
|             {
 | |
|                 // apply pending block of Householder reflections
 | |
|                 PR (("(3) apply k1 %ld k2 %ld\n", k1, k2)) ;
 | |
|                 spqr_larftb (
 | |
|                     0,                          // method 0: Left, Transpose
 | |
|                     t-g1, n-k2, nv, m, m,
 | |
|                     V,                          // F (g1:t-1, k1:k1+nv-1)
 | |
|                     &Tau [k1],                  // Tau (k1:k2-1)
 | |
|                     &F [INDEX (g1,k2,m)],       // F (g1:t-1, k2:n-1)
 | |
|                     W, cc) ;                    // size nv*nv + nv*(n-k2)
 | |
|                 nv = 0 ;        // clear queued-up Householder reflections
 | |
|                 vzeros = 0 ;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         // ---------------------------------------------------------------------
 | |
|         // determine the rank of the pivot columns
 | |
|         // ---------------------------------------------------------------------
 | |
| 
 | |
|         if (k == npiv-1)
 | |
|         {
 | |
|             // the rank is the number of good columns found in the first
 | |
|             // npiv columns.  It is also the number of rows in the R block.
 | |
|             // F (rank,npiv) is first entry in the C block.
 | |
|             rank = g ;
 | |
|             PR (("rank of Front pivcols: %ld\n", rank)) ;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (CHECK_BLAS_INT && !cc->blas_ok)
 | |
|     {
 | |
|     	  // This cannot occur if the BLAS_INT and the Int are the same integer.
 | |
|         // In that case, CHECK_BLAS_INT is FALSE at compile-time, and the
 | |
|         // compiler will then remove this as dead code.
 | |
|         ERROR (CHOLMOD_INVALID, "problem too large for the BLAS") ;
 | |
|         return (0) ;
 | |
|     }
 | |
| 
 | |
|     return (rank) ;
 | |
| }
 | |
| 
 | |
| 
 | |
| // =============================================================================
 | |
| 
 | |
| template Int spqr_front <double>
 | |
| (
 | |
|     // input, not modified
 | |
|     Int m,              // F is m-by-n with leading dimension m
 | |
|     Int n,
 | |
|     Int npiv,           // number of pivot columns
 | |
|     double tol,         // a column is flagged as dead if its norm is <= tol
 | |
|     Int ntol,           // apply tol only to first ntol pivot columns
 | |
|     Int fchunk,         // block size for compact WY Householder reflections,
 | |
|                         // treated as 1 if fchunk <= 1 (in which case the
 | |
|                         // unblocked code is used).
 | |
| 
 | |
|     // input/output
 | |
|     double *F,          // frontal matrix F of size m-by-n
 | |
|     Int *Stair,         // size n, entries F (Stair[k]:m-1, k) are all zero,
 | |
|                         // and remain zero on output.
 | |
|     char *Rdead,        // size npiv; all zero on input.  If k is dead,
 | |
|                         // Rdead [k] is set to 1
 | |
| 
 | |
|     // output, not defined on input
 | |
|     double *Tau,        // size n, Householder coefficients
 | |
| 
 | |
|     // workspace, undefined on input and output
 | |
|     double *W,          // size b*n, where b = min (fchunk,n,m)
 | |
| 
 | |
|     // input/output
 | |
|     double *wscale,
 | |
|     double *wssq,
 | |
| 
 | |
|     cholmod_common *cc  // for cc->hypotenuse function
 | |
| ) ;
 | |
| 
 | |
| // =============================================================================
 | |
| 
 | |
| template Int spqr_front <Complex>
 | |
| (
 | |
|     // input, not modified
 | |
|     Int m,              // F is m-by-n with leading dimension m
 | |
|     Int n,
 | |
|     Int npiv,           // number of pivot columns
 | |
|     double tol,         // a column is flagged as dead if its norm is <= tol
 | |
|     Int ntol,           // apply tol only to first ntol pivot columns
 | |
|     Int fchunk,         // block size for compact WY Householder reflections,
 | |
|                         // treated as 1 if fchunk <= 1 (in which case the
 | |
|                         // unblocked code is used). 
 | |
| 
 | |
|     // input/output
 | |
|     Complex *F,         // frontal matrix F of size m-by-n
 | |
|     Int *Stair,         // size n, entries F (Stair[k]:m-1, k) are all zero,
 | |
|                         // and remain zero on output.
 | |
|     char *Rdead,        // size npiv; all zero on input.  If k is dead,
 | |
|                         // Rdead [k] is set to 1
 | |
| 
 | |
|     // output, not defined on input
 | |
|     Complex *Tau,       // size n, Householder coefficients
 | |
| 
 | |
|     // workspace, undefined on input and output
 | |
|     Complex *W,         // size b*n, where b = min (fchunk,n,m)
 | |
| 
 | |
|     // input/output
 | |
|     double *wscale,
 | |
|     double *wssq,
 | |
| 
 | |
|     cholmod_common *cc  // for cc->hypotenuse function
 | |
| ) ;
 |