gtsam/base/DenseQR.cpp

198 lines
5.9 KiB
C++

/*
* DenseQR.cpp
*
* Created on: Oct 19, 2010
* Author: nikai
* Description: Dense QR, inspired by Tim Davis's dense solver
*/
#include <cassert>
#include <math.h>
#include <algorithm>
#include "DenseQR.h"
// all the lapack functions we need here
extern "C" {
void dlarft_ (char *direct, char *storev, int *n, int *k, double *V, int *ldv, double *Tau, double *T, int *ldt) ;
void dlarfb_ (char *side, char *trans, char *direct, char *storev, int *m, int *n, int *k, double *V, int *ldv, double *T, int *ldt, double *C, int *ldc, double *Work, int *ldwork) ;
void dlarfg_ (int *n, double *alpha, double *X, int *incx, double *tau) ;
void dlarf_ (char *side, int *m, int *n, double *V, int *incv, double *tau, double *C, int *ldc, double *Work) ;
}
using namespace std;
namespace gtsam {
/* ************************************************************************* */
/**
* LARF applies a real elementary reflector H to a real m by n matrix
* C, from either the left or the right. H is represented in the form
*/
void dlarf_wrap(long m, long n, long ldc, double *V, double tau, double *C, double *W)
{
static char left = 'L' ;
double vsave ;
if (m <= 0 || n <= 0) return ;
vsave = V [0] ; // temporarily restore unit diagonal of V
V [0] = 1 ;
int m_ = m, n_ = n, ldc_ = ldc, one = 1 ;
dlarf_ (&left, &m_, &n_, V, &one, &tau, C, &ldc_, W) ;
V [0] = vsave ; // restore V [0]
}
/* ************************************************************************* */
void dlarftb_wrap(long m, long n, long k, long ldc, long ldv, double *V, double *Tau, double *C, double *W)
{
static char direct = 'F';
static char storev = 'C';
static char side = 'L';
static char trans = 'T';
if (m <= 0 || n <= 0 || k <= 0) return ;
double *T, *Work ;
T = W ; // triangular k-by-k matrix for block reflector
Work = W + k*k ; // workspace of size n*k or m*k for larfb
// construct and apply the k-by-k upper triangular matrix T
// larft and larfb are always used "Forward" and "Columnwise"
assert (m >= k) ;
int m_ = m, n_ = n, k_ = k, ldv_ = ldv, ldc_ = ldc;
dlarft_(&direct, &storev, &m_, &k_, V, &ldv_, Tau, T, &k_) ;
// Left, Transpose, Forward, Columwise:
dlarfb_(&side, &trans, &direct, &storev, &m_, &n_, &k_, V, &ldv_,
T, &k_, C, &ldc_, Work, &n_);
}
/* ************************************************************************* */
long DenseQR(long m, long n, long npiv, double tol, long ntol, long fchunk,
double *F, long *Stair, char *Rdead, double *Tau, double *W, double *wscale, double *wssq) {
double tau, wk, *V;
long k, t, g, g1, nv, k1, k2, i, t0, vzeros, mleft, nleft, vsize, minchunk, rank ;
assert (F != NULL) ;
assert (Stair != NULL) ;
assert (Rdead != NULL) ;
assert (Tau != NULL) ;
assert (W != NULL) ;
assert (m >= 0 && n >= 0) ;
npiv = max (0l, npiv) ; // npiv must be between 0 and n
npiv = min (n, npiv) ;
g1 = 0 ; // row index of first queued-up Householder
k1 = 0 ; // pending Householders are in F (g1:t, k1:k2-1)
k2 = 0 ;
V = F ; // Householder vectors start here
g = 0 ; // number of good Householders found
nv = 0 ; // number of Householder reflections queued up
vzeros = 0 ; // number of explicit zeros in queued-up H's
t = 0 ; // staircase of current column
fchunk = max (fchunk, 1l) ;
minchunk = max (4l, fchunk/4l) ;
rank = min (m,npiv) ;
ntol = min (ntol, npiv) ;
for (k = 0; k < n; k++) {
t0 = t; // t0 = staircase of column k-1
t = Stair[k]; // t = staircase of this column k
if (g >= m) {
for (; k < npiv; k++) {
Rdead[k] = 1;
Stair[k] = 0;
Tau[k] = 0;
}
for (; k < n; k++) {
Stair[k] = m;
Tau[k] = 0;
}
assert (nv == 0);
return (rank);
}
t = max(g + 1, t);
Stair[k] = t;
vzeros += nv * (t - t0);
if (nv >= minchunk) {
vsize = (nv * (nv + 1)) / 2 + nv * (t - g1 - nv);
if (vzeros > max(16l, vsize / 2)) {
dlarftb_wrap(t0 - g1, n - k2, nv, m, m, V, // F (g1:t-1, k1:k1+nv-1)
&Tau[k1], &F[g1+k2*m], W);
nv = 0;
vzeros = 0;
}
}
// find a Householder reflection that reduces column k
int n_ = t - g, one = 1;
double *X = &F[g+k*m];
dlarfg_(&n_, X, X + 1, &one, &tau);
// check to see if the kth column is OK
if (k < ntol && (wk = fabs(F[g+k*m])) <= tol) {
if (wk != 0) {
if ((*wscale) == 0) {
(*wssq) = 1;
}
if ((*wscale) < wk) {
double rr = (*wscale) / wk;
(*wssq) = 1 + (*wssq) * rr * rr;
(*wscale) = wk;
} else {
double rr = wk / (*wscale);
(*wssq) += rr * rr;
}
}
// zero out F (g:m-1,k) and flag it as dead
for (i = g; i < m; i++)
F[i+k*m] = 0;
Stair[k] = 0;
Tau[k] = 0;
Rdead[k] = 1;
// apply pending block of Householder reflections
if (nv > 0) {
dlarftb_wrap(t0 - g1, n - k2, nv, m, m, V, &Tau[k1], &F[g1+k2*m], W);
nv = 0; // clear queued-up Householder reflections
vzeros = 0;
}
} else {
// one more good pivot column found
Tau[k] = tau;
if (nv == 0) {
g1 = g;
k1 = k;
k2 = min(n, k + fchunk);
V = &F[g1+k1*m];
// check for switch to unblocked code
mleft = m - g1; // number of rows left
nleft = n - k1; // number of columns left
if (mleft * (nleft - (fchunk + 4)) < 5000 || mleft <= fchunk / 2
|| fchunk <= 1)
k2 = n;
}
nv++; // one more pending update; V is F (g1:t-1, k1:k1+nv-1)
// apply the kth Householder reflection to the current panel
dlarf_wrap(t - g, k2 - k - 1, m, &F[g+k*m], tau, &F[g+(k+1)*m], W);
g++; // one more pivot found
if (k == k2 - 1 || g == m) {
dlarftb_wrap(t - g1, n - k2, nv, m, m, V, &Tau[k1], &F[g1+(k2*m)], W);
nv = 0; // clear queued-up Householder reflections
vzeros = 0;
}
}
if (k == npiv - 1) rank = g;
}
return rank;
}
}