diff --git a/gtsam/linear/QPSolver.cpp b/gtsam/linear/QPSolver.cpp new file mode 100644 index 000000000..eeebab7e7 --- /dev/null +++ b/gtsam/linear/QPSolver.cpp @@ -0,0 +1,321 @@ +/* + * QPSolver.cpp + * @brief: + * @date: Apr 15, 2014 + * @author: thduynguyen + */ + +#include +#include + +using namespace std; + +namespace gtsam { + +/* ************************************************************************* */ +QPSolver::QPSolver(const GaussianFactorGraph& graph) : + graph_(graph), fullFactorIndices_(graph) { + + // Split the original graph into unconstrained and constrained part + // and collect indices of constrained factors + for (size_t i = 0; i < graph.nrFactors(); ++i) { + // obtain the factor and its noise model + JacobianFactor::shared_ptr jacobian = toJacobian(graph.at(i)); + if (jacobian && jacobian->get_model() + && jacobian->get_model()->isConstrained()) { + constraintIndices_.push_back(i); + } + } + + // Collect constrained variable keys + KeySet constrainedVars; + BOOST_FOREACH(size_t index, constraintIndices_) { + KeyVector keys = graph[index]->keys(); + constrainedVars.insert(keys.begin(), keys.end()); + } + + // Collect unconstrained hessians of constrained vars to build dual graph + freeHessians_ = unconstrainedHessiansOfConstrainedVars(graph, constrainedVars); + freeHessianFactorIndex_ = VariableIndex(*freeHessians_); +} + + +/* ************************************************************************* */ +GaussianFactorGraph::shared_ptr QPSolver::unconstrainedHessiansOfConstrainedVars( + const GaussianFactorGraph& graph, const KeySet& constrainedVars) const { + VariableIndex variableIndex(graph); + GaussianFactorGraph::shared_ptr hfg = boost::make_shared(); + + // Collect all factors involving constrained vars + FastSet factors; + BOOST_FOREACH(Key key, constrainedVars) { + VariableIndex::Factors factorsOfThisVar = variableIndex[key]; + BOOST_FOREACH(size_t factorIndex, factorsOfThisVar) { + factors.insert(factorIndex); + } + } + + // Convert each factor into Hessian + BOOST_FOREACH(size_t factorIndex, factors) { + if (!graph[factorIndex]) continue; + // See if this is a Jacobian factor + JacobianFactor::shared_ptr jf = toJacobian(graph[factorIndex]); + if (jf) { + // Dealing with mixed constrained factor + if (jf->get_model() && jf->isConstrained()) { + // Turn a mixed-constrained factor into a factor with 0 information on the constrained part + Vector sigmas = jf->get_model()->sigmas(); + Vector newPrecisions(sigmas.size()); + bool mixed = false; + for (size_t s=0; sclone()); + newJacobian->setModel(noiseModel::Diagonal::Precisions(newPrecisions)); + hfg->push_back(HessianFactor(*newJacobian)); + } + } + else { // unconstrained Jacobian + // Convert the original linear factor to Hessian factor + hfg->push_back(HessianFactor(*graph[factorIndex])); + } + } + else { // If it's not a Jacobian, it should be a hessian factor. Just add! + hfg->push_back(graph[factorIndex]); + } + + } + return hfg; +} + +/* ************************************************************************* */ +GaussianFactorGraph QPSolver::buildDualGraph(const GaussianFactorGraph& graph, + const VectorValues& x0) const { + // The dual graph to return + GaussianFactorGraph dualGraph; + + // For each variable xi involving in some constraint, compute the unconstrained gradient + // wrt xi from the prebuilt freeHessian graph + // \grad f(xi) = \frac{\partial f}{\partial xi}' = \sum_j G_ij*xj - gi + BOOST_FOREACH(const VariableIndex::value_type& xiKey_factors, freeHessianFactorIndex_) { + Key xiKey = xiKey_factors.first; + VariableIndex::Factors xiFactors = xiKey_factors.second; + + // Find xi's dim from the first factor on xi + if (xiFactors.size() == 0) continue; + GaussianFactor::shared_ptr xiFactor0 = freeHessians_->at(0); + size_t xiDim = xiFactor0->getDim(xiFactor0->find(xiKey)); + + // Compute gradf(xi) = \frac{\partial f}{\partial xi}' = \sum_j G_ij*xj - gi + Vector gradf_xi = zero(xiDim); + BOOST_FOREACH(size_t factorIx, xiFactors) { + HessianFactor::shared_ptr factor = toHessian(freeHessians_->at(factorIx)); + Factor::const_iterator xi = factor->find(xiKey); + // Sum over Gij*xj for all xj connecting to xi + for (Factor::const_iterator xj = factor->begin(); xj != factor->end(); + ++xj) { + // Obtain Gij from the Hessian factor + // Hessian factor only stores an upper triangular matrix, so be careful when i>j + Matrix Gij; + if (xi > xj) { + Matrix Gji = factor->info(xj, xi); + Gij = Gji.transpose(); + } + else { + Gij = factor->info(xi, xj); + } + // Accumulate Gij*xj to gradf + Vector x0_j = x0.at(*xj); + gradf_xi += Gij * x0_j; + } + // Subtract the linear term gi + gradf_xi += -factor->linearTerm(xi); + } + + // Obtain the jacobians for lambda variables from their corresponding constraints + // gradc_k(xi) = \frac{\partial c_k}{\partial xi}' + std::vector > lambdaTerms; // collection of lambda_k, and gradc_k + BOOST_FOREACH(size_t factorIndex, fullFactorIndices_[xiKey]) { + JacobianFactor::shared_ptr factor = toJacobian(graph.at(factorIndex)); + if (!factor || !factor->isConstrained()) continue; + // Gradient is the transpose of the Jacobian: A_k = gradc_k(xi) = \frac{\partial c_k}{\partial xi}' + // Each column for each lambda_k corresponds to [the transpose of] each constrained row factor + Matrix A_k = factor->getA(factor->find(xiKey)).transpose(); + // Deal with mixed sigmas: no information if sigma != 0 + Vector sigmas = factor->get_model()->sigmas(); + for (size_t sigmaIx = 0; sigmaIx0) + // we have no information about it + if (fabs(sigmas[sigmaIx]) > 1e-9) { + A_k.col(sigmaIx) = zero(A_k.rows()); + } + } + // Use factorIndex as the lambda's key. + lambdaTerms.push_back(make_pair(factorIndex, A_k)); + } + // Enforce constrained noise model so lambdas are solved with QR + // and should exactly satisfy all the equations + dualGraph.push_back(JacobianFactor(lambdaTerms, gradf_xi, + noiseModel::Constrained::All(gradf_xi.size()))); + + // Add 0 priors on all lambdas to make sure the graph is solvable + // TODO: Can we do for all lambdas like this, or only for those with no information? + BOOST_FOREACH(size_t factorIndex, fullFactorIndices_[xiKey]) { + JacobianFactor::shared_ptr factor = toJacobian(graph.at(factorIndex)); + if (!factor || !factor->isConstrained()) continue; + size_t dim= factor->get_model()->dim(); + // Use factorIndex as the lambda's key. + dualGraph.push_back(JacobianFactor(factorIndex, eye(dim), zero(dim))); + } + } + return dualGraph; +} + +/* ************************************************************************* */ +std::pair QPSolver::findWorstViolatedActiveIneq(const VectorValues& lambdas) const { + int worstFactorIx = -1, worstSigmaIx = -1; + // preset the maxLambda to 0.0: if lambda is <= 0.0, the constraint is either + // inactive or a good ineq constraint, so we don't care! + double maxLambda = 0.0; + BOOST_FOREACH(size_t factorIx, constraintIndices_) { + Vector lambda = lambdas.at(factorIx); + Vector orgSigmas = toJacobian(graph_.at(factorIx))->get_model()->sigmas(); + for (size_t j = 0; j maxLambda) { + worstFactorIx = factorIx; + worstSigmaIx = j; + maxLambda = lambda[j]; + } + } + return make_pair(worstFactorIx, worstSigmaIx); +} + +/* ************************************************************************* */ +bool QPSolver::updateWorkingSetInplace(GaussianFactorGraph& workingGraph, + int factorIx, int sigmaIx, double newSigma) const { + if (factorIx < 0 || sigmaIx < 0) + return false; + Vector sigmas = toJacobian(workingGraph.at(factorIx))->get_model()->sigmas(); + sigmas[sigmaIx] = newSigma; // removing it from the working set + toJacobian(workingGraph.at(factorIx))->setModel(true, sigmas); + return true; +} + +/* ************************************************************************* */ +boost::tuple QPSolver::computeStepSize(const GaussianFactorGraph& workingGraph, + const VectorValues& xk, const VectorValues& p) const { + static bool debug = true; + + double minAlpha = 1.0; + int closestFactorIx = -1, closestSigmaIx = -1; + BOOST_FOREACH(size_t factorIx, constraintIndices_) { + JacobianFactor::shared_ptr jacobian = toJacobian(workingGraph.at(factorIx)); + Vector sigmas = jacobian->get_model()->sigmas(); + Vector b = jacobian->getb(); + for (size_t s = 0; sbegin(); xj != jacobian->end(); ++xj) { + Vector pj = p.at(*xj); + Vector aj = jacobian->getA(xj).row(s); + ajTp += aj.dot(pj); + } + if (debug) { + cout << "s, ajTp: " << s << " " << ajTp << endl; + } + + // Check if aj'*p >0. Don't care if it's not. + if (ajTp<=0) continue; + + // Compute aj'*xk + double ajTx = 0.0; + for (Factor::const_iterator xj = jacobian->begin(); xj != jacobian->end(); ++xj) { + Vector xkj = xk.at(*xj); + Vector aj = jacobian->getA(xj).row(s); + ajTx += aj.dot(xkj); + } + if (debug) { + cout << "b[s], ajTx: " << b[s] << " " << ajTx << " " << ajTp << endl; + } + + // alpha = (bj - aj'*xk) / (aj'*p) + double alpha = (b[s] - ajTx)/ajTp; + if (debug) { + cout << "alpha: " << alpha << endl; + } + + // We want the minimum of all those max alphas + if (alpha < minAlpha) { + closestFactorIx = factorIx; + closestSigmaIx = s; + minAlpha = alpha; + } + } + } + } + return boost::make_tuple(minAlpha, closestFactorIx, closestSigmaIx); +} + +/* ************************************************************************* */ +bool QPSolver::iterateInPlace(GaussianFactorGraph& workingGraph, VectorValues& currentSolution) const { + static bool debug = true; + // Obtain the solution from the current working graph + VectorValues newSolution = workingGraph.optimize(); + if (debug) newSolution.print("New solution:"); + + // If we CAN'T move further + if (newSolution.equals(currentSolution, 1e-5)) { + // Compute lambda from the dual graph + GaussianFactorGraph dualGraph = buildDualGraph(workingGraph, newSolution); + if (debug) dualGraph.print("Dual graph: "); + VectorValues lambdas = dualGraph.optimize(); + if (debug) lambdas.print("lambdas :"); + + int factorIx, sigmaIx; + boost::tie(factorIx, sigmaIx) = findWorstViolatedActiveIneq(lambdas); + + // Try to disactivate the weakest violated ineq constraints + // if not successful, i.e. all ineq constraints are satisfied: We have the solution!! + if (!updateWorkingSetInplace(workingGraph, factorIx, sigmaIx, -1.0)) + return true; + } + else { + // If we CAN make some progress + // Adapt stepsize if some inactive inequality constraints complain about this move + double alpha; + int factorIx, sigmaIx; + VectorValues p = newSolution - currentSolution; + boost::tie(alpha, factorIx, sigmaIx) = computeStepSize(workingGraph, currentSolution, p); + if (debug) { + cout << "alpha, factorIx, sigmaIx: " << alpha << " " << factorIx << " " << sigmaIx << endl; + } + // also add to the working set the one that complains the most + updateWorkingSetInplace(workingGraph, factorIx, sigmaIx, 0.0); + // step! + currentSolution = currentSolution + alpha * p; + } + + return false; +} + +/* ************************************************************************* */ +VectorValues QPSolver::optimize(const VectorValues& initials) const { + GaussianFactorGraph workingGraph = graph_.clone(); + VectorValues currentSolution = initials; + bool converged = false; + while (!converged) { + converged = iterateInPlace(workingGraph, currentSolution); + } + return currentSolution; +} + + +} /* namespace gtsam */ diff --git a/gtsam/linear/QPSolver.h b/gtsam/linear/QPSolver.h new file mode 100644 index 000000000..b397aa513 --- /dev/null +++ b/gtsam/linear/QPSolver.h @@ -0,0 +1,160 @@ +/* + * QPSolver.h + * @brief: A quadratic programming solver implements the active set method + * @date: Apr 15, 2014 + * @author: thduynguyen + */ + +#pragma once + +#include +#include + +namespace gtsam { + +/** + * This class implements the active set method to solve quadratic programming problems + * encoded in a GaussianFactorGraph with special mixed constrained noise models, in which + * a negative sigma denotes an inequality <=0 constraint, + * a zero sigma denotes an equality =0 constraint, + * and a positive sigma denotes a normal Gaussian noise model. + */ +class QPSolver { + const GaussianFactorGraph& graph_; //!< the original graph, can't be modified! + FastVector constraintIndices_; //!< Indices of constrained factors in the original graph + GaussianFactorGraph::shared_ptr freeHessians_; //!< unconstrained Hessians of constrained variables + VariableIndex freeHessianFactorIndex_; //!< indices of unconstrained Hessian factors of constrained variables + // gtsam calls it "VariableIndex", but I think FactorIndex + // makes more sense, because it really stores factor indices. + VariableIndex fullFactorIndices_; //!< indices of factors involving each variable. + // gtsam calls it "VariableIndex", but I think FactorIndex + // makes more sense, because it really stores factor indices. + +public: + /// Constructor + QPSolver(const GaussianFactorGraph& graph); + + /// Return indices of all constrained factors + FastVector constraintIndices() const { return constraintIndices_; } + + + /// Return the Hessian factor graph of constrained variables + GaussianFactorGraph::shared_ptr freeHessiansOfConstrainedVars() const { + return freeHessians_; + } + + /** + * Build the dual graph to solve for the Lagrange multipliers. + * + * The Lagrangian function is: + * L(X,lambdas) = f(X) - \sum_k lambda_k * c_k(X), + * where the unconstrained part is + * f(X) = 0.5*X'*G*X - X'*g + 0.5*f0 + * and the linear equality constraints are + * c1(X), c2(X), ..., cm(X) + * + * Take the derivative of L wrt X at the solution and set it to 0, we have + * \grad f(X) = \sum_k lambda_k * \grad c_k(X) (*) + * + * For each set of rows of (*) corresponding to a variable xi involving in some constraints + * we have: + * \grad f(xi) = \frac{\partial f}{\partial xi}' = \sum_j G_ij*xj - gi + * \grad c_k(xi) = \frac{\partial c_k}{\partial xi}' + * + * Note: If xi does not involve in any constraint, we have the trivial condition + * \grad f(Xi) = 0, which should be satisfied as a usual condition for unconstrained variables. + * + * So each variable xi involving in some constraints becomes a linear factor A*lambdas - b = 0 + * on the constraints' lambda multipliers, as follows: + * - The jacobian term A_k for each lambda_k is \grad c_k(xi) + * - The constant term b is \grad f(xi), which can be computed from all unconstrained + * Hessian factors connecting to xi: \grad f(xi) = \sum_j G_ij*xj - gi + */ + GaussianFactorGraph buildDualGraph(const GaussianFactorGraph& graph, + const VectorValues& x0) const; + + + /** + * Find the BAD active ineq that pulls x strongest to the wrong direction of its constraint + * (i.e. it is pulling towards >0, while its feasible region is <=0) + * + * For active ineq constraints (those that are enforced as eq constraints now + * in the working set), we want lambda < 0. + * This is because: + * - From the Lagrangian L = f - lambda*c, we know that the constraint force is + * (lambda * \grad c) = \grad f, because it cancels out the unconstrained + * unconstrained force (-\grad f), which is pulling x in the opposite direction + * of \grad f towards the unconstrained minimum point + * - We also know that at the constraint surface \grad c points toward + (>= 0), + * while we are solving for - (<=0) constraint + * - So, we want the constraint force (lambda * \grad c) to to pull x + * towards the opposite direction of \grad c, i.e. towards the area + * where the ineq constraint <=0 is satisfied. + * - Hence, we want lambda < 0 + * + * So active ineqs with lambda > 0 are BAD. And we want the worst one with the largest lambda. + * + */ + std::pair findWorstViolatedActiveIneq(const VectorValues& lambdas) const; + + /** + * Deactivate or activate an ineq constraint in place + * Warning: modify in-place to avoid copy/clone + * @return true if update successful + */ + bool updateWorkingSetInplace(GaussianFactorGraph& workingGraph, + int factorIx, int sigmaIx, double newSigma) const; + + /** + * Compute step size alpha for the new solution x' = xk + alpha*p, where alpha \in [0,1] + * We have to make sure the new solution with alpha satisfies all INACTIVE ineq constraints + * If some inactive ineq constraints complain about the full step (alpha = 1), + * we have to adjust alpha to stay within the ineq constraints' feasible regions. + * + * For each inactive ineq j: + * - We already have: aj'*xk - bj <= 0, since xk satisfies all ineq constraints + * - We want: aj'*(xk + alpha*p) - bj <= 0 + * - If aj'*p <= 0, we have: aj'*(xk + alpha*p) <= aj'*xk <= bj, for all alpha>0 + * it's good! + * - We only care when aj'*p > 0. In this case, we need to choose alpha so that + * aj'*xk + alpha*aj'*p - bj <= 0 --> alpha <= (bj - aj'*xk) / (aj'*p) + * We want to step as far as possible, so we should choose alpha = (bj - aj'*xk) / (aj'*p) + * + * We want the minimum of all those alphas among all inactive ineq. + * + * @return a tuple of (alpha, factorIndex, sigmaIndex) where (factorIndex, sigmaIndex) + * is the constraint that has minimum alpha, or (-1,-1) if alpha = 1. + * This constraint will be added to the working set and become active + * in the next iteration + */ + boost::tuple computeStepSize(const GaussianFactorGraph& workingGraph, + const VectorValues& xk, const VectorValues& p) const; + + /** Iterate 1 step, modify workingGraph and currentSolution *IN PLACE* !!! */ + bool iterateInPlace(GaussianFactorGraph& workingGraph, VectorValues& currentSolution) const; + + /** Optimize */ + VectorValues optimize(const VectorValues& initials) const; + +private: + /// Convert a Gaussian factor to a jacobian. return empty shared ptr if failed + JacobianFactor::shared_ptr toJacobian(const GaussianFactor::shared_ptr& factor) const { + JacobianFactor::shared_ptr jacobian( + boost::dynamic_pointer_cast(factor)); + return jacobian; + } + + /// Convert a Gaussian factor to a Hessian. Return empty shared ptr if failed + HessianFactor::shared_ptr toHessian(const GaussianFactor::shared_ptr factor) const { + HessianFactor::shared_ptr hessian(boost::dynamic_pointer_cast(factor)); + return hessian; + } + + /// Collect all free Hessians involving constrained variables into a graph + GaussianFactorGraph::shared_ptr unconstrainedHessiansOfConstrainedVars( + const GaussianFactorGraph& graph, const KeySet& constrainedVars) const; + +}; + + +} /* namespace gtsam */ diff --git a/gtsam/linear/tests/testQPSolver.cpp b/gtsam/linear/tests/testQPSolver.cpp index ee84cddaf..5b8eb0433 100644 --- a/gtsam/linear/tests/testQPSolver.cpp +++ b/gtsam/linear/tests/testQPSolver.cpp @@ -17,11 +17,9 @@ */ #include -#include -#include -#include #include #include +#include using namespace std; using namespace gtsam; @@ -30,411 +28,6 @@ using namespace gtsam::symbol_shorthand; #define TEST_DISABLED(testGroup, testName)\ void testGroup##testName##Test(TestResult& result_, const std::string& name_) -class QPSolver { - const GaussianFactorGraph& graph_; //!< the original graph, can't be modified! - FastVector constraintIndices_; //!< Indices of constrained factors in the original graph - GaussianFactorGraph::shared_ptr freeHessians_; //!< unconstrained Hessians of constrained variables - VariableIndex freeHessianFactorIndex_; //!< indices of unconstrained Hessian factors of constrained variables - // gtsam calls it "VariableIndex", but I think FactorIndex - // makes more sense, because it really stores factor indices. - VariableIndex fullFactorIndices_; //!< indices of factors involving each variable. - // gtsam calls it "VariableIndex", but I think FactorIndex - // makes more sense, because it really stores factor indices. - -public: - /// Constructor - QPSolver(const GaussianFactorGraph& graph) : - graph_(graph), fullFactorIndices_(graph) { - - // Split the original graph into unconstrained and constrained part - // and collect indices of constrained factors - for (size_t i = 0; i < graph.nrFactors(); ++i) { - // obtain the factor and its noise model - JacobianFactor::shared_ptr jacobian = toJacobian(graph.at(i)); - if (jacobian && jacobian->get_model() - && jacobian->get_model()->isConstrained()) { - constraintIndices_.push_back(i); - } - } - - // Collect constrained variable keys - KeySet constrainedVars; - BOOST_FOREACH(size_t index, constraintIndices_) { - KeyVector keys = graph[index]->keys(); - constrainedVars.insert(keys.begin(), keys.end()); - } - - // Collect unconstrained hessians of constrained vars to build dual graph - freeHessians_ = unconstrainedHessiansOfConstrainedVars(graph, constrainedVars); - freeHessianFactorIndex_ = VariableIndex(*freeHessians_); - } - - /// Return indices of all constrained factors - FastVector constraintIndices() const { return constraintIndices_; } - - /// Convert a Gaussian factor to a jacobian. return empty shared ptr if failed - JacobianFactor::shared_ptr toJacobian(const GaussianFactor::shared_ptr& factor) const { - JacobianFactor::shared_ptr jacobian( - boost::dynamic_pointer_cast(factor)); - return jacobian; - } - - /// Convert a Gaussian factor to a Hessian. Return empty shared ptr if failed - HessianFactor::shared_ptr toHessian(const GaussianFactor::shared_ptr factor) const { - HessianFactor::shared_ptr hessian(boost::dynamic_pointer_cast(factor)); - return hessian; - } - - /// Return the Hessian factor graph of constrained variables - GaussianFactorGraph::shared_ptr freeHessiansOfConstrainedVars() const { - return freeHessians_; - } - - /* ************************************************************************* */ - GaussianFactorGraph::shared_ptr unconstrainedHessiansOfConstrainedVars( - const GaussianFactorGraph& graph, const KeySet& constrainedVars) const { - VariableIndex variableIndex(graph); - GaussianFactorGraph::shared_ptr hfg = boost::make_shared(); - - // Collect all factors involving constrained vars - FastSet factors; - BOOST_FOREACH(Key key, constrainedVars) { - VariableIndex::Factors factorsOfThisVar = variableIndex[key]; - BOOST_FOREACH(size_t factorIndex, factorsOfThisVar) { - factors.insert(factorIndex); - } - } - - // Convert each factor into Hessian - BOOST_FOREACH(size_t factorIndex, factors) { - if (!graph[factorIndex]) continue; - // See if this is a Jacobian factor - JacobianFactor::shared_ptr jf = toJacobian(graph[factorIndex]); - if (jf) { - // Dealing with mixed constrained factor - if (jf->get_model() && jf->isConstrained()) { - // Turn a mixed-constrained factor into a factor with 0 information on the constrained part - Vector sigmas = jf->get_model()->sigmas(); - Vector newPrecisions(sigmas.size()); - bool mixed = false; - for (size_t s=0; sclone()); - newJacobian->setModel(noiseModel::Diagonal::Precisions(newPrecisions)); - hfg->push_back(HessianFactor(*newJacobian)); - } - } - else { // unconstrained Jacobian - // Convert the original linear factor to Hessian factor - hfg->push_back(HessianFactor(*graph[factorIndex])); - } - } - else { // If it's not a Jacobian, it should be a hessian factor. Just add! - hfg->push_back(graph[factorIndex]); - } - - } - return hfg; - } - - /* ************************************************************************* */ - /** - * Build the dual graph to solve for the Lagrange multipliers. - * - * The Lagrangian function is: - * L(X,lambdas) = f(X) - \sum_k lambda_k * c_k(X), - * where the unconstrained part is - * f(X) = 0.5*X'*G*X - X'*g + 0.5*f0 - * and the linear equality constraints are - * c1(X), c2(X), ..., cm(X) - * - * Take the derivative of L wrt X at the solution and set it to 0, we have - * \grad f(X) = \sum_k lambda_k * \grad c_k(X) (*) - * - * For each set of rows of (*) corresponding to a variable xi involving in some constraints - * we have: - * \grad f(xi) = \frac{\partial f}{\partial xi}' = \sum_j G_ij*xj - gi - * \grad c_k(xi) = \frac{\partial c_k}{\partial xi}' - * - * Note: If xi does not involve in any constraint, we have the trivial condition - * \grad f(Xi) = 0, which should be satisfied as a usual condition for unconstrained variables. - * - * So each variable xi involving in some constraints becomes a linear factor A*lambdas - b = 0 - * on the constraints' lambda multipliers, as follows: - * - The jacobian term A_k for each lambda_k is \grad c_k(xi) - * - The constant term b is \grad f(xi), which can be computed from all unconstrained - * Hessian factors connecting to xi: \grad f(xi) = \sum_j G_ij*xj - gi - */ - GaussianFactorGraph buildDualGraph(const GaussianFactorGraph& graph, - const VectorValues& x0) const { - // The dual graph to return - GaussianFactorGraph dualGraph; - - // For each variable xi involving in some constraint, compute the unconstrained gradient - // wrt xi from the prebuilt freeHessian graph - // \grad f(xi) = \frac{\partial f}{\partial xi}' = \sum_j G_ij*xj - gi - BOOST_FOREACH(const VariableIndex::value_type& xiKey_factors, freeHessianFactorIndex_) { - Key xiKey = xiKey_factors.first; - VariableIndex::Factors xiFactors = xiKey_factors.second; - - // Find xi's dim from the first factor on xi - if (xiFactors.size() == 0) continue; - GaussianFactor::shared_ptr xiFactor0 = freeHessians_->at(0); - size_t xiDim = xiFactor0->getDim(xiFactor0->find(xiKey)); - - // Compute gradf(xi) = \frac{\partial f}{\partial xi}' = \sum_j G_ij*xj - gi - Vector gradf_xi = zero(xiDim); - BOOST_FOREACH(size_t factorIx, xiFactors) { - HessianFactor::shared_ptr factor = toHessian(freeHessians_->at(factorIx)); - Factor::const_iterator xi = factor->find(xiKey); - // Sum over Gij*xj for all xj connecting to xi - for (Factor::const_iterator xj = factor->begin(); xj != factor->end(); - ++xj) { - // Obtain Gij from the Hessian factor - // Hessian factor only stores an upper triangular matrix, so be careful when i>j - Matrix Gij; - if (xi > xj) { - Matrix Gji = factor->info(xj, xi); - Gij = Gji.transpose(); - } - else { - Gij = factor->info(xi, xj); - } - // Accumulate Gij*xj to gradf - Vector x0_j = x0.at(*xj); - gradf_xi += Gij * x0_j; - } - // Subtract the linear term gi - gradf_xi += -factor->linearTerm(xi); - } - - // Obtain the jacobians for lambda variables from their corresponding constraints - // gradc_k(xi) = \frac{\partial c_k}{\partial xi}' - std::vector > lambdaTerms; // collection of lambda_k, and gradc_k - BOOST_FOREACH(size_t factorIndex, fullFactorIndices_[xiKey]) { - JacobianFactor::shared_ptr factor = toJacobian(graph.at(factorIndex)); - if (!factor || !factor->isConstrained()) continue; - // Gradient is the transpose of the Jacobian: A_k = gradc_k(xi) = \frac{\partial c_k}{\partial xi}' - // Each column for each lambda_k corresponds to [the transpose of] each constrained row factor - Matrix A_k = factor->getA(factor->find(xiKey)).transpose(); - // Deal with mixed sigmas: no information if sigma != 0 - Vector sigmas = factor->get_model()->sigmas(); - for (size_t sigmaIx = 0; sigmaIx0) - // we have no information about it - if (fabs(sigmas[sigmaIx]) > 1e-9) { - A_k.col(sigmaIx) = zero(A_k.rows()); - } - } - // Use factorIndex as the lambda's key. - lambdaTerms.push_back(make_pair(factorIndex, A_k)); - } - // Enforce constrained noise model so lambdas are solved with QR - // and should exactly satisfy all the equations - dualGraph.push_back(JacobianFactor(lambdaTerms, gradf_xi, - noiseModel::Constrained::All(gradf_xi.size()))); - - // Add 0 priors on all lambdas to make sure the graph is solvable - // TODO: Can we do for all lambdas like this, or only for those with no information? - BOOST_FOREACH(size_t factorIndex, fullFactorIndices_[xiKey]) { - JacobianFactor::shared_ptr factor = toJacobian(graph.at(factorIndex)); - if (!factor || !factor->isConstrained()) continue; - size_t dim= factor->get_model()->dim(); - // Use factorIndex as the lambda's key. - dualGraph.push_back(JacobianFactor(factorIndex, eye(dim), zero(dim))); - } - } - return dualGraph; - } - - /** - * Find max lambda element. - * For active ineq constraints (those that are enforced as eq constraints now - * in the working set), we want lambda < 0. - * This is because: - * - From the Lagrangian L = f - lambda*c, we know that the constraint force is - * (lambda * \grad c) = \grad f, because it cancels out the unconstrained - * unconstrained force (-\grad f), which is pulling x in the opposite direction - * of \grad f towards the unconstrained minimum point - * - We also know that at the constraint surface \grad c points toward + (>= 0), - * while we are solving for - (<=0) constraint - * - So, we want the constraint force (lambda * \grad c) to to pull x - * towards the opposite direction of \grad c, i.e. towards the area - * where the ineq constraint <=0 is satisfied. - * - Hence, we want lambda < 0 - */ - std::pair findWeakestViolationIneq(const VectorValues& lambdas) const { - int worstFactorIx = -1, worstSigmaIx = -1; - // preset the maxLambda to 0.0: if lambda is <= 0.0, the constraint is either - // inactive or a good ineq constraint, so we don't care! - double maxLambda = 0.0; - BOOST_FOREACH(size_t factorIx, constraintIndices_) { - Vector lambda = lambdas.at(factorIx); - Vector orgSigmas = toJacobian(graph_.at(factorIx))->get_model()->sigmas(); - for (size_t j = 0; j maxLambda) { - worstFactorIx = factorIx; - worstSigmaIx = j; - maxLambda = lambda[j]; - } - } - return make_pair(worstFactorIx, worstSigmaIx); - } - - /** - * Deactivate or activate an ineq constraint in place - * Warning: modify in-place to avoid copy/clone - * @return true if update successful - */ - bool updateWorkingSetInplace(GaussianFactorGraph& workingGraph, - int factorIx, int sigmaIx, double newSigma) const { - if (factorIx < 0 || sigmaIx < 0) - return false; - Vector sigmas = toJacobian(workingGraph.at(factorIx))->get_model()->sigmas(); - sigmas[sigmaIx] = newSigma; // removing it from the working set - toJacobian(workingGraph.at(factorIx))->setModel(true, sigmas); - return true; - } - - /** - * Compute step size alpha for the new solution x' = xk + alpha*p, where alpha \in [0,1] - * We have to make sure the new solution with alpha satisfies all INACTIVE ineq constraints - * If some inactive ineq constraints complain about the full step (alpha = 1), - * we have to adjust alpha to stay within the ineq constraints' feasible regions. - * - * For each inactive ineq j: - * - We already have: aj'*xk - bj <= 0, since xk satisfies all ineq constraints - * - We want: aj'*(xk + alpha*p) - bj <= 0 - * - If aj'*p <= 0, we have: aj'*(xk + alpha*p) <= aj'*xk <= bj, for all alpha>0 - * it's good! - * - We only care when aj'*p > 0. In this case, we need to choose alpha so that - * aj'*xk + alpha*aj'*p - bj <= 0 --> alpha <= (bj - aj'*xk) / (aj'*p) - * We want to step as far as possible, so we should choose alpha = (bj - aj'*xk) / (aj'*p) - * - * We want the minimum of all those alphas among all inactive ineq. - * - * @return a tuple of (alpha, factorIndex, sigmaIndex) where (factorIndex, sigmaIndex) - * is the constraint that has minimum alpha, or (-1,-1) if alpha = 1. - * This constraint will be added to the working set and become active - * in the next iteration - */ - boost::tuple computeStepSize(const GaussianFactorGraph& workingGraph, - const VectorValues& xk, const VectorValues& p) const { - static bool debug = true; - - double minAlpha = 1.0; - int closestFactorIx = -1, closestSigmaIx = -1; - BOOST_FOREACH(size_t factorIx, constraintIndices_) { - JacobianFactor::shared_ptr jacobian = toJacobian(workingGraph.at(factorIx)); - Vector sigmas = jacobian->get_model()->sigmas(); - Vector b = jacobian->getb(); - for (size_t s = 0; sbegin(); xj != jacobian->end(); ++xj) { - Vector pj = p.at(*xj); - Vector aj = jacobian->getA(xj).row(s); - ajTp += aj.dot(pj); - } - if (debug) { - cout << "s, ajTp: " << s << " " << ajTp << endl; - } - - // Check if aj'*p >0. Don't care if it's not. - if (ajTp<=0) continue; - - // Compute aj'*xk - double ajTx = 0.0; - for (Factor::const_iterator xj = jacobian->begin(); xj != jacobian->end(); ++xj) { - Vector xkj = xk.at(*xj); - Vector aj = jacobian->getA(xj).row(s); - ajTx += aj.dot(xkj); - } - if (debug) { - cout << "b[s], ajTx: " << b[s] << " " << ajTx << " " << ajTp << endl; - } - - // alpha = (bj - aj'*xk) / (aj'*p) - double alpha = (b[s] - ajTx)/ajTp; - if (debug) { - cout << "alpha: " << alpha << endl; - } - - // We want the minimum of all those max alphas - if (alpha < minAlpha) { - closestFactorIx = factorIx; - closestSigmaIx = s; - minAlpha = alpha; - } - } - } - } - return boost::make_tuple(minAlpha, closestFactorIx, closestSigmaIx); - } - - /** Iterate 1 step, modify workingGraph and currentSolution in place */ - bool iterateInPlace(GaussianFactorGraph& workingGraph, VectorValues& currentSolution) const { - static bool debug = true; - // Obtain the solution from the current working graph - VectorValues newSolution = workingGraph.optimize(); - if (debug) newSolution.print("New solution:"); - - // If we CAN'T move further - if (newSolution.equals(currentSolution, 1e-5)) { - // Compute lambda from the dual graph - GaussianFactorGraph dualGraph = buildDualGraph(workingGraph, newSolution); - if (debug) dualGraph.print("Dual graph: "); - VectorValues lambdas = dualGraph.optimize(); - if (debug) lambdas.print("lambdas :"); - - int factorIx, sigmaIx; - boost::tie(factorIx, sigmaIx) = findWeakestViolationIneq(lambdas); - - // Try to disactivate the weakest violated ineq constraints - // if not successful, i.e. all ineq constraints are satisfied: We have the solution!! - if (!updateWorkingSetInplace(workingGraph, factorIx, sigmaIx, -1.0)) - return true; - } - else { - // If we CAN make some progress - // Adapt stepsize if some inactive inequality constraints complain about this move - double alpha; - int factorIx, sigmaIx; - VectorValues p = newSolution - currentSolution; - boost::tie(alpha, factorIx, sigmaIx) = computeStepSize(workingGraph, currentSolution, p); - if (debug) { - cout << "alpha, factorIx, sigmaIx: " << alpha << " " << factorIx << " " << sigmaIx << endl; - } - // also add to the working set the one that complains the most - updateWorkingSetInplace(workingGraph, factorIx, sigmaIx, 0.0); - // step! - currentSolution = currentSolution + alpha * p; - } - - return false; - } - - VectorValues optimize(const VectorValues& initials) const { - GaussianFactorGraph workingGraph = graph_.clone(); - VectorValues currentSolution = initials; - bool converged = false; - while (!converged) { - converged = iterateInPlace(workingGraph, currentSolution); - } - return currentSolution; - } - -}; - /* ************************************************************************* */ // Create test graph according to Forst10book_pg171Ex5 GaussianFactorGraph createTestCase() { @@ -462,7 +55,7 @@ GaussianFactorGraph createTestCase() { return graph; } -TEST_DISABLED(QPSolver, constraintsAux) { +TEST(QPSolver, constraintsAux) { GaussianFactorGraph graph = createTestCase(); QPSolver solver(graph); FastVector constraintIx = solver.constraintIndices(); @@ -472,14 +65,14 @@ TEST_DISABLED(QPSolver, constraintsAux) { VectorValues lambdas; lambdas.insert(constraintIx[0], (Vector(4)<< -0.5, 0.0, 0.3, 0.1)); int factorIx, lambdaIx; - boost::tie(factorIx, lambdaIx) = solver.findWeakestViolationIneq(lambdas); + boost::tie(factorIx, lambdaIx) = solver.findWorstViolatedActiveIneq(lambdas); LONGS_EQUAL(1, factorIx); LONGS_EQUAL(2, lambdaIx); VectorValues lambdas2; lambdas2.insert(constraintIx[0], (Vector(4)<< -0.5, 0.0, -0.3, -0.1)); int factorIx2, lambdaIx2; - boost::tie(factorIx2, lambdaIx2) = solver.findWeakestViolationIneq(lambdas2); + boost::tie(factorIx2, lambdaIx2) = solver.findWorstViolatedActiveIneq(lambdas2); LONGS_EQUAL(-1, factorIx2); LONGS_EQUAL(-1, lambdaIx2);