Using TBB task continuations in tree traversal, but this unfortunately does not improve performance

release/4.3a0
Richard Roberts 2013-09-27 16:30:36 +00:00
parent 7560a1f6e3
commit 6f463166a9
1 changed files with 65 additions and 40 deletions

View File

@ -62,71 +62,95 @@ namespace gtsam {
#ifdef GTSAM_USE_TBB #ifdef GTSAM_USE_TBB
// Internal node used in parallel traversal stack /* ************************************************************************* */
template<typename NODE, typename DATA> template<typename NODE, typename DATA, typename VISITOR_POST>
struct ParallelTraversalNode { class PostOrderTask : public tbb::task
{
public:
const boost::shared_ptr<NODE>& treeNode; const boost::shared_ptr<NODE>& treeNode;
DATA myData; boost::shared_ptr<DATA> myData;
ParallelTraversalNode(const boost::shared_ptr<NODE>& treeNode, const DATA& myData) : VISITOR_POST& visitorPost;
treeNode(treeNode), myData(myData) {}
PostOrderTask(const boost::shared_ptr<NODE>& treeNode, const boost::shared_ptr<DATA>& myData, VISITOR_POST& visitorPost) :
treeNode(treeNode), myData(myData), visitorPost(visitorPost) {}
tbb::task* execute()
{
// Run the post-order visitor
(void) visitorPost(treeNode, *myData);
return NULL;
}
}; };
/* ************************************************************************* */
template<typename NODE, typename DATA, typename VISITOR_PRE, typename VISITOR_POST> template<typename NODE, typename DATA, typename VISITOR_PRE, typename VISITOR_POST>
class PreOrderTask : public tbb::task class PreOrderTask : public tbb::task
{ {
public: public:
const boost::shared_ptr<NODE>& treeNode; const boost::shared_ptr<NODE>& treeNode;
DATA myData; boost::shared_ptr<DATA> myData;
VISITOR_PRE& visitorPre; VISITOR_PRE& visitorPre;
VISITOR_POST& visitorPost; VISITOR_POST& visitorPost;
int problemSizeThreshold; int problemSizeThreshold;
bool makeNewTasks; bool makeNewTasks;
PreOrderTask(const boost::shared_ptr<NODE>& treeNode, const DATA& myData,
PreOrderTask(const boost::shared_ptr<NODE>& treeNode, const boost::shared_ptr<DATA>& myData,
VISITOR_PRE& visitorPre, VISITOR_POST& visitorPost, int problemSizeThreshold, VISITOR_PRE& visitorPre, VISITOR_POST& visitorPost, int problemSizeThreshold,
bool makeNewTasks = true) : bool makeNewTasks = true) :
treeNode(treeNode), myData(myData), visitorPre(visitorPre), visitorPost(visitorPost), treeNode(treeNode), myData(myData), visitorPre(visitorPre), visitorPost(visitorPost),
problemSizeThreshold(problemSizeThreshold), makeNewTasks(makeNewTasks) {} problemSizeThreshold(problemSizeThreshold), makeNewTasks(makeNewTasks) {}
typedef ParallelTraversalNode<NODE, DATA> ParallelTraversalNodeType;
tbb::task* execute() tbb::task* execute()
{
// Process this node and its children
processNode(treeNode, myData);
// Return NULL
return NULL;
}
void processNode(const boost::shared_ptr<NODE>& node, DATA& myData)
{ {
if (makeNewTasks) if (makeNewTasks)
{ {
bool overThreshold = (node->problemSize() >= problemSizeThreshold); if(!treeNode->children.empty())
{
// Allocate post-order task as a continuation
PostOrderTask<NODE, DATA, VISITOR_POST>& postOrderTask =
*new(allocate_continuation()) PostOrderTask<NODE, DATA, VISITOR_POST>(treeNode, myData, visitorPost);
bool overThreshold = (treeNode->problemSize() >= problemSizeThreshold);
tbb::task_list childTasks; tbb::task_list childTasks;
BOOST_FOREACH(const boost::shared_ptr<NODE>& child, node->children) BOOST_FOREACH(const boost::shared_ptr<NODE>& child, treeNode->children)
{ {
// Process child in a subtask. Important: Run visitorPre before calling // Process child in a subtask. Important: Run visitorPre before calling
// allocate_child so that if visitorPre throws an exception, we will not have // allocate_child so that if visitorPre throws an exception, we will not have
// allocated an extra child, this causes a TBB error. // allocated an extra child, this causes a TBB error.
const DATA childData = visitorPre(child, myData); boost::shared_ptr<DATA> childData = boost::allocate_shared<DATA>(tbb::scalable_allocator<DATA>(), visitorPre(child, *myData));
childTasks.push_back(*new(allocate_child()) childTasks.push_back(*new(postOrderTask.allocate_child())
PreOrderTask(child, childData, visitorPre, visitorPost, PreOrderTask(child, childData, visitorPre, visitorPost,
problemSizeThreshold, overThreshold)); problemSizeThreshold, overThreshold));
} }
// If we have child tasks, start subtasks and wait for them to complete // If we have child tasks, start subtasks and wait for them to complete
set_ref_count(1 + (int)node->children.size()); postOrderTask.set_ref_count((int) treeNode->children.size());
spawn_and_wait_for_all(childTasks); spawn(childTasks);
} }
else else
{
// Run the post-order visitor in this task if we have no children
(void) visitorPost(treeNode, *myData);
}
}
else
{
// Process this node and its children in this task
processNodeRecursively(treeNode, *myData);
}
// Return NULL
return NULL;
}
void processNodeRecursively(const boost::shared_ptr<NODE>& node, DATA& myData)
{ {
BOOST_FOREACH(const boost::shared_ptr<NODE>& child, node->children) BOOST_FOREACH(const boost::shared_ptr<NODE>& child, node->children)
{ {
DATA childData = visitorPre(child, myData); DATA childData = visitorPre(child, myData);
processNode(child, childData); processNodeRecursively(child, childData);
}
} }
// Run the post-order visitor // Run the post-order visitor
@ -134,6 +158,7 @@ namespace gtsam {
} }
}; };
/* ************************************************************************* */
template<typename ROOTS, typename NODE, typename DATA, typename VISITOR_PRE, typename VISITOR_POST> template<typename ROOTS, typename NODE, typename DATA, typename VISITOR_PRE, typename VISITOR_POST>
class RootTask : public tbb::task class RootTask : public tbb::task
{ {
@ -155,7 +180,7 @@ namespace gtsam {
tbb::task_list tasks; tbb::task_list tasks;
BOOST_FOREACH(const boost::shared_ptr<NODE>& root, roots) BOOST_FOREACH(const boost::shared_ptr<NODE>& root, roots)
{ {
DATA rootData = visitorPre(root, myData); boost::shared_ptr<DATA> rootData = boost::allocate_shared<DATA>(tbb::scalable_allocator<DATA>(), visitorPre(root, myData));
tasks.push_back(*new(allocate_child()) tasks.push_back(*new(allocate_child())
PreOrderTask(root, rootData, visitorPre, visitorPost, problemSizeThreshold)); PreOrderTask(root, rootData, visitorPre, visitorPost, problemSizeThreshold));
} }