OpenCV_4.2.0/opencv_contrib-4.2.0/modules/cnn_3dobj/samples/video.cpp

391 lines
21 KiB
C++
Raw Normal View History

2024-07-25 16:47:56 +08:00
#include <dirent.h>
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>
#include <opencv2/viz/vizcore.hpp>
#include <opencv2/calib3d.hpp>
#include <opencv2/cnn_3dobj.hpp>
#include <opencv2/features2d.hpp>
using namespace cv;
using namespace std;
using namespace cv::cnn_3dobj;
/**
* @function listDir
* @brief Append the names of all non-directory entries found under a directory to a list.
*
* @param path  Directory to scan.
* @param files Output list. Entry names (d_name only, without the directory
*              prefix) are appended, and the whole list is sorted before returning.
* @param r     When true, the immediate sub-directories are scanned as well.
*              The descent is a single level deep because the nested call is
*              always made with r == false.
*
* If the directory cannot be opened the function returns without touching
* the list instead of dereferencing a null DIR handle.
*/
static void listDir(const char *path, std::vector<String>& files, bool r)
{
    DIR *pDir = opendir(path);
    if (pDir == NULL)
    {
        /* Missing or unreadable directory: nothing to add. */
        return;
    }

    struct dirent *ent;
    while ((ent = readdir(pDir)) != NULL)
    {
        /* d_type is an enumerated value, not a bit mask: compare for equality so
           that other entry kinds (e.g. DT_BLK, DT_SOCK) are not mistaken for directories. */
        if (ent->d_type == DT_DIR)
        {
            if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
            {
                continue;
            }
            if (r)
            {
                char childpath[512];
                /* Bounded formatting; a path that does not fit is skipped rather than
                   overflowing the buffer or scanning a truncated path. */
                const int len = snprintf(childpath, sizeof(childpath), "%s/%s", path, ent->d_name);
                if (len > 0 && static_cast<size_t>(len) < sizeof(childpath))
                {
                    listDir(childpath, files, false);
                }
            }
        }
        else
        {
            files.push_back(ent->d_name);
        }
    }
    /* Release the directory handle (it was leaked before). */
    closedir(pDir);

    std::sort(files.begin(), files.end());
}
/**
* @function cvcloud_load
* @brief Turn a feature matrix into a 3D point cloud.
*
* Every row of feature_reference becomes one point: columns 0, 1 and 2 are read
* as float and used as x, y and z. The result is a 1 x rows matrix of type
* CV_32FC3 whose coordinates are scaled by a factor of 5.
*
* NOTE(review): the matrix is accessed with at<float>, so it is assumed to be
* CV_32F with at least three columns - confirm against the extractor output.
*/
static Mat cvcloud_load(Mat feature_reference)
{
    const int num_points = feature_reference.rows;
    Mat cloud(1, num_points, CV_32FC3);
    Point3f* points = cloud.ptr<cv::Point3f>();

    for (int row = 0; row < num_points; ++row)
    {
        points[row] = Point3f(feature_reference.at<float>(row, 0),
                              feature_reference.at<float>(row, 1),
                              feature_reference.at<float>(row, 2));
    }

    /* Scale all coordinates by 5. */
    cloud *= 5.0f;
    return cloud;
}
/**
* @function main
*/
int main(int argc, char **argv)
{
const String keys = "{help | | This sample will extract featrues from reference images and target image for classification. You can add a mean_file if there little variance in data such as human faces, otherwise it is not so useful}"
"{src_dir | ../data/images_all/ | Source direction of the images ready for being used for extract feature as gallery.}"
"{caffemodellist | ../../testdata/cv/caffemodel_list.txt | caffe model for feature exrtaction.}"
"{network_forIMG | ../../testdata/cv/3d_triplet_testIMG.prototxt | Network definition file used for extracting feature from a single image and making a classification}"
"{mean_file | no | The mean file generated by Caffe from all gallery images, this could be used for mean value substraction from all images. If you want to use the mean file, you can set this as ../data/images_mean/triplet_mean.binaryproto.}"
"{target_img1 | ../data/images_all/0_48.png | Path of image waiting to be classified.}"
"{target_img2 | ../data/images_all/1_339.png | Path of image waiting to be classified.}"
"{target_img3 | ../data/images_all/2_296.png | Path of image waiting to be classified.}"
"{target_img4 | ../data/images_all/3_466.png | Path of image waiting to be classified.}"
"{target_img5 | ../data/images_all/4_117.png | Path of image waiting to be classified.}"
"{target_img6 | ../data/images_all/5_236.png | Path of image waiting to be classified.}"
"{feature_blob | feat | Name of layer which will represent as the feature, in this network, ip1 or feat is well.}"
"{num_candidate | 4 | Number of candidates in gallery as the prediction result.}"
"{device | CPU | Device type: CPU or GPU}"
"{dev_id | 0 | Device id}";
/* get parameters from comand line */
cv::CommandLineParser parser(argc, argv, keys);
parser.about("Feature extraction and classification");
if (parser.has("help"))
{
parser.printMessage();
return 0;
}
String src_dir = parser.get<String>("src_dir");
String caffemodellist = parser.get<String>("caffemodellist");
String network_forIMG = parser.get<String>("network_forIMG");
String mean_file = parser.get<String>("mean_file");
String target_img1 = parser.get<String>("target_img1");
String target_img2 = parser.get<String>("target_img2");
String target_img3 = parser.get<String>("target_img3");
String target_img4 = parser.get<String>("target_img4");
String target_img5 = parser.get<String>("target_img5");
String target_img6 = parser.get<String>("target_img6");
String feature_blob = parser.get<String>("feature_blob");
int num_candidate = parser.get<int>("num_candidate");
String device = parser.get<String>("device");
ifstream namelist_model(caffemodellist.c_str(), ios::in);
vector<String> caffemodel;
char *buf = new char[512];
int number_model = 0;
while (!namelist_model.eof())
{
namelist_model.getline(buf, 512);
caffemodel.push_back(buf);
number_model++;
}
/* List the file names under a given path */
std::vector<String> name_gallery;
listDir(src_dir.c_str(), name_gallery, false);
for (unsigned int i = 0; i < name_gallery.size(); i++)
{
name_gallery[i] = src_dir + name_gallery[i];
}
std::vector<cv::Mat> img_gallery;
cv::Mat temp_feat;
vector<cv::Mat> feature_reference;
vector<cv::Mat> feature_test1;
vector<cv::Mat> feature_test2;
vector<cv::Mat> feature_test3;
vector<cv::Mat> feature_test4;
vector<cv::Mat> feature_test5;
vector<cv::Mat> feature_test6;
cv::Mat img_test1 = cv::imread(target_img1, -1);
cv::Mat img_test2 = cv::imread(target_img2, -1);
cv::Mat img_test3 = cv::imread(target_img3, -1);
cv::Mat img_test4 = cv::imread(target_img4, -1);
cv::Mat img_test5 = cv::imread(target_img5, -1);
cv::Mat img_test6 = cv::imread(target_img6, -1);
for (int num_model = 0; num_model < number_model; ++num_model)
{
feature_reference.push_back(temp_feat);
feature_test1.push_back(temp_feat);
feature_test2.push_back(temp_feat);
feature_test3.push_back(temp_feat);
feature_test4.push_back(temp_feat);
feature_test5.push_back(temp_feat);
feature_test6.push_back(temp_feat);
}
for (unsigned int i = 0; i < name_gallery.size(); i++)
{
img_gallery.push_back(cv::imread(name_gallery[i], -1));
}
/* Initialize a net work with Device */
cv::cnn_3dobj::descriptorExtractor descriptor(device);
std::cout << "Using" << descriptor.getDeviceType() << std::endl;
/* Load net with the caffe trained net work parameter and structure */
for (int num_model = 0; num_model < number_model; ++num_model)
{
if (strcmp(mean_file.c_str(), "no") == 0)
descriptor.loadNet(network_forIMG, caffemodel[num_model]);
else
descriptor.loadNet(network_forIMG, caffemodel[num_model], mean_file);
/* Part1: Extract feature from a set of images and a single image*/
descriptor.extract(img_gallery, feature_reference[num_model], feature_blob);
descriptor.extract(img_test1, feature_test1[num_model], feature_blob);
descriptor.extract(img_test2, feature_test2[num_model], feature_blob);
descriptor.extract(img_test3, feature_test3[num_model], feature_blob);
descriptor.extract(img_test4, feature_test4[num_model], feature_blob);
descriptor.extract(img_test5, feature_test5[num_model], feature_blob);
descriptor.extract(img_test6, feature_test6[num_model], feature_blob);
}
/* Initialize a matcher which using L2 distance. */
cv::BFMatcher matcher(NORM_L2);
vector<vector<vector<cv::DMatch> > > matches1;
vector<vector<vector<cv::DMatch> > > matches2;
vector<vector<vector<cv::DMatch> > > matches3;
vector<vector<vector<cv::DMatch> > > matches4;
vector<vector<vector<cv::DMatch> > > matches5;
vector<vector<vector<cv::DMatch> > > matches6;
vector<vector<cv::DMatch> > matches_temp;
for (int num_model = 0; num_model < number_model; ++num_model)
{
matches1.push_back(matches_temp);
matches2.push_back(matches_temp);
matches3.push_back(matches_temp);
matches4.push_back(matches_temp);
matches5.push_back(matches_temp);
matches6.push_back(matches_temp);
}
/* Have a KNN match on the target and reference images. */
for (int num_model = 0; num_model < number_model; ++num_model)
{
matcher.knnMatch(feature_test1[num_model], feature_reference[num_model], matches1[num_model], num_candidate+1);
matcher.knnMatch(feature_test2[num_model], feature_reference[num_model], matches2[num_model], num_candidate+1);
matcher.knnMatch(feature_test3[num_model], feature_reference[num_model], matches3[num_model], num_candidate+1);
matcher.knnMatch(feature_test4[num_model], feature_reference[num_model], matches4[num_model], num_candidate+1);
matcher.knnMatch(feature_test5[num_model], feature_reference[num_model], matches5[num_model], num_candidate+1);
matcher.knnMatch(feature_test6[num_model], feature_reference[num_model], matches6[num_model], num_candidate+1);
}
vector<Mat> img_merge;
/* Part2: Start to have a show */
viz::Viz3d myWindow0("Instruction");
viz::Viz3d myWindow1("Point Cloud");
viz::Viz3d myWindow2("Prediction sample");
/* Set window size as 1024*1024, we use this scale as default. */
myWindow0.setWindowSize(Size(1300,100));
myWindow0.setWindowPosition(Point(0,800));
myWindow1.setWindowSize(Size(700,600));
myWindow1.setWindowPosition(Point(600,0));
myWindow2.setWindowSize(Size(600,600));
myWindow2.setWindowPosition(Point(-20,0));
/* Pose of the widget in camera frame */
Affine3f cloud_pose = Affine3f().translate(Vec3f(1.0f,1.0f,1.0f));
Point3d campos(1,0,0);
/* Get the transformation matrix from camera coordinate system to global. */
Affine3f transform = viz::makeTransformToGlobal(Vec3f(1.0f,0.0f,0.0f), Vec3f(0.0f,1.0f,0.0f), Vec3f(0.0f,0.0f,1.0f), campos);
/* Pose of the widget in global frame */
Affine3f cloud_pose_global = transform * cloud_pose;
/* Set background color. */
myWindow0.setBackgroundColor(viz::Color::white());
myWindow1.setBackgroundColor(viz::Color::white());
myWindow2.setBackgroundColor(viz::Color::white());
Point3d cam_y_dir(0.0f,0.0f,1.0f);
cv::cnn_3dobj::icoSphere ViewSphere(1,0);
Mat bunny_cloud;
Point3d cam_focal_point;
float radius;
float translation_phase = 0.0;
int count_pre, num_rotate, max_rotate;
String titlename, Hint, Pred("prediction: ");
vector<viz::WImageOverlay> imagepredict;
String widgename[24] = {"1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","23","24"};
vector<Mat> slide;
slide.push_back(imread("1.png"));
slide.push_back(imread("2.png"));
slide.push_back(imread("3.png"));
slide.push_back(imread("4.png"));
slide.push_back(imread("5.png"));
slide.push_back(imread("6.png"));
slide.push_back(imread("7.png"));
slide.push_back(imread("8.png"));
slide.push_back(imread("9.png"));
slide.push_back(imread("10.png"));
/// Create a window
viz::Viz3d myWindowS("Slide Show");
myWindowS.setWindowSize(Size(1300,700));
myWindowS.setWindowPosition(Point(0,0));
myWindowS.setBackgroundColor(viz::Color::white());
for (size_t i = 0; i < slide.size(); ++i)
{
/// Create a triangle widget
viz::WImageOverlay slide1(slide[i],Rect(0, 0, 1300, 700));
/// Show widget in the visualizer window
num_rotate = 0;
if (i == 0)
max_rotate = 2000;
else
max_rotate = 230;
while (num_rotate != max_rotate)
{
myWindowS.showWidget("Slide1", slide1);
/// Start event loop
myWindowS.spinOnce(1, true);
num_rotate++;
}
}
for (int num_model = 0; num_model < number_model; ++num_model)
{
if (num_model == 0)
Hint = "Start training.";
else if (num_model == 28)
Hint = "Different Classes Are Clustered.";
else if(num_model == 40)
Hint = "Poses Are Set apart.";
else if(num_model == 42)
Hint = "Finished. Model could: tell both classes and poses.";
titlename = caffemodel[num_model];
titlename = "Prediction Result of Model Trained on Iteration " + titlename.substr(34, titlename.length() - 44);
viz::WText title(titlename, Point(100, 50), 30, viz::Color::black());
viz::WText hint(Hint, Point(400, 20), 25, viz::Color::black());
viz::WImageOverlay image3d1(img_test1, Rect(20, 40, img_test4.rows, img_test4.cols));
viz::WText arrow1(Pred, Point(90,60), 15, viz::Color::red());
viz::WImageOverlay image3d2(img_test2, Rect(20, 40+75, img_test4.rows, img_test4.cols));
viz::WText arrow2(Pred, Point(90,60+75), 15, viz::Color::green());
viz::WImageOverlay image3d3(img_test3, Rect(20, 40+75*2, img_test4.rows, img_test4.cols));
viz::WText arrow3(Pred, Point(90,60+75*2), 15, viz::Color::purple());
viz::WImageOverlay image3d4(img_test4, Rect(20, 40+75*3, img_test4.rows, img_test4.cols));
viz::WText arrow4(Pred, Point(90,60+75*3), 15, viz::Color::blue());
viz::WImageOverlay image3d5(img_test5, Rect(20, 40+75*4, img_test4.rows, img_test4.cols));
viz::WText arrow5(Pred, Point(90,60+75*4), 15, viz::Color::yellow());
viz::WImageOverlay image3d6(img_test6, Rect(20, 40+75*5, img_test4.rows, img_test4.cols));
viz::WText arrow6(Pred, Point(90,60+75*5), 15, viz::Color::orange());
viz::WText text_target(String("Query Image"), Point2d(20,530), 20, viz::Color::purple());
viz::WText text_pred(String("Predicted Images using 4 NN"), Point2d(80+110,530), 20, viz::Color::purple());
viz::WText text3d1(String("1st"), Point2d(80 + 110,500), 20, viz::Color::orange());
viz::WText text3d2(String("2nd"), Point2d(80 + 2*110,500), 20, viz::Color::orange());
viz::WText text3d3(String("3rd"), Point2d(80 + 3*110,500), 20, viz::Color::orange());
viz::WText text3d4(String("4th"), Point2d(80 + 4*110,500), 20, viz::Color::orange());
viz::WText classname1(String("ape: red"), Point2d(20,10), 11, viz::Color::red());
viz::WText classname2(String("ant: green"), Point2d(120,10), 11, viz::Color::green());
viz::WText classname3(String("cow: purple"), Point2d(220,10), 11, viz::Color::purple());
viz::WText classname4(String("plane: blue"), Point2d(320,10), 11, viz::Color::blue());
viz::WText classname5(String("bunny: yellow"), Point2d(420,10), 11, viz::Color::yellow());
viz::WText classname6(String("horse: orange"), Point2d(500,10), 11, viz::Color::orange());
myWindow0.showWidget("title", title, Affine3f().translate(Vec3f(0.0f,0.0f,0.0f)));
myWindow0.showWidget("hint", hint, Affine3f().translate(Vec3f(0.0f,0.0f,0.0f)));
myWindow2.showWidget("image3d1", image3d1, Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)));
myWindow2.showWidget("image3d2", image3d2, Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)));
myWindow2.showWidget("image3d3", image3d3, Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)));
myWindow2.showWidget("image3d4", image3d4, Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)));
myWindow2.showWidget("image3d5", image3d5, Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)));
myWindow2.showWidget("image3d6", image3d6, Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)));
myWindow2.showWidget("arrow1", arrow1, Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)));
myWindow2.showWidget("arrow2", arrow2, Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)));
myWindow2.showWidget("arrow3", arrow3, Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)));
myWindow2.showWidget("arrow4", arrow4, Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)));
myWindow2.showWidget("arrow5", arrow5, Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)));
myWindow2.showWidget("arrow6", arrow6, Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)));
myWindow2.showWidget("text_target", text_target, Affine3f().translate(Vec3f(0.0f,0.0f,0.0f)));
myWindow2.showWidget("text_pred", text_pred, Affine3f().translate(Vec3f(0.0f,0.0f,0.0f)));
myWindow2.showWidget("text3d1", text3d1, Affine3f().translate(Vec3f(0.0f,0.0f,0.0f)));
myWindow2.showWidget("text3d2", text3d2, Affine3f().translate(Vec3f(0.0f,0.0f,0.0f)));
myWindow2.showWidget("text3d3", text3d3, Affine3f().translate(Vec3f(0.0f,0.0f,0.0f)));
myWindow2.showWidget("text3d4", text3d4, Affine3f().translate(Vec3f(0.0f,0.0f,0.0f)));
myWindow2.showWidget("classname1", classname1, Affine3f().translate(Vec3f(0.0f,0.0f,0.0f)));
myWindow2.showWidget("classname2", classname2, Affine3f().translate(Vec3f(0.0f,0.0f,0.0f)));
myWindow2.showWidget("classname3", classname3, Affine3f().translate(Vec3f(0.0f,0.0f,0.0f)));
myWindow2.showWidget("classname4", classname4, Affine3f().translate(Vec3f(0.0f,0.0f,0.0f)));
myWindow2.showWidget("classname5", classname5, Affine3f().translate(Vec3f(0.0f,0.0f,0.0f)));
myWindow2.showWidget("classname6", classname6, Affine3f().translate(Vec3f(0.0f,0.0f,0.0f)));
bunny_cloud = cvcloud_load(feature_reference[num_model]);
cam_focal_point = ViewSphere.getCenter(bunny_cloud);
radius = ViewSphere.getRadius(bunny_cloud, cam_focal_point);
viz::WCloud cloud_widget1(bunny_cloud.colRange(Range(0,641)), viz::Color::red());
viz::WCloud cloud_widget2(bunny_cloud.colRange(Range(642,642*2-1)), viz::Color::green());
viz::WCloud cloud_widget3(bunny_cloud.colRange(Range(642*2,642*3-1)), viz::Color::purple());
viz::WCloud cloud_widget4(bunny_cloud.colRange(Range(642*3,642*4-1)), viz::Color::blue());
viz::WCloud cloud_widget5(bunny_cloud.colRange(Range(642*4,642*5-1)), viz::Color::yellow());
viz::WCloud cloud_widget6(bunny_cloud.colRange(Range(642*5,642*6-1)), viz::Color::orange());
myWindow1.showWidget("obj1", cloud_widget1, cloud_pose_global);
myWindow1.setRenderingProperty("obj1",0,3);
myWindow1.showWidget("obj2", cloud_widget2, cloud_pose_global);
myWindow1.setRenderingProperty("obj2",0,3);
myWindow1.showWidget("obj3", cloud_widget3, cloud_pose_global);
myWindow1.setRenderingProperty("obj3",0,3);
myWindow1.showWidget("obj4", cloud_widget4, cloud_pose_global);
myWindow1.setRenderingProperty("obj4",0,3);
myWindow1.showWidget("obj5", cloud_widget5, cloud_pose_global);
myWindow1.setRenderingProperty("obj5",0,3);
myWindow1.showWidget("obj6", cloud_widget6, cloud_pose_global);
myWindow1.setRenderingProperty("obj6",0,3);
count_pre = 0;
for (int j = 1; j < num_candidate+1; ++j)
{
myWindow2.showWidget(widgename[count_pre], viz::WImageOverlay(img_gallery[matches1[num_model][0][j].trainIdx], Rect(80+110*j, 40+75*0, img_test4.rows, img_test4.cols)), Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)));
count_pre++;
myWindow2.showWidget(widgename[count_pre], viz::WImageOverlay(img_gallery[matches2[num_model][0][j].trainIdx], Rect(80+110*j, 40+75*1, img_test4.rows, img_test4.cols)), Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)));
count_pre++;
myWindow2.showWidget(widgename[count_pre], viz::WImageOverlay(img_gallery[matches3[num_model][0][j].trainIdx], Rect(80+110*j, 40+75*2, img_test4.rows, img_test4.cols)), Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)));
count_pre++;
myWindow2.showWidget(widgename[count_pre], viz::WImageOverlay(img_gallery[matches4[num_model][0][j].trainIdx], Rect(80+110*j, 40+75*3, img_test4.rows, img_test4.cols)), Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)));
count_pre++;
myWindow2.showWidget(widgename[count_pre], viz::WImageOverlay(img_gallery[matches5[num_model][0][j].trainIdx], Rect(80+110*j, 40+75*4, img_test4.rows, img_test4.cols)), Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)));
count_pre++;
myWindow2.showWidget(widgename[count_pre], viz::WImageOverlay(img_gallery[matches6[num_model][0][j].trainIdx], Rect(80+110*j, 40+75*5, img_test4.rows, img_test4.cols)), Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)));
count_pre++;
}
num_rotate = 0;
max_rotate = 15;
if (num_model == number_model-1)
max_rotate = 30000;
while (num_rotate != max_rotate)
{
translation_phase += CV_PI * 0.01f;
campos.x = sin(translation_phase);
campos.y = cos(translation_phase);
campos.z = 0;
/* Get the pose of the camera using makeCameraPoses. */
Affine3f cam_pose = viz::makeCameraPose(campos*radius*3.5+cam_focal_point, cam_focal_point, cam_y_dir*radius*3.5+cam_focal_point);
myWindow1.setViewerPose(cam_pose);
myWindow1.spinOnce(1, true);
myWindow2.spinOnce(1, true);
myWindow0.spinOnce(1, true);
num_rotate++;
}
myWindow0.removeAllWidgets();
myWindow1.removeAllWidgets();
myWindow2.removeAllWidgets();
}
return 0;
}