318 lines
11 KiB
C++
318 lines
11 KiB
C++
/*
 * By downloading, copying, installing or using the software you agree to this license.
 * If you do not agree to this license, do not download, install,
 * copy or use the software.
 *
 *
 *                           License Agreement
 *                For Open Source Computer Vision Library
 *                        (3-clause BSD License)
 *
 * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
 * Third party copyrights are property of their respective owners.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the names of the copyright holders nor the names of the contributors
 *     may be used to endorse or promote products derived from this software
 *     without specific prior written permission.
 *
 * This software is provided by the copyright holders and contributors "as is" and
 * any express or implied warranties, including, but not limited to, the implied
 * warranties of merchantability and fitness for a particular purpose are disclaimed.
 * In no event shall copyright holders or contributors be liable for any direct,
 * indirect, incidental, special, exemplary, or consequential damages
 * (including, but not limited to, procurement of substitute goods or services;
 * loss of use, data, or profits; or business interruption) however caused
 * and on any theory of liability, whether in contract, strict liability,
 * or tort (including negligence or otherwise) arising in any way out of
 * the use of this software, even if advised of the possibility of such damage.
 */
|
|
|
|
#include <vector>
|
|
|
|
#include "common.hpp"
|
|
|
|
namespace CAROTENE_NS {
|
|
|
|
bool isSobel3x3Supported(const Size2D &size, BORDER_MODE border,
|
|
s32 dx, s32 dy, Margin borderMargin)
|
|
{
|
|
return dx < 3 && dx >= 0 &&
|
|
dy < 3 && dy >= 0 &&
|
|
(dx + dy) > 0 &&
|
|
isSeparableFilter3x3Supported(size, border, dx, dy, borderMargin);
|
|
}
|
|
|
|
void Sobel3x3(const Size2D &size,
|
|
const u8 * srcBase, ptrdiff_t srcStride,
|
|
s16 * dstBase, ptrdiff_t dstStride,
|
|
s32 dx, s32 dy,
|
|
BORDER_MODE borderType, u8 borderValue, Margin borderMargin)
|
|
{
|
|
internal::assertSupportedConfiguration(isSobel3x3Supported(size, borderType, dx, dy, borderMargin));
|
|
#ifdef CAROTENE_NEON
|
|
SeparableFilter3x3(size, srcBase, srcStride, dstBase, dstStride,
|
|
dx, dy, 0, 0,
|
|
borderType, borderValue, borderMargin);
|
|
#else
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)borderValue;
|
|
#endif
|
|
}
|
|
|
|
bool isSobel3x3f32Supported(const Size2D &size, BORDER_MODE border,
|
|
s32 dx, s32 dy)
|
|
{
|
|
return isSupportedConfiguration() &&
|
|
dx < 3 && dx >= 0 &&
|
|
dy < 3 && dy >= 0 &&
|
|
(dx + dy) > 0 &&
|
|
size.width >= 4 && size.height >= 2 &&
|
|
(border == BORDER_MODE_CONSTANT ||
|
|
border == BORDER_MODE_REFLECT ||
|
|
border == BORDER_MODE_REFLECT101 ||
|
|
border == BORDER_MODE_REPLICATE );
|
|
}
|
|
|
|
void Sobel3x3(const Size2D &size,
|
|
const f32 * srcBase, ptrdiff_t srcStride,
|
|
f32 * dstBase, ptrdiff_t dstStride,
|
|
s32 dx, s32 dy,
|
|
BORDER_MODE borderType, f32 borderValue)
|
|
{
|
|
internal::assertSupportedConfiguration(isSobel3x3f32Supported(size, borderType, dx, dy));
|
|
#ifdef CAROTENE_NEON
|
|
std::vector<f32> _tmp;
|
|
f32 *tmp = 0;
|
|
if (borderType == BORDER_MODE_CONSTANT)
|
|
{
|
|
_tmp.assign(size.width + 2, borderValue);
|
|
tmp = &_tmp[1];
|
|
}
|
|
|
|
ptrdiff_t delta = (ptrdiff_t)((size.width + 2 + 31) & -32);//align size
|
|
std::vector<f32> _tempBuf((delta << 1) + 64);
|
|
f32 *trow0 = internal::alignPtr(&_tempBuf[1], 32), *trow1 = internal::alignPtr(trow0 + delta, 32);
|
|
|
|
for( size_t y = 0; y < size.height; y++ )
|
|
{
|
|
const f32* srow0;
|
|
const f32* srow1 = internal::getRowPtr(srcBase, srcStride, y);
|
|
const f32* srow2;
|
|
f32* drow = internal::getRowPtr(dstBase, dstStride, y > 0 ? y-1 : 0);
|
|
f32* drow1 = internal::getRowPtr(dstBase, dstStride, y);
|
|
if (borderType == BORDER_MODE_REFLECT101) {
|
|
srow0 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : 1);
|
|
srow2 = internal::getRowPtr(srcBase, srcStride, y < size.height-1 ? y+1 : size.height-2);
|
|
} else if (borderType == BORDER_MODE_CONSTANT) {
|
|
srow0 = y > 0 ? internal::getRowPtr(srcBase, srcStride, y-1) : tmp;
|
|
srow2 = y < size.height-1 ? internal::getRowPtr(srcBase, srcStride, y+1) : tmp;
|
|
} else { // BORDER_MODE_REFLECT || BORDER_MODE_REPLICATE
|
|
srow0 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : 0);
|
|
srow2 = internal::getRowPtr(srcBase, srcStride, y < size.height-1 ? y+1 : size.height-1);
|
|
}
|
|
|
|
float32x4_t tprev = vmovq_n_f32(0.f);
|
|
float32x4_t tcurr = vmovq_n_f32(0.f);
|
|
float32x4_t tnext = vmovq_n_f32(0.f);
|
|
float32x4_t t0, t1, t2;
|
|
// do vertical convolution
|
|
size_t x = 0, bcolsn = y + 2 < size.height ? size.width : (size.width - 4);
|
|
for( ; x <= bcolsn; x += 4 )
|
|
{
|
|
internal::prefetch(srow0 + x);
|
|
internal::prefetch(srow1 + x);
|
|
internal::prefetch(srow2 + x);
|
|
|
|
float32x4_t x0 = vld1q_f32(srow0 + x);
|
|
float32x4_t x1 = vld1q_f32(srow1 + x);
|
|
float32x4_t x2 = vld1q_f32(srow2 + x);
|
|
|
|
tprev = tcurr;
|
|
tcurr = tnext;
|
|
if(!dy)
|
|
{
|
|
tnext = vaddq_f32(vaddq_f32(vaddq_f32(x1, x1), x2), x0);
|
|
}
|
|
else if(dy == 2)
|
|
{
|
|
tnext = vsubq_f32(vsubq_f32(x2, x1), vsubq_f32(x1, x0));
|
|
}
|
|
else
|
|
{
|
|
tnext = vsubq_f32(x2, x0);
|
|
}
|
|
|
|
if(!x) {
|
|
tcurr = tnext;
|
|
// make border
|
|
if (borderType == BORDER_MODE_CONSTANT)
|
|
{
|
|
tcurr = vsetq_lane_f32(borderValue,tcurr, 3);
|
|
}
|
|
else if (borderType == BORDER_MODE_REFLECT101)
|
|
{
|
|
tcurr = vsetq_lane_f32(vgetq_lane_f32(tcurr, 1),tcurr, 3);
|
|
}
|
|
else // BORDER_MODE_REFLECT || BORDER_MODE_REPLICATE
|
|
{
|
|
tcurr = vsetq_lane_f32(vgetq_lane_f32(tcurr, 0),tcurr, 3);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
internal::prefetch(trow0 + x);
|
|
internal::prefetch(trow1 + x);
|
|
|
|
t0 = vextq_f32(tprev, tcurr, 3);
|
|
t1 = tcurr;
|
|
t2 = vextq_f32(tcurr, tnext, 1);
|
|
if(!dx)
|
|
{
|
|
t0 = vaddq_f32(t0, vaddq_f32(vaddq_f32(t1, t1), t2));
|
|
}
|
|
else if(dx == 2)
|
|
{
|
|
t0 = vsubq_f32(vsubq_f32(t2, t1), vsubq_f32(t1, t0));
|
|
}
|
|
else
|
|
{
|
|
t0 = vsubq_f32(t2, t0);
|
|
}
|
|
|
|
if(!(y%2))
|
|
{
|
|
vst1q_f32(trow0 + x - 4, t0);
|
|
}
|
|
else
|
|
{
|
|
vst1q_f32(trow1 + x - 4, t0);
|
|
}
|
|
}
|
|
x -= 4;
|
|
if(x == size.width){
|
|
x--;
|
|
}
|
|
f32 prevx = 0, rowx = 0, nextx = 0;
|
|
if(!dy)
|
|
{
|
|
prevx = x > 0 ? srow2[x-1] + 2*srow1[x-1] + srow0[x-1] :
|
|
(borderType == BORDER_MODE_REFLECT101 ? srow2[1] + 2*srow1[1] + srow0[1] :
|
|
(borderType == BORDER_MODE_CONSTANT ? 4*borderValue :
|
|
srow2[0] + 2*srow1[0] + srow0[0]) );
|
|
rowx = srow2[x] + 2*srow1[x] + srow0[x];
|
|
}
|
|
else if(dy == 2)
|
|
{
|
|
prevx = x > 0 ? srow2[x-1] - 2*srow1[x-1] + srow0[x-1] :
|
|
(borderType == BORDER_MODE_REFLECT101 ? srow2[1] - 2*srow1[1] + srow0[1] :
|
|
(borderType == BORDER_MODE_CONSTANT ? 0.f :
|
|
srow2[0] - 2*srow1[0] + srow0[0]) );
|
|
rowx = srow2[x] - 2*srow1[x] + srow0[x];
|
|
}
|
|
else
|
|
{
|
|
prevx = x > 0 ? srow2[x-1] - srow0[x-1] :
|
|
(borderType == BORDER_MODE_REFLECT101 ? srow2[1] - srow0[1] :
|
|
(borderType == BORDER_MODE_CONSTANT ? 0.f :
|
|
srow2[0] - srow0[0]) );
|
|
rowx = srow2[x] - srow0[x];
|
|
}
|
|
|
|
for( ; x < size.width; x++ )
|
|
{
|
|
if(x+1 == size.width) {
|
|
// make border
|
|
if (borderType == BORDER_MODE_CONSTANT)
|
|
{
|
|
if(!dy) {
|
|
nextx = 4*borderValue;
|
|
} else {
|
|
nextx = 0.f;
|
|
}
|
|
} else if (borderType == BORDER_MODE_REFLECT101)
|
|
{
|
|
if(!dy) {
|
|
nextx = srow2[x-1] + 2*srow1[x-1] + srow0[x-1];
|
|
} else if(dy == 2) {
|
|
nextx = srow2[x-1] - 2*srow1[x-1] + srow0[x-1];
|
|
} else {
|
|
nextx = srow2[x-1] - srow0[x-1];
|
|
}
|
|
} else {
|
|
if(!dy) {
|
|
nextx = srow2[x] + 2*srow1[x] + srow0[x];
|
|
} else if(dy == 2) {
|
|
nextx = srow2[x] - 2*srow1[x] + srow0[x];
|
|
} else {
|
|
nextx = srow2[x] - srow0[x];
|
|
}
|
|
}
|
|
} else {
|
|
if(!dy) {
|
|
nextx = srow2[x+1] + 2*srow1[x+1] + srow0[x+1];
|
|
} else if(dy == 2) {
|
|
nextx = srow2[x+1] - 2*srow1[x+1] + srow0[x+1];
|
|
} else {
|
|
nextx = srow2[x+1] - srow0[x+1];
|
|
}
|
|
}
|
|
f32 res;
|
|
if(dx==1) {
|
|
res = nextx - prevx;
|
|
} else if(!dx) {
|
|
res = prevx + 2*rowx + nextx;
|
|
} else {
|
|
res = prevx - 2*rowx + nextx;
|
|
}
|
|
if(!(y%2)) {
|
|
*(trow0+x) = res;
|
|
} else {
|
|
*(trow1+x) = res;
|
|
}
|
|
prevx = rowx;
|
|
rowx = nextx;
|
|
}
|
|
|
|
if(y>0) {
|
|
for(size_t x1 = 0; x1 < size.width; x1++ )
|
|
{
|
|
if(y%2)
|
|
*(drow + x1) = trow0[x1];
|
|
else
|
|
*(drow + x1) = trow1[x1];
|
|
}
|
|
}
|
|
if(y == size.height-1) {
|
|
for(size_t x1 = 0; x1 < size.width; x1++ )
|
|
{
|
|
if(!(y%2))
|
|
*(drow1 + x1) = trow0[x1];
|
|
else
|
|
*(drow1 + x1) = trow1[x1];
|
|
}
|
|
}
|
|
}
|
|
#else
|
|
(void)srcBase;
|
|
(void)srcStride;
|
|
(void)dstBase;
|
|
(void)dstStride;
|
|
(void)borderValue;
|
|
#endif
|
|
}
|
|
|
|
} // namespace CAROTENE_NS
|