651 lines
19 KiB
C++
651 lines
19 KiB
C++
///////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Copyright (c) 2009-2014 DreamWorks Animation LLC.
|
|
//
|
|
// All rights reserved.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above
|
|
// copyright notice, this list of conditions and the following disclaimer
|
|
// in the documentation and/or other materials provided with the
|
|
// distribution.
|
|
// * Neither the name of DreamWorks Animation nor the names of
|
|
// its contributors may be used to endorse or promote products derived
|
|
// from this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////////
|
|
|
|
#define OPENEXR_BUILTIN_TABLES
|
|
|
|
//
|
|
// A program to generate various acceleration lookup tables
|
|
// for Imf::DwaCompressor
|
|
//
|
|
|
|
#include <cstddef>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <math.h>
|
|
#include <vector>
|
|
|
|
#include <OpenEXRConfig.h>
|
|
|
|
#ifndef OPENEXR_BUILTIN_TABLES
|
|
#ifdef OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN
|
|
#include <unistd.h>
|
|
#endif
|
|
#endif // OPENEXR_BUILTIN_TABLES
|
|
|
|
#include <half.h>
|
|
#include <IlmThread.h>
|
|
#include <IlmThreadSemaphore.h>
|
|
#include <ImfIO.h>
|
|
#include <ImfXdr.h>
|
|
#include "ImfNamespace.h"
|
|
|
|
using namespace OPENEXR_IMF_NAMESPACE;
|
|
|
|
namespace {
|
|
|
|
#ifdef OPENEXR_BUILTIN_TABLES
|
|
//
// Run-time storage for the three lookup tables; each holds one entry
// per 16-bit pattern (1 << 16 == 0x10000 entries) and starts out zeroed.
//
static unsigned short dwaCompressorNoOp[1 << 16]        = {};
static unsigned short dwaCompressorToLinear[1 << 16]    = {};
static unsigned short dwaCompressorToNonlinear[1 << 16] = {};

// Storage for the closest-value tables is disabled here.
//static unsigned int closestDataOffset[0x10000] = {};
//static unsigned short closestData[0x80000] = {};
|
|
#else
|
|
|
|
class LutHeaderWorker
|
|
{
|
|
public:
|
|
class Runner : public ILMTHREAD_NAMESPACE::Thread
|
|
{
|
|
public:
|
|
Runner(LutHeaderWorker &worker, bool output):
|
|
ILMTHREAD_NAMESPACE::Thread(),
|
|
_worker(worker),
|
|
_output(output)
|
|
{
|
|
start();
|
|
}
|
|
|
|
virtual ~Runner()
|
|
{
|
|
_semaphore.wait();
|
|
}
|
|
|
|
virtual void run()
|
|
{
|
|
_semaphore.post();
|
|
_worker.run(_output);
|
|
}
|
|
|
|
private:
|
|
LutHeaderWorker &_worker;
|
|
bool _output;
|
|
ILMTHREAD_NAMESPACE::Semaphore _semaphore;
|
|
|
|
}; // class LutHeaderWorker::Runner
|
|
|
|
|
|
LutHeaderWorker(size_t startValue,
|
|
size_t endValue):
|
|
_lastCandidateCount(0),
|
|
_startValue(startValue),
|
|
_endValue(endValue),
|
|
_numElements(0),
|
|
_offset(new size_t[numValues()]),
|
|
_elements(new unsigned short[1024*1024*2])
|
|
{
|
|
}
|
|
|
|
~LutHeaderWorker()
|
|
{
|
|
delete[] _offset;
|
|
delete[] _elements;
|
|
}
|
|
|
|
size_t lastCandidateCount() const
|
|
{
|
|
return _lastCandidateCount;
|
|
}
|
|
|
|
size_t numValues() const
|
|
{
|
|
return _endValue - _startValue;
|
|
}
|
|
|
|
size_t numElements() const
|
|
{
|
|
return _numElements;
|
|
}
|
|
|
|
const size_t* offset() const
|
|
{
|
|
return _offset;
|
|
}
|
|
|
|
const unsigned short* elements() const
|
|
{
|
|
return _elements;
|
|
}
|
|
|
|
void run(bool outputProgress)
|
|
{
|
|
half candidate[16];
|
|
int candidateCount = 0;
|
|
|
|
for (size_t input=_startValue; input<_endValue; ++input) {
|
|
|
|
if (outputProgress) {
|
|
#ifdef __GNUC__
|
|
if (input % 100 == 0) {
|
|
fprintf(stderr,
|
|
" Building acceleration for DwaCompressor, %.2f %% %c",
|
|
100.*(float)input/(float)numValues(), 13);
|
|
}
|
|
#else
|
|
if (input % 1000 == 0) {
|
|
fprintf(stderr,
|
|
" Building acceleration for DwaCompressor, %.2f %%\n",
|
|
100.*(float)input/(float)numValues());
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
int numSetBits = countSetBits(input);
|
|
half inputHalf, closestHalf;
|
|
|
|
inputHalf.setBits(input);
|
|
|
|
_offset[input - _startValue] = _numElements;
|
|
|
|
// Gather candidates
|
|
candidateCount = 0;
|
|
for (int targetNumSetBits=numSetBits-1; targetNumSetBits>=0;
|
|
--targetNumSetBits) {
|
|
bool valueFound = false;
|
|
|
|
for (int i=0; i<65536; ++i) {
|
|
if (countSetBits(i) != targetNumSetBits) continue;
|
|
|
|
if (!valueFound) {
|
|
closestHalf.setBits(i);
|
|
valueFound = true;
|
|
} else {
|
|
half tmpHalf;
|
|
|
|
tmpHalf.setBits(i);
|
|
|
|
if (fabs((float)inputHalf - (float)tmpHalf) <
|
|
fabs((float)inputHalf - (float)closestHalf)) {
|
|
closestHalf = tmpHalf;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (valueFound == false) {
|
|
fprintf(stderr, "bork bork bork!\n");
|
|
}
|
|
|
|
candidate[candidateCount] = closestHalf;
|
|
candidateCount++;
|
|
}
|
|
|
|
// Sort candidates by increasing number of bits set
|
|
for (int i=0; i<candidateCount; ++i) {
|
|
for (int j=i+1; j<candidateCount; ++j) {
|
|
|
|
int iCnt = countSetBits(candidate[i].bits());
|
|
int jCnt = countSetBits(candidate[j].bits());
|
|
|
|
if (jCnt < iCnt) {
|
|
half tmp = candidate[i];
|
|
candidate[i] = candidate[j];
|
|
candidate[j] = tmp;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Copy candidates to the data buffer;
|
|
for (int i=0; i<candidateCount; ++i) {
|
|
_elements[_numElements] = candidate[i].bits();
|
|
_numElements++;
|
|
}
|
|
|
|
if (input == _endValue-1) {
|
|
_lastCandidateCount = candidateCount;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
private:
|
|
size_t _lastCandidateCount;
|
|
size_t _startValue;
|
|
size_t _endValue;
|
|
size_t _numElements;
|
|
size_t *_offset;
|
|
unsigned short *_elements;
|
|
|
|
//
|
|
// Precomputing the bit count runs faster than using
|
|
// the builtin instruction, at least in one case..
|
|
//
|
|
// Precomputing 8-bits is no slower than 16-bits,
|
|
// and saves a fair bit of overhead..
|
|
//
|
|
int countSetBits(unsigned short src)
|
|
{
|
|
static const unsigned short numBitsSet[256] =
|
|
{
|
|
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
|
|
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
|
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
|
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
|
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
|
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
|
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
|
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
|
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
|
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
|
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
|
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
|
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
|
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
|
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
|
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
|
|
};
|
|
|
|
return numBitsSet[src & 0xff] + numBitsSet[src >> 8];
|
|
}
|
|
|
|
}; // class LutHeaderWorker
|
|
|
|
#endif // OPENEXR_BUILTIN_TABLES
|
|
|
|
} // namespace
|
|
|
|
|
|
//
|
|
// Generate a no-op LUT, to cut down on conditional branches
|
|
//
|
|
static void
|
|
generateNoop()
|
|
{
|
|
#ifndef OPENEXR_BUILTIN_TABLES
|
|
printf("const unsigned short dwaCompressorNoOp[] = \n");
|
|
printf("{");
|
|
#endif // OPENEXR_BUILTIN_TABLES
|
|
for (int i=0; i<65536; ++i) {
|
|
#ifndef OPENEXR_BUILTIN_TABLES
|
|
if (i % 8 == 0) {
|
|
printf("\n ");
|
|
}
|
|
#endif // OPENEXR_BUILTIN_TABLES
|
|
unsigned short dst;
|
|
char *tmp = (char *)(&dst);
|
|
|
|
unsigned short src = (unsigned short)i;
|
|
Xdr::write <CharPtrIO> (tmp, src);
|
|
#ifndef OPENEXR_BUILTIN_TABLES
|
|
printf("0x%04x, ", dst);
|
|
#else
|
|
dwaCompressorNoOp[i] = dst;
|
|
#endif // OPENEXR_BUILTIN_TABLES
|
|
}
|
|
#ifndef OPENEXR_BUILTIN_TABLES
|
|
printf("\n};\n");
|
|
#endif // OPENEXR_BUILTIN_TABLES
|
|
}
|
|
|
|
//
|
|
// Nonlinearly encode luminance. For values below 1.0, we want
|
|
// to use a gamma 2.2 function to match what is fairly common
|
|
// for storing output referred. However, > 1, gamma functions blow up,
|
|
// and log functions are much better behaved. We could use a log
|
|
// function everywhere, but it tends to over-sample dark
|
|
// regions and undersample the brighter regions, when
|
|
// compared to the way real devices reproduce values.
|
|
//
|
|
// So, above 1, use a log function which is a smooth blend
|
|
// into the gamma function.
|
|
//
|
|
// Nonlinear(linear) =
|
|
//
|
|
// linear^(1./2.2) / linear <= 1.0
|
|
// |
|
|
// ln(linear)/ln(e^2.2) + 1 \ otherwise
|
|
//
|
|
//
|
|
// toNonlinear[] needs to take in XDR format half float values,
|
|
// and output NATIVE format float.
|
|
//
|
|
// toLinear[] does the opposite - takes in NATIVE half and
|
|
// outputs XDR half values.
|
|
//
|
|
|
|
static void
|
|
generateToLinear()
|
|
{
|
|
#ifndef OPENEXR_BUILTIN_TABLES
|
|
unsigned short toLinear[65536];
|
|
#else
|
|
unsigned short* toLinear = dwaCompressorToLinear;
|
|
#endif // OPENEXR_BUILTIN_TABLES
|
|
|
|
toLinear[0] = 0;
|
|
|
|
for (int i=1; i<65536; ++i) {
|
|
half h;
|
|
float sign = 1;
|
|
float logBase = pow(2.7182818, 2.2);
|
|
|
|
// map NaN and inf to 0
|
|
if ((i & 0x7c00) == 0x7c00) {
|
|
toLinear[i] = 0;
|
|
continue;
|
|
}
|
|
|
|
//
|
|
// _toLinear - assume i is NATIVE, but our output needs
|
|
// to get flipped to XDR
|
|
//
|
|
h.setBits(i);
|
|
sign = 1;
|
|
if ((float)h < 0) {
|
|
sign = -1;
|
|
}
|
|
|
|
if ( fabs( (float)h) <= 1.0 ) {
|
|
h = (half)(sign * pow((float)fabs((float)h), 2.2f));
|
|
} else {
|
|
h = (half)(sign * pow(logBase, (float)(fabs((float)h) - 1.0)));
|
|
}
|
|
|
|
{
|
|
char *tmp = (char *)(&toLinear[i]);
|
|
|
|
Xdr::write <CharPtrIO> ( tmp, h.bits());
|
|
}
|
|
}
|
|
#ifndef OPENEXR_BUILTIN_TABLES
|
|
printf("const unsigned short dwaCompressorToLinear[] = \n");
|
|
printf("{");
|
|
for (int i=0; i<65536; ++i) {
|
|
if (i % 8 == 0) {
|
|
printf("\n ");
|
|
}
|
|
printf("0x%04x, ", toLinear[i]);
|
|
}
|
|
printf("\n};\n");
|
|
#endif // OPENEXR_BUILTIN_TABLES
|
|
}
|
|
|
|
|
|
static void
|
|
generateToNonlinear()
|
|
{
|
|
#ifndef OPENEXR_BUILTIN_TABLES
|
|
unsigned short toNonlinear[65536];
|
|
#else
|
|
unsigned short* toNonlinear = dwaCompressorToNonlinear;
|
|
#endif // OPENEXR_BUILTIN_TABLES
|
|
|
|
toNonlinear[0] = 0;
|
|
|
|
for (int i=1; i<65536; ++i) {
|
|
unsigned short usNative, usXdr;
|
|
half h;
|
|
float sign = 1;
|
|
float logBase = pow(2.7182818, 2.2);
|
|
|
|
usXdr = i;
|
|
|
|
{
|
|
const char *tmp = (char *)(&usXdr);
|
|
|
|
Xdr::read<CharPtrIO>(tmp, usNative);
|
|
}
|
|
|
|
// map NaN and inf to 0
|
|
if ((usNative & 0x7c00) == 0x7c00) {
|
|
toNonlinear[i] = 0;
|
|
continue;
|
|
}
|
|
|
|
//
|
|
// toNonlinear - assume i is XDR
|
|
//
|
|
h.setBits(usNative);
|
|
sign = 1;
|
|
if ((float)h < 0) {
|
|
sign = -1;
|
|
}
|
|
|
|
if ( fabs( (float)h ) <= 1.0) {
|
|
h = (half)(sign * pow(fabs((float)h), 1.f/2.2f));
|
|
} else {
|
|
h = (half)(sign * ( log(fabs((float)h)) / log(logBase) + 1.0) );
|
|
}
|
|
toNonlinear[i] = h.bits();
|
|
}
|
|
#ifndef OPENEXR_BUILTIN_TABLES
|
|
printf("const unsigned short dwaCompressorToNonlinear[] = \n");
|
|
printf("{");
|
|
for (int i=0; i<65536; ++i) {
|
|
if (i % 8 == 0) {
|
|
printf("\n ");
|
|
}
|
|
printf("0x%04x, ", toNonlinear[i]);
|
|
}
|
|
printf("\n};\n");
|
|
#endif // OPENEXR_BUILTIN_TABLES
|
|
}
|
|
|
|
|
|
#ifndef OPENEXR_BUILTIN_TABLES
|
|
//
|
|
// Attempt to get available CPUs in a somewhat portable way.
|
|
//
|
|
|
|
int
|
|
cpuCount()
|
|
{
|
|
if (!ILMTHREAD_NAMESPACE::supportsThreads()) return 1;
|
|
|
|
int cpuCount = 1;
|
|
|
|
#if defined (OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN)
|
|
|
|
cpuCount = sysconf(_SC_NPROCESSORS_ONLN);
|
|
|
|
#elif defined (_WIN32)
|
|
|
|
SYSTEM_INFO sysinfo;
|
|
GetSystemInfo( &sysinfo );
|
|
cpuCount = sysinfo.dwNumberOfProcessors;
|
|
|
|
#endif
|
|
|
|
if (cpuCount < 1) cpuCount = 1;
|
|
return cpuCount;
|
|
}
|
|
|
|
//
|
|
// Generate acceleration luts for the quantization.
|
|
//
|
|
// For each possible input value, we want to find the closest numbers
|
|
// which have one fewer bits set than before.
|
|
//
|
|
// This gives us num_bits(input)-1 values per input. If we alloc
|
|
// space for everything, that's like a 2MB table. We can do better
|
|
// by compressing all the values to be contiguous and using offset
|
|
// pointers.
|
|
//
|
|
// After we've found the candidates with fewer bits set, sort them
|
|
// based on increasing numbers of bits set. This way, on quantize(),
|
|
// we can scan through the list and halt once we find the first
|
|
// candidate within the error range. For small values that can
|
|
// be quantized to 0, 0 is the first value tested and the search
|
|
// can exit fairly quickly.
|
|
//
|
|
|
|
void
|
|
generateLutHeader()
|
|
{
|
|
std::vector<LutHeaderWorker*> workers;
|
|
|
|
size_t numWorkers = cpuCount();
|
|
size_t workerInterval = 65536 / numWorkers;
|
|
|
|
for (size_t i=0; i<numWorkers; ++i) {
|
|
if (i != numWorkers-1) {
|
|
workers.push_back( new LutHeaderWorker( i *workerInterval,
|
|
(i+1)*workerInterval) );
|
|
} else {
|
|
workers.push_back( new LutHeaderWorker(i*workerInterval, 65536) );
|
|
}
|
|
}
|
|
|
|
if (ILMTHREAD_NAMESPACE::supportsThreads()) {
|
|
std::vector<LutHeaderWorker::Runner*> runners;
|
|
for (size_t i=0; i<workers.size(); ++i) {
|
|
runners.push_back( new LutHeaderWorker::Runner(*workers[i], (i==0)) );
|
|
}
|
|
|
|
for (size_t i=0; i<workers.size(); ++i) {
|
|
delete runners[i];
|
|
}
|
|
} else {
|
|
for (size_t i=0; i<workers.size(); ++i) {
|
|
workers[i]->run(i == 0);
|
|
}
|
|
}
|
|
|
|
printf("static unsigned int closestDataOffset[] = {\n");
|
|
int offsetIdx = 0;
|
|
int offsetPrev = 0;
|
|
for (size_t i=0; i<workers.size(); ++i) {
|
|
for (size_t value=0; value<workers[i]->numValues(); ++value) {
|
|
if (offsetIdx % 8 == 0) {
|
|
printf(" ");
|
|
}
|
|
printf("%6lu, ", workers[i]->offset()[value] + offsetPrev);
|
|
if (offsetIdx % 8 == 7) {
|
|
printf("\n");
|
|
}
|
|
offsetIdx++;
|
|
}
|
|
offsetPrev += workers[i]->offset()[workers[i]->numValues()-1] +
|
|
workers[i]->lastCandidateCount();
|
|
}
|
|
printf("};\n\n\n");
|
|
|
|
|
|
printf("static unsigned short closestData[] = {\n");
|
|
int elementIdx = 0;
|
|
for (size_t i=0; i<workers.size(); ++i) {
|
|
for (size_t element=0; element<workers[i]->numElements(); ++element) {
|
|
if (elementIdx % 8 == 0) {
|
|
printf(" ");
|
|
}
|
|
printf("%5d, ", workers[i]->elements()[element]);
|
|
if (elementIdx % 8 == 7) {
|
|
printf("\n");
|
|
}
|
|
elementIdx++;
|
|
}
|
|
}
|
|
printf("};\n\n\n");
|
|
|
|
for (size_t i=0; i<workers.size(); ++i) {
|
|
delete workers[i];
|
|
}
|
|
}
|
|
|
|
|
|
int
|
|
main(int argc, char **argv)
|
|
{
|
|
printf("#include <cstddef>\n");
|
|
printf("\n\n\n");
|
|
|
|
generateNoop();
|
|
|
|
printf("\n\n\n");
|
|
|
|
generateToLinear();
|
|
|
|
printf("\n\n\n");
|
|
|
|
generateToNonlinear();
|
|
|
|
printf("\n\n\n");
|
|
|
|
generateLutHeader();
|
|
|
|
return 0;
|
|
}
|
|
#else // OPENEXR_BUILTIN_TABLES
|
|
|
|
#include "dwaLookups.h"
|
|
|
|
OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_ENTER
|
|
|
|
static void init_dwa_()
|
|
{
|
|
generateNoop();
|
|
generateToLinear();
|
|
generateToNonlinear();
|
|
// N/A: generateLutHeader();
|
|
}
|
|
|
|
static inline void init_dwa()
|
|
{
|
|
static bool initialized = false;
|
|
if (!initialized)
|
|
{
|
|
init_dwa_();
|
|
initialized = true;
|
|
}
|
|
}
|
|
|
|
const unsigned short* get_dwaCompressorNoOp()
|
|
{
|
|
init_dwa();
|
|
return dwaCompressorNoOp;
|
|
}
|
|
const unsigned short* get_dwaCompressorToLinear()
|
|
{
|
|
init_dwa();
|
|
return dwaCompressorToLinear;
|
|
}
|
|
const unsigned short* get_dwaCompressorToNonlinear()
|
|
{
|
|
init_dwa();
|
|
return dwaCompressorToNonlinear;
|
|
}
|
|
|
|
OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_EXIT
|
|
|
|
#endif // OPENEXR_BUILTIN_TABLES
|