15 #include "reconstruct3d.h" 16 #include "visiontransfer/alignedallocator.h" 26 #include <immintrin.h> 28 #include <emmintrin.h> 39 class Reconstruct3D::Pimpl {
43 float* createPointMap(
const unsigned short* dispMap,
int width,
int height,
44 int rowStride,
const float* q,
unsigned short minDisparity);
46 float* createPointMap(
const ImageSet& imageSet,
unsigned short minDisparity);
48 void projectSinglePoint(
int imageX,
int imageY,
unsigned short disparity,
const float* q,
49 float& pointX,
float& pointY,
float& pointZ);
51 void writePlyFile(
const char* file,
const unsigned short* dispMap,
53 int dispRowStride,
int imageRowStride,
const float* q,
54 double maxZ,
bool binary);
56 void writePlyFile(
const char* file,
const ImageSet& imageSet,
57 double maxZ,
bool binary);
60 std::vector<float, AlignedAllocator<float> > pointMap;
62 float* createPointMapFallback(
const unsigned short* dispMap,
int width,
int height,
63 int rowStride,
const float* q,
unsigned short minDisparity);
65 float* createPointMapSSE2(
const unsigned short* dispMap,
int width,
int height,
66 int rowStride,
const float* q,
unsigned short minDisparity);
68 float* createPointMapAVX2(
const unsigned short* dispMap,
int width,
int height,
69 int rowStride,
const float* q,
unsigned short minDisparity);
74 Reconstruct3D::Reconstruct3D()
78 Reconstruct3D::~Reconstruct3D() {
83 int rowStride,
const float* q,
unsigned short minDisparity) {
84 return pimpl->createPointMap(dispMap, width, height, rowStride, q, minDisparity);
88 return pimpl->createPointMap(imageSet, minDisparity);
92 const float* q,
float& pointX,
float& pointY,
float& pointZ) {
93 pimpl->projectSinglePoint(imageX, imageY, disparity, q, pointX, pointY, pointZ);
98 int imageRowStride,
const float* q,
double maxZ,
bool binary) {
99 pimpl->writePlyFile(file, dispMap, image, width, height, format, dispRowStride,
100 imageRowStride, q, maxZ, binary);
104 double maxZ,
bool binary) {
105 pimpl->writePlyFile(file, imageSet, maxZ, binary);
110 Reconstruct3D::Pimpl::Pimpl() {
113 float* Reconstruct3D::Pimpl::createPointMap(
const unsigned short* dispMap,
int width,
114 int height,
int rowStride,
const float* q,
unsigned short minDisparity) {
117 if(pointMap.size() !=
static_cast<unsigned int>(4*width*height)) {
118 pointMap.resize(4*width*height);
122 if(width % 16 == 0) {
123 return createPointMapAVX2(dispMap, width, height, rowStride, q, minDisparity);
128 return createPointMapSSE2(dispMap, width, height, rowStride, q, minDisparity);
131 return createPointMapFallback(dispMap, width, height, rowStride, q, minDisparity);
134 float* Reconstruct3D::Pimpl::createPointMap(
const ImageSet& imageSet,
unsigned short minDisparity) {
136 throw std::runtime_error(
"ImageSet does not contain a disparity map!");
140 throw std::runtime_error(
"Disparity map must have 12-bit pixel format!");
147 float* Reconstruct3D::Pimpl::createPointMapFallback(
const unsigned short* dispMap,
int width,
148 int height,
int rowStride,
const float* q,
unsigned short minDisparity) {
150 float* outputPtr = &pointMap[0];
151 int stride = rowStride / 2;
153 for(
int y = 0; y < height; y++) {
154 double qx = q[1]*y + q[3];
155 double qy = q[5]*y + q[7];
156 double qz = q[9]*y + q[11];
157 double qw = q[13]*y + q[15];
159 for(
int x = 0; x < width; x++) {
160 unsigned short intDisp = std::max(minDisparity, dispMap[y*stride + x]);
161 if(intDisp >= 0xFFF) {
162 intDisp = minDisparity;
165 double d = intDisp / 16.0;
166 double w = qw + q[14]*d;
168 *outputPtr =
static_cast<float>((qx + q[2]*d)/w);
171 *outputPtr =
static_cast<float>((qy + q[6]*d)/w);
174 *outputPtr =
static_cast<float>((qz + q[10]*d)/w);
186 void Reconstruct3D::Pimpl::projectSinglePoint(
int imageX,
int imageY,
unsigned short disparity,
187 const float* q,
float& pointX,
float& pointY,
float& pointZ) {
189 double d = disparity / 16.0;
190 double w = q[15] + q[14]*d;
191 pointX =
static_cast<float>((imageX*q[0] + q[3])/w);
192 pointY =
static_cast<float>((imageY*q[5] + q[7])/w);
193 pointZ =
static_cast<float>(q[11]/w);
197 float* Reconstruct3D::Pimpl::createPointMapAVX2(
const unsigned short* dispMap,
int width,
198 int height,
int rowStride,
const float* q,
unsigned short minDisparity) {
201 const __m256 qCol0 = _mm256_setr_ps(q[0], q[4], q[8], q[12], q[0], q[4], q[8], q[12]);
202 const __m256 qCol1 = _mm256_setr_ps(q[1], q[5], q[9], q[13], q[1], q[5], q[9], q[13]);
203 const __m256 qCol2 = _mm256_setr_ps(q[2], q[6], q[10], q[14], q[2], q[6], q[10], q[14]);
204 const __m256 qCol3 = _mm256_setr_ps(q[3], q[7], q[11], q[15], q[3], q[7], q[11], q[15]);
207 const __m256i minDispVector = _mm256_set1_epi16(minDisparity);
208 const __m256i maxDispVector = _mm256_set1_epi16(0xFFF);
209 const __m256 scaleVector = _mm256_set1_ps(1.0/16.0);
210 const __m256i zeroVector = _mm256_set1_epi16(0);
212 float* outputPtr = &pointMap[0];
214 for(
int y = 0; y < height; y++) {
215 const unsigned char* rowStart = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride];
216 const unsigned char* rowEnd = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride + 2*width];
219 for(
const unsigned char* ptr = rowStart; ptr != rowEnd; ptr += 32) {
220 __m256i disparities = _mm256_load_si256(reinterpret_cast<const __m256i*>(ptr));
223 __m256i validMask = _mm256_cmpgt_epi16(maxDispVector, disparities);
224 disparities = _mm256_and_si256(validMask, disparities);
227 disparities = _mm256_max_epi16(disparities, minDispVector);
230 __m256i disparitiesMixup = _mm256_permute4x64_epi64(disparities, 0xd8);
233 __m256 floatDisp = _mm256_cvtepi32_ps(_mm256_unpacklo_epi16(disparitiesMixup, zeroVector));
234 __m256 dispScaled = _mm256_mul_ps(floatDisp, scaleVector);
238 __declspec(align(32))
float dispArray[16];
240 float dispArray[16]__attribute__((aligned(32)));
242 _mm256_store_ps(&dispArray[0], dispScaled);
245 floatDisp = _mm256_cvtepi32_ps(_mm256_unpackhi_epi16(disparitiesMixup, zeroVector));
246 dispScaled = _mm256_mul_ps(floatDisp, scaleVector);
247 _mm256_store_ps(&dispArray[8], dispScaled);
250 for(
int i=0; i<16; i+=2) {
252 __m256 vec = _mm256_setr_ps(x, y, dispArray[i], 1.0,
253 x+1, y, dispArray[i+1], 1.0);
256 __m256 u1 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(0,0,0,0));
257 __m256 u2 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(1,1,1,1));
258 __m256 u3 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(2,2,2,2));
259 __m256 u4 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(3,3,3,3));
261 __m256 prod1 = _mm256_mul_ps(u1, qCol0);
262 __m256 prod2 = _mm256_mul_ps(u2, qCol1);
263 __m256 prod3 = _mm256_mul_ps(u3, qCol2);
264 __m256 prod4 = _mm256_mul_ps(u4, qCol3);
266 __m256 multResult = _mm256_add_ps(_mm256_add_ps(prod1, prod2), _mm256_add_ps(prod3, prod4));
269 __m256 point = _mm256_div_ps(multResult,
270 _mm256_shuffle_ps(multResult,multResult, _MM_SHUFFLE(3,3,3,3)));
273 _mm256_store_ps(outputPtr, point);
286 float* Reconstruct3D::Pimpl::createPointMapSSE2(
const unsigned short* dispMap,
int width,
287 int height,
int rowStride,
const float* q,
unsigned short minDisparity) {
290 const __m128 qCol0 = _mm_setr_ps(q[0], q[4], q[8], q[12]);
291 const __m128 qCol1 = _mm_setr_ps(q[1], q[5], q[9], q[13]);
292 const __m128 qCol2 = _mm_setr_ps(q[2], q[6], q[10], q[14]);
293 const __m128 qCol3 = _mm_setr_ps(q[3], q[7], q[11], q[15]);
296 const __m128i minDispVector = _mm_set1_epi16(minDisparity);
297 const __m128i maxDispVector = _mm_set1_epi16(0xFFF);
298 const __m128 scaleVector = _mm_set1_ps(1.0/16.0);
299 const __m128i zeroVector = _mm_set1_epi16(0);
301 float* outputPtr = &pointMap[0];
303 for(
int y = 0; y < height; y++) {
304 const unsigned char* rowStart = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride];
305 const unsigned char* rowEnd = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride + 2*width];
308 for(
const unsigned char* ptr = rowStart; ptr != rowEnd; ptr += 16) {
309 __m128i disparities = _mm_load_si128(reinterpret_cast<const __m128i*>(ptr));
312 __m128i validMask = _mm_cmplt_epi16(disparities, maxDispVector);
313 disparities = _mm_and_si128(validMask, disparities);
316 disparities = _mm_max_epi16(disparities, minDispVector);
319 __m128 floatDisp = _mm_cvtepi32_ps(_mm_unpacklo_epi16(disparities, zeroVector));
320 __m128 dispScaled = _mm_mul_ps(floatDisp, scaleVector);
324 __declspec(align(16))
float dispArray[8];
326 float dispArray[8]__attribute__((aligned(16)));
328 _mm_store_ps(&dispArray[0], dispScaled);
331 floatDisp = _mm_cvtepi32_ps(_mm_unpackhi_epi16(disparities, zeroVector));
332 dispScaled = _mm_mul_ps(floatDisp, scaleVector);
333 _mm_store_ps(&dispArray[4], dispScaled);
336 for(
int i=0; i<8; i++) {
338 __m128 vec = _mm_setr_ps(static_cast<float>(x), static_cast<float>(y), dispArray[i], 1.0);
341 __m128 u1 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(0,0,0,0));
342 __m128 u2 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(1,1,1,1));
343 __m128 u3 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(2,2,2,2));
344 __m128 u4 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(3,3,3,3));
346 __m128 prod1 = _mm_mul_ps(u1, qCol0);
347 __m128 prod2 = _mm_mul_ps(u2, qCol1);
348 __m128 prod3 = _mm_mul_ps(u3, qCol2);
349 __m128 prod4 = _mm_mul_ps(u4, qCol3);
351 __m128 multResult = _mm_add_ps(_mm_add_ps(prod1, prod2), _mm_add_ps(prod3, prod4));
354 __m128 point = _mm_div_ps(multResult,
355 _mm_shuffle_ps(multResult,multResult, _MM_SHUFFLE(3,3,3,3)));
358 _mm_store_ps(outputPtr, point);
370 void Reconstruct3D::Pimpl::writePlyFile(
const char* file,
const unsigned short* dispMap,
372 int imageRowStride,
const float* q,
double maxZ,
bool binary) {
374 float* pointMap =
createPointMap(dispMap, width, height, dispRowStride, q, 0);
379 for(
int i=0; i<width*height; i++) {
380 if(pointMap[4*i+2] <= maxZ) {
385 pointsCount = width*height;
389 fstream strm(file, binary ? (ios::out | ios::binary) : ios::out);
390 strm <<
"ply" << endl;
393 strm <<
"format binary_little_endian 1.0" << endl;
395 strm <<
"format ascii 1.0" << endl;
398 strm <<
"element vertex " << pointsCount << endl
399 <<
"property float x" << endl
400 <<
"property float y" << endl
401 <<
"property float z" << endl;
402 if (image !=
nullptr) {
404 strm <<
"property uchar red" << endl
405 <<
"property uchar green" << endl
406 <<
"property uchar blue" << endl;
408 strm <<
"end_header" << endl;
411 for(
int i=0; i<width*height; i++) {
415 if(maxZ < 0 || pointMap[4*i+2] <= maxZ) {
418 strm.write(reinterpret_cast<char*>(&pointMap[4*i]),
sizeof(
float)*3);
419 if (image ==
nullptr) {
422 const unsigned char* col = &image[y*imageRowStride + 3*x];
423 strm.write(reinterpret_cast<const char*>(col), 3*
sizeof(*col));
425 const unsigned char* col = &image[y*imageRowStride + x];
426 unsigned char writeData[3] = {*col, *col, *col};
427 strm.write(reinterpret_cast<const char*>(writeData),
sizeof(writeData));
429 const unsigned short* col =
reinterpret_cast<const unsigned short*
>(&image[y*imageRowStride + 2*x]);
430 unsigned char writeData[3] = {
431 (
unsigned char)(*col >> 4),
432 (
unsigned char)(*col >> 4),
433 (
unsigned char)(*col >> 4)
435 strm.write(reinterpret_cast<const char*>(writeData),
sizeof(writeData));
439 if(std::isfinite(pointMap[4*i + 2])) {
440 strm << pointMap[4*i]
441 <<
" " << pointMap[4*i + 1]
442 <<
" " << pointMap[4*i + 2];
444 strm <<
"NaN NaN NaN";
447 if (image ==
nullptr) {
451 const unsigned char* col = &image[y*imageRowStride + 3*x];
452 strm <<
" " <<
static_cast<int>(col[0])
453 <<
" " << static_cast<int>(col[1])
454 <<
" " <<
static_cast<int>(col[2]) << endl;
456 const unsigned char* col = &image[y*imageRowStride + x];
457 strm <<
" " <<
static_cast<int>(*col)
458 <<
" " <<
static_cast<int>(*col)
459 <<
" " <<
static_cast<int>(*col) << endl;
461 const unsigned short* col =
reinterpret_cast<const unsigned short*
>(&image[y*imageRowStride + 2*x]);
462 strm <<
" " <<
static_cast<int>(*col >> 4)
463 <<
" " << static_cast<int>(*col >> 4)
464 <<
" " <<
static_cast<int>(*col >> 4) << endl;
471 void Reconstruct3D::Pimpl::writePlyFile(
const char* file,
const ImageSet& imageSet,
472 double maxZ,
bool binary) {
473 int indexDisp = imageSet.
getIndexOf(ImageSet::IMAGE_DISPARITY);
474 int indexImg = imageSet.
getIndexOf(ImageSet::IMAGE_LEFT);
475 if(indexDisp == -1) {
476 throw std::runtime_error(
"No disparity channel present, cannot create point map!");
479 throw std::runtime_error(
"Disparity map must have 12-bit pixel format!");
484 (indexImg == -1) ? nullptr : imageSet.
getPixelData(indexImg),
int getRowStride(int imageNumber) const
Returns the row stride for the pixel data of one image.
const float * getQMatrix() const
Returns a pointer to the disparity-to-depth mapping matrix q.
int getIndexOf(ImageType what, bool throwIfNotFound=false) const
Returns the index of a specific image type.
int getHeight() const
Returns the height of each image.
ImageFormat
Image formats that can be transferred.
unsigned char * getPixelData(int imageNumber) const
Returns the pixel data for the given image.
void projectSinglePoint(int imageX, int imageY, unsigned short disparity, const float *q, float &pointX, float &pointY, float &pointZ)
Reconstructs the 3D location of one individual point.
bool hasImageType(ImageType what) const
Returns whether a left camera image is included in the enabled data.
A set of one to three images, but usually two (the left camera image and the disparity map)...
ImageFormat getPixelFormat(int imageNumber) const
Returns the pixel format for the given image.
float * createPointMap(const unsigned short *dispMap, int width, int height, int rowStride, const float *q, unsigned short minDisparity=1)
Reconstructs the 3D location of each pixel in the given disparity map.
void writePlyFile(const char *file, const unsigned short *dispMap, const unsigned char *image, int width, int height, ImageSet::ImageFormat format, int dispRowStride, int imageRowStride, const float *q, double maxZ=std::numeric_limits< double >::max(), bool binary=false)
Projects the given disparity map to 3D points and exports the result to a PLY file.
int getWidth() const
Returns the width of each image.