15 #include "reconstruct3d.h" 16 #include "visiontransfer/alignedallocator.h" 26 #include <immintrin.h> 28 #include <emmintrin.h> 39 class Reconstruct3D::Pimpl {
43 float* createPointMap(
const unsigned short* dispMap,
int width,
int height,
44 int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor);
46 float* createPointMap(
const ImageSet& imageSet,
unsigned short minDisparity);
48 void projectSinglePoint(
int imageX,
int imageY,
unsigned short disparity,
const float* q,
49 float& pointX,
float& pointY,
float& pointZ,
int subpixelFactor);
51 void writePlyFile(
const char* file,
const unsigned short* dispMap,
53 int dispRowStride,
int imageRowStride,
const float* q,
54 double maxZ,
bool binary,
int subpixelFactor);
56 void writePlyFile(
const char* file,
const ImageSet& imageSet,
57 double maxZ,
bool binary);
60 std::vector<float, AlignedAllocator<float> > pointMap;
62 float* createPointMapFallback(
const unsigned short* dispMap,
int width,
int height,
63 int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor);
65 float* createPointMapSSE2(
const unsigned short* dispMap,
int width,
int height,
66 int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor);
68 float* createPointMapAVX2(
const unsigned short* dispMap,
int width,
int height,
69 int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor);
74 Reconstruct3D::Reconstruct3D()
78 Reconstruct3D::~Reconstruct3D() {
83 int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor) {
84 return pimpl->createPointMap(dispMap, width, height, rowStride, q, minDisparity, subpixelFactor);
88 return pimpl->createPointMap(imageSet, minDisparity);
92 const float* q,
float& pointX,
float& pointY,
float& pointZ,
int subpixelFactor) {
93 pimpl->projectSinglePoint(imageX, imageY, disparity, q, pointX, pointY, pointZ,
99 int imageRowStride,
const float* q,
double maxZ,
bool binary,
int subpixelFactor) {
100 pimpl->writePlyFile(file, dispMap, image, width, height, format, dispRowStride,
101 imageRowStride, q, maxZ, binary, subpixelFactor);
105 double maxZ,
bool binary) {
106 pimpl->writePlyFile(file, imageSet, maxZ, binary);
111 Reconstruct3D::Pimpl::Pimpl() {
114 float* Reconstruct3D::Pimpl::createPointMap(
const unsigned short* dispMap,
int width,
115 int height,
int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor) {
118 if(pointMap.size() !=
static_cast<unsigned int>(4*width*height)) {
119 pointMap.resize(4*width*height);
123 if(width % 16 == 0 && (uintptr_t)dispMap % 32 == 0) {
124 return createPointMapAVX2(dispMap, width, height, rowStride, q, minDisparity, subpixelFactor);
128 if(width % 8 == 0 && (uintptr_t)dispMap % 16 == 0) {
129 return createPointMapSSE2(dispMap, width, height, rowStride, q, minDisparity, subpixelFactor);
132 return createPointMapFallback(dispMap, width, height, rowStride, q, minDisparity, subpixelFactor);
135 float* Reconstruct3D::Pimpl::createPointMap(
const ImageSet& imageSet,
unsigned short minDisparity) {
137 throw std::runtime_error(
"ImageSet does not contain a disparity map!");
141 throw std::runtime_error(
"Disparity map must have 12-bit pixel format!");
149 float* Reconstruct3D::Pimpl::createPointMapFallback(
const unsigned short* dispMap,
int width,
150 int height,
int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor) {
152 float* outputPtr = &pointMap[0];
153 int stride = rowStride / 2;
155 for(
int y = 0; y < height; y++) {
156 double qx = q[1]*y + q[3];
157 double qy = q[5]*y + q[7];
158 double qz = q[9]*y + q[11];
159 double qw = q[13]*y + q[15];
161 for(
int x = 0; x < width; x++) {
162 unsigned short intDisp = std::max(minDisparity, dispMap[y*stride + x]);
163 if(intDisp >= 0xFFF) {
164 intDisp = minDisparity;
167 double d = intDisp / double(subpixelFactor);
168 double w = qw + q[14]*d;
170 *outputPtr =
static_cast<float>((qx + q[2]*d)/w);
173 *outputPtr =
static_cast<float>((qy + q[6]*d)/w);
176 *outputPtr =
static_cast<float>((qz + q[10]*d)/w);
188 void Reconstruct3D::Pimpl::projectSinglePoint(
int imageX,
int imageY,
unsigned short disparity,
189 const float* q,
float& pointX,
float& pointY,
float& pointZ,
int subpixelFactor) {
191 double d = disparity / double(subpixelFactor);
192 double w = q[15] + q[14]*d;
193 pointX =
static_cast<float>((imageX*q[0] + q[3])/w);
194 pointY =
static_cast<float>((imageY*q[5] + q[7])/w);
195 pointZ =
static_cast<float>(q[11]/w);
199 float* Reconstruct3D::Pimpl::createPointMapAVX2(
const unsigned short* dispMap,
int width,
200 int height,
int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor) {
203 const __m256 qCol0 = _mm256_setr_ps(q[0], q[4], q[8], q[12], q[0], q[4], q[8], q[12]);
204 const __m256 qCol1 = _mm256_setr_ps(q[1], q[5], q[9], q[13], q[1], q[5], q[9], q[13]);
205 const __m256 qCol2 = _mm256_setr_ps(q[2], q[6], q[10], q[14], q[2], q[6], q[10], q[14]);
206 const __m256 qCol3 = _mm256_setr_ps(q[3], q[7], q[11], q[15], q[3], q[7], q[11], q[15]);
209 const __m256i minDispVector = _mm256_set1_epi16(minDisparity);
210 const __m256i maxDispVector = _mm256_set1_epi16(0xFFF);
211 const __m256 scaleVector = _mm256_set1_ps(1.0/
double(subpixelFactor));
212 const __m256i zeroVector = _mm256_set1_epi16(0);
214 float* outputPtr = &pointMap[0];
216 for(
int y = 0; y < height; y++) {
217 const unsigned char* rowStart = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride];
218 const unsigned char* rowEnd = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride + 2*width];
221 for(
const unsigned char* ptr = rowStart; ptr != rowEnd; ptr += 32) {
222 __m256i disparities = _mm256_load_si256(reinterpret_cast<const __m256i*>(ptr));
225 __m256i validMask = _mm256_cmpgt_epi16(maxDispVector, disparities);
226 disparities = _mm256_and_si256(validMask, disparities);
229 disparities = _mm256_max_epi16(disparities, minDispVector);
232 __m256i disparitiesMixup = _mm256_permute4x64_epi64(disparities, 0xd8);
235 __m256 floatDisp = _mm256_cvtepi32_ps(_mm256_unpacklo_epi16(disparitiesMixup, zeroVector));
236 __m256 dispScaled = _mm256_mul_ps(floatDisp, scaleVector);
240 __declspec(align(32))
float dispArray[16];
242 float dispArray[16]__attribute__((aligned(32)));
244 _mm256_store_ps(&dispArray[0], dispScaled);
247 floatDisp = _mm256_cvtepi32_ps(_mm256_unpackhi_epi16(disparitiesMixup, zeroVector));
248 dispScaled = _mm256_mul_ps(floatDisp, scaleVector);
249 _mm256_store_ps(&dispArray[8], dispScaled);
252 for(
int i=0; i<16; i+=2) {
254 __m256 vec = _mm256_setr_ps(x, y, dispArray[i], 1.0,
255 x+1, y, dispArray[i+1], 1.0);
258 __m256 u1 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(0,0,0,0));
259 __m256 u2 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(1,1,1,1));
260 __m256 u3 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(2,2,2,2));
261 __m256 u4 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(3,3,3,3));
263 __m256 prod1 = _mm256_mul_ps(u1, qCol0);
264 __m256 prod2 = _mm256_mul_ps(u2, qCol1);
265 __m256 prod3 = _mm256_mul_ps(u3, qCol2);
266 __m256 prod4 = _mm256_mul_ps(u4, qCol3);
268 __m256 multResult = _mm256_add_ps(_mm256_add_ps(prod1, prod2), _mm256_add_ps(prod3, prod4));
271 __m256 point = _mm256_div_ps(multResult,
272 _mm256_shuffle_ps(multResult,multResult, _MM_SHUFFLE(3,3,3,3)));
275 _mm256_store_ps(outputPtr, point);
288 float* Reconstruct3D::Pimpl::createPointMapSSE2(
const unsigned short* dispMap,
int width,
289 int height,
int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor) {
292 const __m128 qCol0 = _mm_setr_ps(q[0], q[4], q[8], q[12]);
293 const __m128 qCol1 = _mm_setr_ps(q[1], q[5], q[9], q[13]);
294 const __m128 qCol2 = _mm_setr_ps(q[2], q[6], q[10], q[14]);
295 const __m128 qCol3 = _mm_setr_ps(q[3], q[7], q[11], q[15]);
298 const __m128i minDispVector = _mm_set1_epi16(minDisparity);
299 const __m128i maxDispVector = _mm_set1_epi16(0xFFF);
300 const __m128 scaleVector = _mm_set1_ps(1.0/
double(subpixelFactor));
301 const __m128i zeroVector = _mm_set1_epi16(0);
303 float* outputPtr = &pointMap[0];
305 for(
int y = 0; y < height; y++) {
306 const unsigned char* rowStart = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride];
307 const unsigned char* rowEnd = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride + 2*width];
310 for(
const unsigned char* ptr = rowStart; ptr != rowEnd; ptr += 16) {
311 __m128i disparities = _mm_load_si128(reinterpret_cast<const __m128i*>(ptr));
314 __m128i validMask = _mm_cmplt_epi16(disparities, maxDispVector);
315 disparities = _mm_and_si128(validMask, disparities);
318 disparities = _mm_max_epi16(disparities, minDispVector);
321 __m128 floatDisp = _mm_cvtepi32_ps(_mm_unpacklo_epi16(disparities, zeroVector));
322 __m128 dispScaled = _mm_mul_ps(floatDisp, scaleVector);
326 __declspec(align(16))
float dispArray[8];
328 float dispArray[8]__attribute__((aligned(16)));
330 _mm_store_ps(&dispArray[0], dispScaled);
333 floatDisp = _mm_cvtepi32_ps(_mm_unpackhi_epi16(disparities, zeroVector));
334 dispScaled = _mm_mul_ps(floatDisp, scaleVector);
335 _mm_store_ps(&dispArray[4], dispScaled);
338 for(
int i=0; i<8; i++) {
340 __m128 vec = _mm_setr_ps(static_cast<float>(x), static_cast<float>(y), dispArray[i], 1.0);
343 __m128 u1 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(0,0,0,0));
344 __m128 u2 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(1,1,1,1));
345 __m128 u3 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(2,2,2,2));
346 __m128 u4 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(3,3,3,3));
348 __m128 prod1 = _mm_mul_ps(u1, qCol0);
349 __m128 prod2 = _mm_mul_ps(u2, qCol1);
350 __m128 prod3 = _mm_mul_ps(u3, qCol2);
351 __m128 prod4 = _mm_mul_ps(u4, qCol3);
353 __m128 multResult = _mm_add_ps(_mm_add_ps(prod1, prod2), _mm_add_ps(prod3, prod4));
356 __m128 point = _mm_div_ps(multResult,
357 _mm_shuffle_ps(multResult,multResult, _MM_SHUFFLE(3,3,3,3)));
360 _mm_store_ps(outputPtr, point);
372 void Reconstruct3D::Pimpl::writePlyFile(
const char* file,
const unsigned short* dispMap,
374 int imageRowStride,
const float* q,
double maxZ,
bool binary,
int subpixelFactor) {
376 float* pointMap =
createPointMap(dispMap, width, height, dispRowStride, q, 0, subpixelFactor);
381 for(
int i=0; i<width*height; i++) {
382 if(pointMap[4*i+2] <= maxZ) {
387 pointsCount = width*height;
391 fstream strm(file, binary ? (ios::out | ios::binary) : ios::out);
392 strm <<
"ply" << endl;
395 strm <<
"format binary_little_endian 1.0" << endl;
397 strm <<
"format ascii 1.0" << endl;
400 strm <<
"element vertex " << pointsCount << endl
401 <<
"property float x" << endl
402 <<
"property float y" << endl
403 <<
"property float z" << endl;
404 if (image !=
nullptr) {
406 strm <<
"property uchar red" << endl
407 <<
"property uchar green" << endl
408 <<
"property uchar blue" << endl;
410 strm <<
"end_header" << endl;
413 for(
int i=0; i<width*height; i++) {
417 if(maxZ < 0 || pointMap[4*i+2] <= maxZ) {
420 strm.write(reinterpret_cast<char*>(&pointMap[4*i]),
sizeof(
float)*3);
421 if (image ==
nullptr) {
424 const unsigned char* col = &image[y*imageRowStride + 3*x];
425 strm.write(reinterpret_cast<const char*>(col), 3*
sizeof(*col));
427 const unsigned char* col = &image[y*imageRowStride + x];
428 unsigned char writeData[3] = {*col, *col, *col};
429 strm.write(reinterpret_cast<const char*>(writeData),
sizeof(writeData));
431 const unsigned short* col =
reinterpret_cast<const unsigned short*
>(&image[y*imageRowStride + 2*x]);
432 unsigned char writeData[3] = {
433 (
unsigned char)(*col >> 4),
434 (
unsigned char)(*col >> 4),
435 (
unsigned char)(*col >> 4)
437 strm.write(reinterpret_cast<const char*>(writeData),
sizeof(writeData));
441 if(std::isfinite(pointMap[4*i + 2])) {
442 strm << pointMap[4*i]
443 <<
" " << pointMap[4*i + 1]
444 <<
" " << pointMap[4*i + 2];
446 strm <<
"NaN NaN NaN";
449 if (image ==
nullptr) {
453 const unsigned char* col = &image[y*imageRowStride + 3*x];
454 strm <<
" " <<
static_cast<int>(col[0])
455 <<
" " << static_cast<int>(col[1])
456 <<
" " <<
static_cast<int>(col[2]) << endl;
458 const unsigned char* col = &image[y*imageRowStride + x];
459 strm <<
" " <<
static_cast<int>(*col)
460 <<
" " <<
static_cast<int>(*col)
461 <<
" " <<
static_cast<int>(*col) << endl;
463 const unsigned short* col =
reinterpret_cast<const unsigned short*
>(&image[y*imageRowStride + 2*x]);
464 strm <<
" " <<
static_cast<int>(*col >> 4)
465 <<
" " << static_cast<int>(*col >> 4)
466 <<
" " <<
static_cast<int>(*col >> 4) << endl;
473 void Reconstruct3D::Pimpl::writePlyFile(
const char* file,
const ImageSet& imageSet,
474 double maxZ,
bool binary) {
475 int indexDisp = imageSet.
getIndexOf(ImageSet::IMAGE_DISPARITY);
476 int indexImg = imageSet.
getIndexOf(ImageSet::IMAGE_LEFT);
477 if(indexDisp == -1) {
478 throw std::runtime_error(
"No disparity channel present, cannot create point map!");
481 throw std::runtime_error(
"Disparity map must have 12-bit pixel format!");
486 (indexImg == -1) ? nullptr : imageSet.
getPixelData(indexImg),
int getRowStride(int imageNumber) const
Returns the row stride for the pixel data of one image.
const float * getQMatrix() const
Returns a pointer to the disparity-to-depth mapping matrix q.
int getIndexOf(ImageType what, bool throwIfNotFound=false) const
Returns the index of a specific image type.
int getHeight() const
Returns the height of each image.
int getSubpixelFactor() const
Gets the subpixel factor for this image set.
ImageFormat
Image formats that can be transferred.
unsigned char * getPixelData(int imageNumber) const
Returns the pixel data for the given image.
void projectSinglePoint(int imageX, int imageY, unsigned short disparity, const float *q, float &pointX, float &pointY, float &pointZ, int subpixelFactor=16)
Reconstructs the 3D location of one individual point.
bool hasImageType(ImageType what) const
Returns whether a left camera image is included in the enabled data.
A set of one to three images, but usually two (the left camera image and the disparity map)...
float * createPointMap(const unsigned short *dispMap, int width, int height, int rowStride, const float *q, unsigned short minDisparity=1, int subpixelFactor=16)
Reconstructs the 3D location of each pixel in the given disparity map.
ImageFormat getPixelFormat(int imageNumber) const
Returns the pixel format for the given image.
void writePlyFile(const char *file, const unsigned short *dispMap, const unsigned char *image, int width, int height, ImageSet::ImageFormat format, int dispRowStride, int imageRowStride, const float *q, double maxZ=std::numeric_limits< double >::max(), bool binary=false, int subpixelFactor=16)
Projects the given disparity map to 3D points and exports the result to a PLY file.
int getWidth() const
Returns the width of each image.