// Point Cloud Library (PCL) 1.7.0
00001 /* 00002 Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho 00003 All rights reserved. 00004 00005 Redistribution and use in source and binary forms, with or without modification, 00006 are permitted provided that the following conditions are met: 00007 00008 Redistributions of source code must retain the above copyright notice, this list of 00009 conditions and the following disclaimer. Redistributions in binary form must reproduce 00010 the above copyright notice, this list of conditions and the following disclaimer 00011 in the documentation and/or other materials provided with the distribution. 00012 00013 Neither the name of the Johns Hopkins University nor the names of its contributors 00014 may be used to endorse or promote products derived from this software without specific 00015 prior written permission. 00016 00017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 00018 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 00019 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 00020 SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00021 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00022 TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 00023 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 00024 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00025 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 00026 DAMAGE. 
00027 */ 00028 00029 #include <float.h> 00030 #ifdef _WIN32 00031 # ifndef WIN32_LEAN_AND_MEAN 00032 # define WIN32_LEAN_AND_MEAN 00033 # endif // WIN32_LEAN_AND_MEAN 00034 # ifndef NOMINMAX 00035 # define NOMINMAX 00036 # endif // NOMINMAX 00037 # include <windows.h> 00038 #endif //_WIN32 00039 00040 /////////////////// 00041 // SparseMatrix // 00042 /////////////////// 00043 /////////////////////////////////////////// 00044 // Static Allocator Methods and Memebers // 00045 /////////////////////////////////////////// 00046 00047 namespace pcl 00048 { 00049 namespace poisson 00050 { 00051 00052 00053 template<class T> int SparseMatrix<T>::UseAlloc=0; 00054 template<class T> Allocator<MatrixEntry<T> > SparseMatrix<T>::internalAllocator; 00055 template<class T> int SparseMatrix<T>::UseAllocator(void){return UseAlloc;} 00056 template<class T> 00057 void SparseMatrix<T>::SetAllocator( int blockSize ) 00058 { 00059 if(blockSize>0){ 00060 UseAlloc=1; 00061 internalAllocator.set(blockSize); 00062 } 00063 else{UseAlloc=0;} 00064 } 00065 /////////////////////////////////////// 00066 // SparseMatrix Methods and Memebers // 00067 /////////////////////////////////////// 00068 00069 template< class T > 00070 SparseMatrix< T >::SparseMatrix( void ) 00071 { 00072 _contiguous = false; 00073 _maxEntriesPerRow = 0; 00074 rows = 0; 00075 rowSizes = NULL; 00076 m_ppElements = NULL; 00077 } 00078 00079 template< class T > SparseMatrix< T >::SparseMatrix( int rows ) : SparseMatrix< T >() { Resize( rows ); } 00080 template< class T > SparseMatrix< T >::SparseMatrix( int rows , int maxEntriesPerRow ) : SparseMatrix< T >() { Resize( rows , maxEntriesPerRow ); } 00081 00082 template< class T > 00083 SparseMatrix< T >::SparseMatrix( const SparseMatrix& M ) : SparseMatrix< T >() 00084 { 00085 if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow ); 00086 else Resize( M.rows ); 00087 for( int i=0 ; i<rows ; i++ ) 00088 { 00089 SetRowSize( i , M.rowSizes[i] ); 00090 memcpy( (*this)[i] , M[i] 
, sizeof( MatrixEntry< T > ) * rowSizes[i] ); 00091 } 00092 } 00093 template<class T> 00094 int SparseMatrix<T>::Entries( void ) const 00095 { 00096 int e = 0; 00097 for( int i=0 ; i<rows ; i++ ) e += int( rowSizes[i] ); 00098 return e; 00099 } 00100 template<class T> 00101 SparseMatrix<T>& SparseMatrix<T>::operator = (const SparseMatrix<T>& M) 00102 { 00103 if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow ); 00104 else Resize( M.rows ); 00105 for( int i=0 ; i<rows ; i++ ) 00106 { 00107 SetRowSize( i , M.rowSizes[i] ); 00108 memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] ); 00109 } 00110 return *this; 00111 } 00112 00113 template<class T> 00114 SparseMatrix<T>::~SparseMatrix( void ){ Resize( 0 ); } 00115 00116 template< class T > 00117 bool SparseMatrix< T >::write( const char* fileName ) const 00118 { 00119 FILE* fp = fopen( fileName , "wb" ); 00120 if( !fp ) return false; 00121 bool ret = write( fp ); 00122 fclose( fp ); 00123 return ret; 00124 } 00125 template< class T > 00126 bool SparseMatrix< T >::read( const char* fileName ) 00127 { 00128 FILE* fp = fopen( fileName , "rb" ); 00129 if( !fp ) return false; 00130 bool ret = read( fp ); 00131 fclose( fp ); 00132 return ret; 00133 } 00134 template< class T > 00135 bool SparseMatrix< T >::write( FILE* fp ) const 00136 { 00137 if( fwrite( &rows , sizeof( int ) , 1 , fp )!=1 ) return false; 00138 if( fwrite( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false; 00139 for( int i=0 ; i<rows ; i++ ) if( fwrite( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false; 00140 return true; 00141 } 00142 template< class T > 00143 bool SparseMatrix< T >::read( FILE* fp ) 00144 { 00145 int r; 00146 if( fread( &r , sizeof( int ) , 1 , fp )!=1 ) return false; 00147 Resize( r ); 00148 if( fread( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false; 00149 for( int i=0 ; i<rows ; i++ ) 00150 { 00151 r = rowSizes[i]; 00152 rowSizes[i] = 0; 00153 
SetRowSize( i , r ); 00154 if( fread( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false; 00155 } 00156 return true; 00157 } 00158 00159 00160 template< class T > 00161 void SparseMatrix< T >::Resize( int r ) 00162 { 00163 if( rows>0 ) 00164 { 00165 00166 if( !UseAlloc ) 00167 if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); } 00168 else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); } 00169 free( m_ppElements ); 00170 free( rowSizes ); 00171 } 00172 rows = r; 00173 if( r ) 00174 { 00175 rowSizes = ( int* )malloc( sizeof( int ) * r ); 00176 memset( rowSizes , 0 , sizeof( int ) * r ); 00177 m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r ); 00178 } 00179 _contiguous = false; 00180 _maxEntriesPerRow = 0; 00181 } 00182 template< class T > 00183 void SparseMatrix< T >::Resize( int r , int e ) 00184 { 00185 if( rows>0 ) 00186 { 00187 if( !UseAlloc ) 00188 if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); } 00189 else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); } 00190 free( m_ppElements ); 00191 free( rowSizes ); 00192 } 00193 rows = r; 00194 if( r ) 00195 { 00196 rowSizes = ( int* )malloc( sizeof( int ) * r ); 00197 memset( rowSizes , 0 , sizeof( int ) * r ); 00198 m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r ); 00199 m_ppElements[0] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * r * e ); 00200 for( int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] + e; 00201 } 00202 _contiguous = true; 00203 _maxEntriesPerRow = e; 00204 } 00205 00206 template<class T> 00207 void SparseMatrix< T >::SetRowSize( int row , int count ) 00208 { 00209 if( _contiguous ) 00210 { 00211 if( count>_maxEntriesPerRow ) fprintf( stderr , "[ERROR] Cannot set row size on contiguous matrix: %d<=%d\n" , count , _maxEntriesPerRow ) , exit( 0 ); 00212 rowSizes[row] = count; 00213 } 00214 else 
if( row>=0 && row<rows ) 00215 { 00216 if( UseAlloc ) m_ppElements[row] = internalAllocator.newElements(count); 00217 else 00218 { 00219 if( rowSizes[row] ) free( m_ppElements[row] ); 00220 if( count>0 ) m_ppElements[row] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * count ); 00221 } 00222 } 00223 } 00224 00225 00226 template<class T> 00227 void SparseMatrix<T>::SetZero() 00228 { 00229 Resize(this->m_N, this->m_M); 00230 } 00231 00232 template<class T> 00233 void SparseMatrix<T>::SetIdentity() 00234 { 00235 SetZero(); 00236 for(int ij=0; ij < Min( this->Rows(), this->Columns() ); ij++) 00237 (*this)(ij,ij) = T(1); 00238 } 00239 00240 template<class T> 00241 SparseMatrix<T> SparseMatrix<T>::operator * (const T& V) const 00242 { 00243 SparseMatrix<T> M(*this); 00244 M *= V; 00245 return M; 00246 } 00247 00248 template<class T> 00249 SparseMatrix<T>& SparseMatrix<T>::operator *= (const T& V) 00250 { 00251 for (int i=0; i<this->Rows(); i++) 00252 { 00253 for(int ii=0;ii<m_ppElements[i].size();i++){m_ppElements[i][ii].Value*=V;} 00254 } 00255 return *this; 00256 } 00257 00258 template<class T> 00259 SparseMatrix<T> SparseMatrix<T>::Multiply( const SparseMatrix<T>& M ) const 00260 { 00261 SparseMatrix<T> R( this->Rows(), M.Columns() ); 00262 for(int i=0; i<R.Rows(); i++){ 00263 for(int ii=0;ii<m_ppElements[i].size();ii++){ 00264 int N=m_ppElements[i][ii].N; 00265 T Value=m_ppElements[i][ii].Value; 00266 for(int jj=0;jj<M.m_ppElements[N].size();jj++){ 00267 R(i,M.m_ppElements[N][jj].N) += Value * M.m_ppElements[N][jj].Value; 00268 } 00269 } 00270 } 00271 return R; 00272 } 00273 00274 template<class T> 00275 template<class T2> 00276 Vector<T2> SparseMatrix<T>::Multiply( const Vector<T2>& V ) const 00277 { 00278 Vector<T2> R( rows ); 00279 00280 for (int i=0; i<rows; i++) 00281 { 00282 T2 temp=T2(); 00283 for(int ii=0;ii<rowSizes[i];ii++){ 00284 temp+=m_ppElements[i][ii].Value * V.m_pV[m_ppElements[i][ii].N]; 00285 } 00286 R(i)=temp; 00287 } 00288 return R; 
00289 } 00290 00291 template<class T> 00292 template<class T2> 00293 void SparseMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , int threads ) const 00294 { 00295 #pragma omp parallel for num_threads( threads ) schedule( static ) 00296 for( int i=0 ; i<rows ; i++ ) 00297 { 00298 T2 temp = T2(); 00299 temp *= 0; 00300 for( int j=0 ; j<rowSizes[i] ; j++ ) temp += m_ppElements[i][j].Value * In.m_pV[m_ppElements[i][j].N]; 00301 Out.m_pV[i] = temp; 00302 } 00303 } 00304 00305 template<class T> 00306 SparseMatrix<T> SparseMatrix<T>::operator * (const SparseMatrix<T>& M) const 00307 { 00308 return Multiply(M); 00309 } 00310 template<class T> 00311 template<class T2> 00312 Vector<T2> SparseMatrix<T>::operator * (const Vector<T2>& V) const 00313 { 00314 return Multiply(V); 00315 } 00316 00317 template<class T> 00318 SparseMatrix<T> SparseMatrix<T>::Transpose() const 00319 { 00320 SparseMatrix<T> M( this->Columns(), this->Rows() ); 00321 00322 for (int i=0; i<this->Rows(); i++) 00323 { 00324 for(int ii=0;ii<m_ppElements[i].size();ii++){ 00325 M(m_ppElements[i][ii].N,i) = m_ppElements[i][ii].Value; 00326 } 00327 } 00328 return M; 00329 } 00330 00331 template<class T> 00332 template<class T2> 00333 int SparseMatrix<T>::SolveSymmetric( const SparseMatrix<T>& M , const Vector<T2>& b , int iters , Vector<T2>& solution , const T2 eps , int reset , int threads ) 00334 { 00335 if( reset ) 00336 { 00337 solution.Resize( b.Dimensions() ); 00338 solution.SetZero(); 00339 } 00340 Vector< T2 > r; 00341 r.Resize( solution.Dimensions() ); 00342 M.Multiply( solution , r ); 00343 r = b - r; 00344 Vector< T2 > d = r; 00345 double delta_new , delta_0; 00346 for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i] * r.m_pV[i]; 00347 delta_0 = delta_new; 00348 if( delta_new<eps ) return 0; 00349 int ii; 00350 Vector< T2 > q; 00351 q.Resize( d.Dimensions() ); 00352 for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ ) 00353 { 00354 M.Multiply( d , q , threads ); 00355 double 
dDotQ = 0 , alpha = 0; 00356 for( int i=0 ; i<d.Dimensions() ; i++ ) dDotQ += d.m_pV[i] * q.m_pV[i]; 00357 alpha = delta_new / dDotQ; 00358 #pragma omp parallel for num_threads( threads ) schedule( static ) 00359 for( int i=0 ; i<r.Dimensions() ; i++ ) solution.m_pV[i] += d.m_pV[i] * T2( alpha ); 00360 if( !(ii%50) ) 00361 { 00362 r.Resize( solution.Dimensions() ); 00363 M.Multiply( solution , r , threads ); 00364 r = b - r; 00365 } 00366 else 00367 #pragma omp parallel for num_threads( threads ) schedule( static ) 00368 for( int i=0 ; i<r.Dimensions() ; i++ ) r.m_pV[i] = r.m_pV[i] - q.m_pV[i] * T2(alpha); 00369 00370 double delta_old = delta_new , beta; 00371 delta_new = 0; 00372 for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i]*r.m_pV[i]; 00373 beta = delta_new / delta_old; 00374 #pragma omp parallel for num_threads( threads ) schedule( static ) 00375 for( int i=0 ; i<d.Dimensions() ; i++ ) d.m_pV[i] = r.m_pV[i] + d.m_pV[i] * T2( beta ); 00376 } 00377 return ii; 00378 } 00379 00380 // Solve for x s.t. M(x)=b by solving for x s.t. 
M^tM(x)=M^t(b) 00381 template<class T> 00382 int SparseMatrix<T>::Solve(const SparseMatrix<T>& M,const Vector<T>& b,int iters,Vector<T>& solution,const T eps){ 00383 SparseMatrix mTranspose=M.Transpose(); 00384 Vector<T> bb=mTranspose*b; 00385 Vector<T> d,r,Md; 00386 T alpha,beta,rDotR; 00387 int i; 00388 00389 solution.Resize(M.Columns()); 00390 solution.SetZero(); 00391 00392 d=r=bb; 00393 rDotR=r.Dot(r); 00394 for(i=0;i<iters && rDotR>eps;i++){ 00395 T temp; 00396 Md=mTranspose*(M*d); 00397 alpha=rDotR/d.Dot(Md); 00398 solution+=d*alpha; 00399 r-=Md*alpha; 00400 temp=r.Dot(r); 00401 beta=temp/rDotR; 00402 rDotR=temp; 00403 d=r+d*beta; 00404 } 00405 return i; 00406 } 00407 00408 00409 00410 00411 /////////////////////////// 00412 // SparseSymmetricMatrix // 00413 /////////////////////////// 00414 template<class T> 00415 template<class T2> 00416 Vector<T2> SparseSymmetricMatrix<T>::operator * (const Vector<T2>& V) const {return Multiply(V);} 00417 template<class T> 00418 template<class T2> 00419 Vector<T2> SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& V ) const 00420 { 00421 Vector<T2> R( SparseMatrix<T>::rows ); 00422 00423 for(int i=0; i<SparseMatrix<T>::rows; i++){ 00424 for(int ii=0;ii<SparseMatrix<T>::rowSizes[i];ii++){ 00425 int j=SparseMatrix<T>::m_ppElements[i][ii].N; 00426 R(i)+=SparseMatrix<T>::m_ppElements[i][ii].Value * V.m_pV[j]; 00427 R(j)+=SparseMatrix<T>::m_ppElements[i][ii].Value * V.m_pV[i]; 00428 } 00429 } 00430 return R; 00431 } 00432 00433 template<class T> 00434 template<class T2> 00435 void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , bool addDCTerm ) const 00436 { 00437 Out.SetZero(); 00438 const T2* in = &In[0]; 00439 T2* out = &Out[0]; 00440 T2 dcTerm = T2( 0 ); 00441 if( addDCTerm ) 00442 { 00443 for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) dcTerm += in[i]; 00444 dcTerm /= SparseMatrix<T>::rows; 00445 } 00446 for( int i=0 ; i<this->SparseMatrix<T>::rows ; i++ ) 00447 { 00448 const MatrixEntry<T>* 
temp = SparseMatrix<T>::m_ppElements[i]; 00449 const MatrixEntry<T>* end = temp + SparseMatrix<T>::rowSizes[i]; 00450 const T2& in_i_ = in[i]; 00451 T2 out_i = T2(0); 00452 for( ; temp!=end ; temp++ ) 00453 { 00454 int j=temp->N; 00455 T2 v=temp->Value; 00456 out_i += v * in[j]; 00457 out[j] += v * in_i_; 00458 } 00459 out[i] += out_i; 00460 } 00461 if( addDCTerm ) for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) out[i] += dcTerm; 00462 } 00463 template<class T> 00464 template<class T2> 00465 void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , MapReduceVector< T2 >& OutScratch , bool addDCTerm ) const 00466 { 00467 int dim = int( In.Dimensions() ); 00468 const T2* in = &In[0]; 00469 int threads = OutScratch.threads(); 00470 if( addDCTerm ) 00471 { 00472 T2 dcTerm = 0; 00473 #pragma omp parallel for num_threads( threads ) reduction ( + : dcTerm ) 00474 for( int t=0 ; t<threads ; t++ ) 00475 { 00476 T2* out = OutScratch[t]; 00477 memset( out , 0 , sizeof( T2 ) * dim ); 00478 for( int i=(SparseMatrix<T>::rows*t)/threads ; i<(SparseMatrix<T>::rows*(t+1))/threads ; i++ ) 00479 { 00480 const T2& in_i_ = in[i]; 00481 T2& out_i_ = out[i]; 00482 for( const MatrixEntry< T > *temp = SparseMatrix<T>::m_ppElements[i] , *end = temp+SparseMatrix<T>::rowSizes[i] ; temp!=end ; temp++ ) 00483 { 00484 int j = temp->N; 00485 T2 v = temp->Value; 00486 out_i_ += v * in[j]; 00487 out[j] += v * in_i_; 00488 } 00489 dcTerm += in_i_; 00490 } 00491 } 00492 dcTerm /= dim; 00493 dim = int( Out.Dimensions() ); 00494 T2* out = &Out[0]; 00495 #pragma omp parallel for num_threads( threads ) schedule( static ) 00496 for( int i=0 ; i<dim ; i++ ) 00497 { 00498 T2 _out = dcTerm; 00499 for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i]; 00500 out[i] = _out; 00501 } 00502 } 00503 else 00504 { 00505 #pragma omp parallel for num_threads( threads ) 00506 for( int t=0 ; t<threads ; t++ ) 00507 { 00508 T2* out = OutScratch[t]; 00509 memset( out , 0 , sizeof( T2 ) * dim ); 
00510 for( int i=(SparseMatrix<T>::rows*t)/threads ; i<(SparseMatrix<T>::rows*(t+1))/threads ; i++ ) 00511 { 00512 const T2& in_i_ = in[i]; 00513 T2& out_i_ = out[i]; 00514 for( const MatrixEntry< T > *temp = SparseMatrix<T>::m_ppElements[i] , *end = temp+SparseMatrix<T>::rowSizes[i] ; temp!=end ; temp++ ) 00515 { 00516 int j = temp->N; 00517 T2 v = temp->Value; 00518 out_i_ += v * in[j]; 00519 out[j] += v * in_i_; 00520 } 00521 } 00522 } 00523 dim = int( Out.Dimensions() ); 00524 T2* out = &Out[0]; 00525 #pragma omp parallel for num_threads( threads ) schedule( static ) 00526 for( int i=0 ; i<dim ; i++ ) 00527 { 00528 T2 _out = T2(0); 00529 for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i]; 00530 out[i] = _out; 00531 } 00532 } 00533 } 00534 template<class T> 00535 template<class T2> 00536 void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , std::vector< T2* >& OutScratch , const std::vector< int >& bounds ) const 00537 { 00538 int dim = In.Dimensions(); 00539 const T2* in = &In[0]; 00540 int threads = OutScratch.size(); 00541 #pragma omp parallel for num_threads( threads ) 00542 for( int t=0 ; t<threads ; t++ ) 00543 for( int i=0 ; i<dim ; i++ ) OutScratch[t][i] = T2(0); 00544 #pragma omp parallel for num_threads( threads ) 00545 for( int t=0 ; t<threads ; t++ ) 00546 { 00547 T2* out = OutScratch[t]; 00548 for( int i=bounds[t] ; i<bounds[t+1] ; i++ ) 00549 { 00550 const MatrixEntry<T>* temp = SparseMatrix<T>::m_ppElements[i]; 00551 const MatrixEntry<T>* end = temp + SparseMatrix<T>::rowSizes[i]; 00552 const T2& in_i_ = in[i]; 00553 T2& out_i_ = out[i]; 00554 for( ; temp!=end ; temp++ ) 00555 { 00556 int j = temp->N; 00557 T2 v = temp->Value; 00558 out_i_ += v * in[j]; 00559 out[j] += v * in_i_; 00560 } 00561 } 00562 } 00563 T2* out = &Out[0]; 00564 #pragma omp parallel for num_threads( threads ) schedule( static ) 00565 for( int i=0 ; i<Out.Dimensions() ; i++ ) 00566 { 00567 T2& _out = out[i]; 00568 _out = T2(0); 00569 for( int 
t=0 ; t<threads ; t++ ) _out += OutScratch[t][i]; 00570 } 00571 } 00572 #if defined _WIN32 && !defined __MINGW32__ 00573 #ifndef _AtomicIncrement_ 00574 #define _AtomicIncrement_ 00575 inline void AtomicIncrement( volatile float* ptr , float addend ) 00576 { 00577 float newValue = *ptr; 00578 LONG& _newValue = *( (LONG*)&newValue ); 00579 LONG _oldValue; 00580 for( ;; ) 00581 { 00582 _oldValue = _newValue; 00583 newValue += addend; 00584 _newValue = InterlockedCompareExchange( (LONG*) ptr , _newValue , _oldValue ); 00585 if( _newValue==_oldValue ) break; 00586 } 00587 } 00588 inline void AtomicIncrement( volatile double* ptr , double addend ) 00589 //inline void AtomicIncrement( double* ptr , double addend ) 00590 { 00591 double newValue = *ptr; 00592 LONGLONG& _newValue = *( (LONGLONG*)&newValue ); 00593 LONGLONG _oldValue; 00594 do 00595 { 00596 _oldValue = _newValue; 00597 newValue += addend; 00598 _newValue = InterlockedCompareExchange64( (LONGLONG*) ptr , _newValue , _oldValue ); 00599 } 00600 while( _newValue!=_oldValue ); 00601 } 00602 #endif // _AtomicIncrement_ 00603 template< class T > 00604 void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< float >& In , Vector< float >& Out , int threads , const int* partition=NULL ) 00605 { 00606 Out.SetZero(); 00607 const float* in = &In[0]; 00608 float* out = &Out[0]; 00609 if( partition ) 00610 #pragma omp parallel for num_threads( threads ) 00611 for( int t=0 ; t<threads ; t++ ) 00612 for( int i=partition[t] ; i<partition[t+1] ; i++ ) 00613 { 00614 const MatrixEntry< T >* temp = A[i]; 00615 const MatrixEntry< T >* end = temp + A.rowSizes[i]; 00616 const float& in_i = in[i]; 00617 float out_i = 0.; 00618 for( ; temp!=end ; temp++ ) 00619 { 00620 int j = temp->N; 00621 float v = temp->Value; 00622 out_i += v * in[j]; 00623 AtomicIncrement( out+j , v * in_i ); 00624 } 00625 AtomicIncrement( out+i , out_i ); 00626 } 00627 else 00628 #pragma omp parallel for num_threads( threads ) 00629 for( int 
i=0 ; i<A.rows ; i++ ) 00630 { 00631 const MatrixEntry< T >* temp = A[i]; 00632 const MatrixEntry< T >* end = temp + A.rowSizes[i]; 00633 const float& in_i = in[i]; 00634 float out_i = 0.f; 00635 for( ; temp!=end ; temp++ ) 00636 { 00637 int j = temp->N; 00638 float v = temp->Value; 00639 out_i += v * in[j]; 00640 AtomicIncrement( out+j , v * in_i ); 00641 } 00642 AtomicIncrement( out+i , out_i ); 00643 } 00644 } 00645 template< class T > 00646 void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< double >& In , Vector< double >& Out , int threads , const int* partition=NULL ) 00647 { 00648 Out.SetZero(); 00649 const double* in = &In[0]; 00650 double* out = &Out[0]; 00651 00652 if( partition ) 00653 #pragma omp parallel for num_threads( threads ) 00654 for( int t=0 ; t<threads ; t++ ) 00655 for( int i=partition[t] ; i<partition[t+1] ; i++ ) 00656 { 00657 const MatrixEntry< T >* temp = A[i]; 00658 const MatrixEntry< T >* end = temp + A.rowSizes[i]; 00659 const double& in_i = in[i]; 00660 double out_i = 0.; 00661 for( ; temp!=end ; temp++ ) 00662 { 00663 int j = temp->N; 00664 T v = temp->Value; 00665 out_i += v * in[j]; 00666 AtomicIncrement( out+j , v * in_i ); 00667 } 00668 AtomicIncrement( out+i , out_i ); 00669 } 00670 else 00671 #pragma omp parallel for num_threads( threads ) 00672 for( int i=0 ; i<A.rows ; i++ ) 00673 { 00674 const MatrixEntry< T >* temp = A[i]; 00675 const MatrixEntry< T >* end = temp + A.rowSizes[i]; 00676 const double& in_i = in[i]; 00677 double out_i = 0.; 00678 for( ; temp!=end ; temp++ ) 00679 { 00680 int j = temp->N; 00681 T v = temp->Value; 00682 out_i += v * in[j]; 00683 AtomicIncrement( out+j , v * in_i ); 00684 } 00685 AtomicIncrement( out+i , out_i ); 00686 } 00687 } 00688 00689 template< class T > 00690 template< class T2 > 00691 int SparseSymmetricMatrix< T >::SolveAtomic( const SparseSymmetricMatrix< T >& A , const Vector< T2 >& b , int iters , Vector< T2 >& x , T2 eps , int reset , int threads , bool 
solveNormal ) 00692 { 00693 eps *= eps; 00694 int dim = b.Dimensions(); 00695 if( reset ) 00696 { 00697 x.Resize( dim ); 00698 x.SetZero(); 00699 } 00700 Vector< T2 > r( dim ) , d( dim ) , q( dim ); 00701 Vector< T2 > temp; 00702 if( solveNormal ) temp.Resize( dim ); 00703 T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0]; 00704 const T2* _b = &b[0]; 00705 00706 std::vector< int > partition( threads+1 ); 00707 { 00708 int eCount = 0; 00709 for( int i=0 ; i<A.rows ; i++ ) eCount += A.rowSizes[i]; 00710 partition[0] = 0; 00711 #pragma omp parallel for num_threads( threads ) 00712 for( int t=0 ; t<threads ; t++ ) 00713 { 00714 int _eCount = 0; 00715 for( int i=0 ; i<A.rows ; i++ ) 00716 { 00717 _eCount += A.rowSizes[i]; 00718 if( _eCount*threads>=eCount*(t+1) ) 00719 { 00720 partition[t+1] = i; 00721 break; 00722 } 00723 } 00724 } 00725 partition[threads] = A.rows; 00726 } 00727 if( solveNormal ) 00728 { 00729 MultiplyAtomic( A , x , temp , threads , &partition[0] ); 00730 MultiplyAtomic( A , temp , r , threads , &partition[0] ); 00731 MultiplyAtomic( A , b , temp , threads , &partition[0] ); 00732 #pragma omp parallel for num_threads( threads ) schedule( static ) 00733 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i]; 00734 } 00735 else 00736 { 00737 MultiplyAtomic( A , x , r , threads , &partition[0] ); 00738 #pragma omp parallel for num_threads( threads ) schedule( static ) 00739 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i]; 00740 } 00741 double delta_new = 0 , delta_0; 00742 for( size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i]; 00743 delta_0 = delta_new; 00744 if( delta_new<eps ) 00745 { 00746 fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps ); 00747 return 0; 00748 } 00749 int ii; 00750 for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ ) 00751 { 00752 if( solveNormal ) MultiplyAtomic( A , d , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , q , threads , &partition[0] ); 
00753 else MultiplyAtomic( A , d , q , threads , &partition[0] ); 00754 double dDotQ = 0; 00755 for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i]; 00756 T2 alpha = T2( delta_new / dDotQ ); 00757 #pragma omp parallel for num_threads( threads ) schedule( static ) 00758 for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha; 00759 if( (ii%50)==(50-1) ) 00760 { 00761 r.Resize( dim ); 00762 if( solveNormal ) MultiplyAtomic( A , x , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , r , threads , &partition[0] ); 00763 else MultiplyAtomic( A , x , r , threads , &partition[0] ); 00764 #pragma omp parallel for num_threads( threads ) schedule( static ) 00765 for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i]; 00766 } 00767 else 00768 #pragma omp parallel for num_threads( threads ) schedule( static ) 00769 for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha; 00770 00771 double delta_old = delta_new; 00772 delta_new = 0; 00773 for( size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i]; 00774 T2 beta = T2( delta_new / delta_old ); 00775 #pragma omp parallel for num_threads( threads ) schedule( static ) 00776 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta; 00777 } 00778 return ii; 00779 } 00780 #endif // _WIN32 && !__MINGW32__ 00781 template< class T > 00782 template< class T2 > 00783 int SparseSymmetricMatrix< T >::Solve( const SparseSymmetricMatrix<T>& A , const Vector<T2>& b , int iters , Vector<T2>& x , MapReduceVector< T2 >& scratch , T2 eps , int reset , bool addDCTerm , bool solveNormal ) 00784 { 00785 int threads = scratch.threads(); 00786 eps *= eps; 00787 int dim = int( b.Dimensions() ); 00788 Vector< T2 > r( dim ) , d( dim ) , q( dim ) , temp; 00789 if( reset ) x.Resize( dim ); 00790 if( solveNormal ) temp.Resize( dim ); 00791 T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0]; 00792 const T2* _b = &b[0]; 00793 00794 double delta_new = 0 , delta_0; 00795 if( solveNormal ) 00796 { 00797 A.Multiply( x , temp , scratch , addDCTerm ) , 
A.Multiply( temp , r , scratch , addDCTerm ) , A.Multiply( b , temp , scratch , addDCTerm ); 00798 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new ) 00799 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i]; 00800 } 00801 else 00802 { 00803 A.Multiply( x , r , scratch , addDCTerm ); 00804 #pragma omp parallel for num_threads( threads ) reduction ( + : delta_new ) 00805 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i]; 00806 } 00807 delta_0 = delta_new; 00808 if( delta_new<eps ) 00809 { 00810 fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps ); 00811 return 0; 00812 } 00813 int ii; 00814 for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ ) 00815 { 00816 if( solveNormal ) A.Multiply( d , temp , scratch , addDCTerm ) , A.Multiply( temp , q , scratch , addDCTerm ); 00817 else A.Multiply( d , q , scratch , addDCTerm ); 00818 double dDotQ = 0; 00819 #pragma omp parallel for num_threads( threads ) reduction( + : dDotQ ) 00820 for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i]; 00821 T2 alpha = T2( delta_new / dDotQ ); 00822 double delta_old = delta_new; 00823 delta_new = 0; 00824 if( (ii%50)==(50-1) ) 00825 { 00826 #pragma omp parallel for num_threads( threads ) 00827 for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha; 00828 r.Resize( dim ); 00829 if( solveNormal ) A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm ); 00830 else A.Multiply( x , r , scratch , addDCTerm ); 00831 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new ) 00832 for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha; 00833 } 00834 else 00835 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new ) 00836 for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha; 00837 00838 T2 beta = T2( 
delta_new / delta_old ); 00839 #pragma omp parallel for num_threads( threads ) 00840 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta; 00841 } 00842 return ii; 00843 } 00844 template< class T > 00845 template< class T2 > 00846 int SparseSymmetricMatrix<T>::Solve( const SparseSymmetricMatrix<T>& A , const Vector<T2>& b , int iters , Vector<T2>& x , T2 eps , int reset , int threads , bool addDCTerm , bool solveNormal ) 00847 { 00848 eps *= eps; 00849 int dim = int( b.Dimensions() ); 00850 MapReduceVector< T2 > outScratch; 00851 if( threads<1 ) threads = 1; 00852 if( threads>1 ) outScratch.resize( threads , dim ); 00853 if( reset ) x.Resize( dim ); 00854 Vector< T2 > r( dim ) , d( dim ) , q( dim ); 00855 Vector< T2 > temp; 00856 if( solveNormal ) temp.Resize( dim ); 00857 T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0]; 00858 const T2* _b = &b[0]; 00859 00860 double delta_new = 0 , delta_0; 00861 00862 if( solveNormal ) 00863 { 00864 if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm ) , A.Multiply( b , temp , outScratch , addDCTerm ); 00865 else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm ) , A.Multiply( b , temp , addDCTerm ); 00866 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new ) 00867 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i]; 00868 } 00869 else 00870 { 00871 if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm ); 00872 else A.Multiply( x , r , addDCTerm ); 00873 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new ) 00874 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i]; 00875 } 00876 00877 delta_0 = delta_new; 00878 if( delta_new<eps ) 00879 { 00880 fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps ); 00881 return 0; 00882 } 00883 int ii; 00884 for( ii=0 ; ii<iters && delta_new>eps*delta_0 
; ii++ ) 00885 { 00886 if( solveNormal ) 00887 { 00888 if( threads>1 ) A.Multiply( d , temp , outScratch , addDCTerm ) , A.Multiply( temp , q , outScratch , addDCTerm ); 00889 else A.Multiply( d , temp , addDCTerm ) , A.Multiply( temp , q , addDCTerm ); 00890 } 00891 else 00892 { 00893 if( threads>1 ) A.Multiply( d , q , outScratch , addDCTerm ); 00894 else A.Multiply( d , q , addDCTerm ); 00895 } 00896 double dDotQ = 0; 00897 #pragma omp parallel for num_threads( threads ) reduction( + : dDotQ ) 00898 for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i]; 00899 T2 alpha = T2( delta_new / dDotQ ); 00900 double delta_old = delta_new; 00901 delta_new = 0; 00902 00903 if( (ii%50)==(50-1) ) 00904 { 00905 #pragma omp parallel for num_threads( threads ) 00906 for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha; 00907 r.SetZero(); 00908 if( solveNormal ) 00909 { 00910 if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm ); 00911 else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm ); 00912 } 00913 else 00914 { 00915 if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm ); 00916 else A.Multiply( x , r , addDCTerm ); 00917 } 00918 #pragma omp parallel for num_threads( threads ) reduction ( + : delta_new ) 00919 for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha; 00920 } 00921 else 00922 { 00923 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new ) 00924 for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha; 00925 } 00926 00927 T2 beta = T2( delta_new / delta_old ); 00928 #pragma omp parallel for num_threads( threads ) 00929 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta; 00930 } 00931 return ii; 00932 } 00933 00934 template<class T> 00935 template<class T2> 00936 int SparseSymmetricMatrix<T>::Solve( const SparseSymmetricMatrix<T>& M , const Vector<T2>& 
diagonal , const Vector<T2>& b , int iters , Vector<T2>& solution , int reset ) 00937 { 00938 Vector<T2> d,r,Md; 00939 00940 if(reset) 00941 { 00942 solution.Resize(b.Dimensions()); 00943 solution.SetZero(); 00944 } 00945 Md.Resize(M.rows); 00946 for( int i=0 ; i<iters ; i++ ) 00947 { 00948 M.Multiply( solution , Md ); 00949 r = b-Md; 00950 // solution_new[j] * diagonal[j] + ( Md[j] - solution_old[j] * diagonal[j] ) = b[j] 00951 // solution_new[j] = ( b[j] - ( Md[j] - solution_old[j] * diagonal[j] ) ) / diagonal[j] 00952 // solution_new[j] = ( b[j] - Md[j] ) / diagonal[j] + solution_old[j] 00953 for( int j=0 ; j<int(M.rows) ; j++ ) solution[j] += (b[j]-Md[j])/diagonal[j]; 00954 } 00955 return iters; 00956 } 00957 template< class T > 00958 template< class T2 > 00959 void SparseSymmetricMatrix< T >::getDiagonal( Vector< T2 >& diagonal ) const 00960 { 00961 diagonal.Resize( SparseMatrix<T>::rows ); 00962 for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) 00963 { 00964 diagonal[i] = 0.; 00965 for( int j=0 ; j<SparseMatrix<T>::rowSizes[i] ; j++ ) if( SparseMatrix<T>::m_ppElements[i][j].N==i ) diagonal[i] += SparseMatrix<T>::m_ppElements[i][j].Value * 2; 00966 } 00967 } 00968 00969 } 00970 }