Point Cloud Library (PCL)  1.7.0
/tmp/buildd/pcl-1.7-1.7.0/recognition/include/pcl/recognition/linemod.h
00001 /*
00002  * Software License Agreement (BSD License)
00003  *
00004  *  Point Cloud Library (PCL) - www.pointclouds.org
00005  *  Copyright (c) 2010-2011, Willow Garage, Inc.
00006  *
00007  *  All rights reserved. 
00008  *
00009  *  Redistribution and use in source and binary forms, with or without
00010  *  modification, are permitted provided that the following conditions
00011  *  are met:
00012  *
00013  *   * Redistributions of source code must retain the above copyright
00014  *     notice, this list of conditions and the following disclaimer.
00015  *   * Redistributions in binary form must reproduce the above
00016  *     copyright notice, this list of conditions and the following
00017  *     disclaimer in the documentation and/or other materials provided
00018  *     with the distribution.
00019  *   * Neither the name of Willow Garage, Inc. nor the names of its
00020  *     contributors may be used to endorse or promote products derived
00021  *     from this software without specific prior written permission.
00022  *
00023  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00024  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00025  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00026  *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
00027  *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00028  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00029  *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00030  *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00031  *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00032  *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00033  *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00034  *  POSSIBILITY OF SUCH DAMAGE.
00035  *
00036  */
00037 
00038 #ifndef PCL_RECOGNITION_LINEMOD
00039 #define PCL_RECOGNITION_LINEMOD
00040 
00041 #include <vector>
00042 #include <cstddef>
00043 #include <string.h>
00044 #include <pcl/pcl_macros.h>
00045 #include <pcl/recognition/quantizable_modality.h>
00046 #include <pcl/recognition/region_xy.h>
00047 #include <pcl/recognition/sparse_quantized_multi_mod_template.h>
00048 
00049 namespace pcl
00050 {
00051 
00052   /** \brief Stores a set of energy maps.
00053     * \author Stefan Holzer
00054     */
00055   class PCL_EXPORTS EnergyMaps
00056   {
00057     public:
00058       /** \brief Constructor. */
00059       EnergyMaps () : width_ (0), height_ (0), nr_bins_ (0), maps_ () 
00060       {
00061       }
00062 
00063       /** \brief Destructor. */
00064       virtual ~EnergyMaps () 
00065       {
00066       }
00067 
00068       /** \brief Returns the width of the energy maps. */
00069       inline size_t 
00070       getWidth () const 
00071       { 
00072         return (width_); 
00073       }
00074       
00075       /** \brief Returns the height of the energy maps. */
00076       inline size_t 
00077       getHeight () const 
00078       { 
00079         return (height_); 
00080       }
00081       
00082       /** \brief Returns the number of bins used for quantization (which is equal to the number of energy maps). */
00083       inline size_t 
00084       getNumOfBins () const
00085       { 
00086         return (nr_bins_);
00087       }
00088 
00089       /** \brief Initializes the set of energy maps.
00090         * \param[in] width the width of the energy maps.
00091         * \param[in] height the height of the energy maps.
00092         * \param[in] nr_bins the number of bins used for quantization.
00093         */
00094       void 
00095       initialize (const size_t width, const size_t height, const size_t nr_bins)
00096       {
00097         maps_.resize(nr_bins, NULL);
00098         width_ = width;
00099         height_ = height;
00100         nr_bins_ = nr_bins;
00101 
00102         const size_t mapsSize = width*height;
00103 
00104         for (size_t map_index = 0; map_index < maps_.size (); ++map_index)
00105         {
00106           //maps_[map_index] = new unsigned char[mapsSize];
00107           maps_[map_index] = reinterpret_cast<unsigned char*> (aligned_malloc (mapsSize));
00108           memset (maps_[map_index], 0, mapsSize);
00109         }
00110       }
00111 
00112       /** \brief Releases the internal data. */
00113       void 
00114       releaseAll ()
00115       {
00116         for (size_t map_index = 0; map_index < maps_.size (); ++map_index)
00117           //if (maps_[map_index] != NULL) delete[] maps_[map_index];
00118           if (maps_[map_index] != NULL) aligned_free (maps_[map_index]);
00119 
00120         maps_.clear ();
00121         width_ = 0;
00122         height_ = 0;
00123         nr_bins_ = 0;
00124       }
00125 
00126       /** \brief Operator for accessing a specific element in the set of energy maps.
00127         * \param[in] bin_index the quantization bin (states which of the energy maps to access).
00128         * \param[in] col_index the column index within the specified energy map.
00129         * \param[in] row_index the row index within the specified energy map.
00130         */
00131       inline unsigned char & 
00132       operator() (const size_t bin_index, const size_t col_index, const size_t row_index)
00133       {
00134         return (maps_[bin_index][row_index*width_ + col_index]);
00135       }
00136 
00137       /** \brief Operator for accessing a specific element in the set of energy maps.
00138         * \param[in] bin_index the quantization bin (states which of the energy maps to access).
00139         * \param[in] index the element index within the specified energy map.
00140         */
00141       inline unsigned char & 
00142       operator() (const size_t bin_index, const size_t index)
00143       {
00144         return (maps_[bin_index][index]);
00145       }
00146 
00147       /** \brief Returns a pointer to the data of the specified energy map.
00148         * \param[in] bin_index the index of the energy map to return (== the quantization bin).
00149         */
00150       inline unsigned char * 
00151       operator() (const size_t bin_index)
00152       {
00153         return (maps_[bin_index]);
00154       }
00155 
00156       /** \brief Operator for accessing a specific element in the set of energy maps.
00157         * \param[in] bin_index the quantization bin (states which of the energy maps to access).
00158         * \param[in] col_index the column index within the specified energy map.
00159         * \param[in] row_index the row index within the specified energy map.
00160         */
00161       inline const unsigned char & 
00162       operator() (const size_t bin_index, const size_t col_index, const size_t row_index) const
00163       {
00164         return (maps_[bin_index][row_index*width_ + col_index]);
00165       }
00166 
00167       /** \brief Operator for accessing a specific element in the set of energy maps.
00168         * \param[in] bin_index the quantization bin (states which of the energy maps to access).
00169         * \param[in] index the element index within the specified energy map.
00170         */
00171       inline const unsigned char & 
00172       operator() (const size_t bin_index, const size_t index) const
00173       {
00174         return (maps_[bin_index][index]);
00175       }
00176 
00177       /** \brief Returns a pointer to the data of the specified energy map.
00178         * \param[in] bin_index the index of the energy map to return (== the quantization bin).
00179         */
00180       inline const unsigned char * 
00181       operator() (const size_t bin_index) const
00182       {
00183         return (maps_[bin_index]);
00184       }
00185 
00186     private:
00187       /** \brief The width of the energy maps. */
00188       size_t width_;
00189       /** \brief The height of the energy maps. */
00190       size_t height_;
00191       /** \brief The number of quantization bins (== the number of internally stored energy maps). */
00192       size_t nr_bins_;
00193       /** \brief Storage for the energy maps. */
00194       std::vector<unsigned char*> maps_;
00195   };
00196 
00197   /** \brief Stores a set of linearized maps.
00198     * \author Stefan Holzer
00199     */
00200   class PCL_EXPORTS LinearizedMaps
00201   {
00202     public:
00203       /** \brief Constructor. */
00204       LinearizedMaps () : width_ (0), height_ (0), mem_width_ (0), mem_height_ (0), step_size_ (0), maps_ ()
00205       {
00206       }
00207       
00208       /** \brief Destructor. */
00209       virtual ~LinearizedMaps () 
00210       {
00211       }
00212 
00213       /** \brief Returns the width of the linearized map. */
00214       inline size_t 
00215       getWidth () const { return (width_); }
00216       
00217       /** \brief Returns the height of the linearized map. */
00218       inline size_t 
00219       getHeight () const { return (height_); }
00220       
00221       /** \brief Returns the step-size used to construct the linearized map. */
00222       inline size_t 
00223       getStepSize () const { return (step_size_); }
00224       
00225       /** \brief Returns the size of the memory map. */
00226       inline size_t 
00227       getMapMemorySize () const { return (mem_width_ * mem_height_); }
00228 
00229       /** \brief Initializes the linearized map.
00230         * \param[in] width the width of the source map.
00231         * \param[in] height the height of the source map.
00232         * \param[in] step_size the step-size used to sample the source map.
00233         */
00234       void 
00235       initialize (const size_t width, const size_t height, const size_t step_size)
00236       {
00237         maps_.resize(step_size*step_size, NULL);
00238         width_ = width;
00239         height_ = height;
00240         mem_width_ = width / step_size;
00241         mem_height_ = height / step_size;
00242         step_size_ = step_size;
00243 
00244         const size_t mapsSize = mem_width_ * mem_height_;
00245 
00246         for (size_t map_index = 0; map_index < maps_.size (); ++map_index)
00247         {
00248           //maps_[map_index] = new unsigned char[2*mapsSize];
00249           maps_[map_index] = reinterpret_cast<unsigned char*> (aligned_malloc (2*mapsSize));
00250           memset (maps_[map_index], 0, 2*mapsSize);
00251         }
00252       }
00253 
00254       /** \brief Releases the internal memory. */
00255       void 
00256       releaseAll ()
00257       {
00258         for (size_t map_index = 0; map_index < maps_.size (); ++map_index)
00259           //if (maps_[map_index] != NULL) delete[] maps_[map_index];
00260           if (maps_[map_index] != NULL) aligned_free (maps_[map_index]);
00261 
00262         maps_.clear ();
00263         width_ = 0;
00264         height_ = 0;
00265         mem_width_ = 0;
00266         mem_height_ = 0;
00267         step_size_ = 0;
00268       }
00269 
00270       /** \brief Operator to access elements of the linearized map by column and row index.
00271         * \param[in] col_index the column index.
00272         * \param[in] row_index the row index.
00273         */
00274       inline unsigned char * 
00275       operator() (const size_t col_index, const size_t row_index)
00276       {
00277         return (maps_[row_index*step_size_ + col_index]);
00278       }
00279 
00280       /** \brief Returns a linearized map starting at the specified position.
00281         * \param[in] col_index the column index at which the returned map starts.
00282         * \param[in] row_index the row index at which the returned map starts.
00283         */
00284       inline unsigned char * 
00285       getOffsetMap (const size_t col_index, const size_t row_index)
00286       {
00287         const size_t map_col = col_index % step_size_;
00288         const size_t map_row = row_index % step_size_;
00289 
00290         const size_t map_mem_col_index = col_index / step_size_;
00291         const size_t map_mem_row_index = row_index / step_size_;
00292 
00293         return (maps_[map_row*step_size_ + map_col] + map_mem_row_index*mem_width_ + map_mem_col_index);
00294       }
00295 
00296     private:
00297       /** \brief the original width of the data represented by the map. */
00298       size_t width_;
00299       /** \brief the original height of the data represented by the map. */
00300       size_t height_;
00301       /** \brief the actual width of the linearized map. */
00302       size_t mem_width_;
00303       /** \brief the actual height of the linearized map. */
00304       size_t mem_height_;
00305       /** \brief the step-size used for sampling the original data. */
00306       size_t step_size_;
00307       /** \brief a vector containing all the linearized maps. */
00308       std::vector<unsigned char*> maps_;
00309   };
00310 
00311   /** \brief Represents a detection of a template using the LINEMOD approach.
00312     * \author Stefan Holzer
00313     */
00314   struct PCL_EXPORTS LINEMODDetection
00315   {
00316     /** \brief Constructor. */
00317     LINEMODDetection () : x (0), y (0), template_id (0), score (0.0f), scale (1.0f) {}
00318 
00319     /** \brief x-position of the detection. */
00320     int x;
00321     /** \brief y-position of the detection. */
00322     int y;
00323     /** \brief ID of the detected template. */
00324     int template_id;
00325     /** \brief score of the detection. */
00326     float score;
00327     /** \brief scale at which the template was detected. */
00328     float scale;
00329   };
00330 
00331   /**
00332     * \brief Template matching using the LINEMOD approach.
00333     * \author Stefan Holzer, Stefan Hinterstoisser
00334     */
00335   class PCL_EXPORTS LINEMOD
00336   {
00337     public:
00338       /** \brief Constructor */
00339       LINEMOD ();
00340 
00341       /** \brief Destructor */
00342       virtual ~LINEMOD ();
00343 
00344       /** \brief Creates a template from the specified data and adds it to the matching queue. 
00345         * \param[in] modalities the modalities used to create the template.
00346         * \param[in] masks the masks that determine which parts of the modalities are used for creating the template.
00347         * \param[in] region the region which will be associated with the template (can be larger than the actual modality-maps).
00348         */
00349       int 
00350       createAndAddTemplate (const std::vector<QuantizableModality*> & modalities,
00351                             const std::vector<MaskMap*> & masks,
00352                             const RegionXY & region);
00353 
00354       /** \brief Adds the specified template to the matching queue.
00355         * \param[in] linemod_template the template to add.
00356         */
00357       int
00358       addTemplate (const SparseQuantizedMultiModTemplate & linemod_template);
00359 
00360       /** \brief Detects the stored templates in the supplied modality data.
00361         * \param[in] modalities the modalities that will be used for detection.
00362         * \param[out] detections the destination for the detections.
00363         */
00364       void
00365       detectTemplates (const std::vector<QuantizableModality*> & modalities,
00366                        std::vector<LINEMODDetection> & detections) const;
00367 
00368       /** \brief Detects the stored templates in a semi scale invariant manner 
00369         *        by applying the detection to multiple scaled versions of the input data.
00370         * \param[in] modalities the modalities that will be used for detection.
00371         * \param[out] detections the destination for the detections.
00372         * \param[in] min_scale the minimum scale.
00373         * \param[in] max_scale the maximum scale.
00374         * \param[in] scale_multiplier the multiplier for getting from one scale to the next.
00375         */
00376       void
00377       detectTemplatesSemiScaleInvariant (const std::vector<QuantizableModality*> & modalities,
00378                                          std::vector<LINEMODDetection> & detections,
00379                                          float min_scale = 0.6944444f,
00380                                          float max_scale = 1.44f,
00381                                          float scale_multiplier = 1.2f) const;
00382 
00383       /** \brief Matches the stored templates to the supplied modality data.
00384         * \param[in] modalities the modalities that will be used for matching.
00385         * \param[out] matches the found matches.
00386         */
00387       void
00388       matchTemplates (const std::vector<QuantizableModality*> & modalities,
00389                       std::vector<LINEMODDetection> & matches) const;
00390 
00391       /** \brief Sets the detection threshold. 
00392         * \param[in] threshold the detection threshold.
00393         */
00394       inline void
00395       setDetectionThreshold (float threshold)
00396       {
00397         template_threshold_ = threshold;
00398       }
00399 
00400       /** \brief Enables/disables non-maximum suppression.
00401         * \param[in] use_non_max_suppression determines whether to use non-maximum suppression or not.
00402         */
00403       inline void
00404       setNonMaxSuppression (bool use_non_max_suppression)
00405       {
00406         use_non_max_suppression_ = use_non_max_suppression;
00407       }
00408 
00409       /** \brief Enables/disables averaging of close detections.
00410         * \param[in] average_detections determines whether to average close detections or not.
00411         */
00412       inline void
00413       setDetectionAveraging (bool average_detections)
00414       {
00415         average_detections_ = average_detections;
00416       }
00417 
00418       /** \brief Returns the template with the specified ID.
00419         * \param[in] template_id the ID of the template to return.
00420         */
00421       inline const SparseQuantizedMultiModTemplate &
00422       getTemplate (int template_id) const
00423       { 
00424         return (templates_[template_id]);
00425       }
00426 
00427       /** \brief Returns the number of stored/trained templates. */
00428       inline size_t
00429       getNumOfTemplates () const
00430       {
00431         return (templates_.size ());
00432       }
00433 
00434       /** \brief Saves the stored templates to the specified file.
00435         * \param[in] file_name the name of the file to save the templates to.
00436         */
00437       void
00438       saveTemplates (const char * file_name) const;
00439 
00440       /** \brief Loads templates from the specified file.
00441         * \param[in] file_name the name of the file to load the template from.
00442         */
00443       void
00444       loadTemplates (const char * file_name);
00445 
00446       /** \brief Serializes the stored templates to the specified stream.
00447         * \param[in] stream the stream the templates will be written to.
00448         */
00449       void 
00450       serialize (std::ostream & stream) const;
00451 
00452       /** \brief Deserializes templates from the specified stream.
00453         * \param[in] stream the stream the templates will be read from.
00454         */
00455       void 
00456       deserialize (std::istream & stream);
00457 
00458 
00459     private:
00460       /** template response threshold */
00461       float template_threshold_;
00462       /** states whether non-max-suppression on detections is enabled or not */
00463       bool use_non_max_suppression_;
00464       /** states whether to return an averaged detection */
00465       bool average_detections_;
00466       /** template storage */
00467       std::vector<SparseQuantizedMultiModTemplate> templates_;
00468   };
00469 
00470 }
00471 
00472 #endif