libvot  0.1.3
A C++11 multithread library for image retrieval
vocab_tree.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015, Tianwei Shen
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7 
8 * Redistributions of source code must retain the above copyright notice, this
9  list of conditions and the following disclaimer.
10 
11 * Redistributions in binary form must reproduce the above copyright notice,
12  this list of conditions and the following disclaimer in the documentation
13  and/or other materials provided with the distribution.
14 
15 * Neither the name of libvot nor the names of its
16  contributors may be used to endorse or promote products derived from
17  this software without specific prior written permission.
18 
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 */
31 
35 #ifndef VOCAB_TREE_HEADER
36 #define VOCAB_TREE_HEADER
37 
38 #include <vector>
39 #include <cstdlib>
40 #include <mutex>
41 
42 #include "utils/global_params.h"
43 #include "utils/data_types.h"
44 
45 namespace vot
46 {
47  typedef enum
48  {
49  L1 = 1,
50  L2 = 2
51  } DistanceType;
52 
56  class ImageCount
57  {
58  public:
59  ImageCount() : index(0), count(0.0) { }
60  ImageCount(size_t index_, float count_) : index(index_), count(count_) { }
61 
62  size_t index;
63  float count;
64  };
65 
69  class TreeNode
70  {
71  public:
72  TreeNode():des(NULL) {}
73  virtual ~TreeNode()
74  {
75  if(des != NULL)
76  {
77  delete [] des;
78  des = NULL;
79  }
80  }
81  // pure virtual function that will be implemented in derived classes
82  virtual bool RecursiveBuild(size_t num_keys, int dim, int depth, int depth_curr, int bf, DTYPE **p , double *means, int *assign, int thread_num = 1) = 0;
83  virtual bool ClearNode(int bf) = 0;
84  virtual bool WriteNode(FILE *f, int branch_num, int dim) const = 0;
85  virtual bool ReadNode(FILE *f, int branch_num, int dim) = 0;
86  virtual size_t CountNodes(int branch_num) const = 0;
87  virtual size_t CountLeaves(int branch_num) const = 0;
88  virtual bool Compare(TreeNode *in, int branch_num, int dim) const = 0;
89  virtual bool ClearScores(int bf) = 0;
90  // function for build image database
91  virtual size_t DescendFeature(float *q, DTYPE *v, size_t image_index, int branch_num, int dim, bool add = true) = 0;
92  virtual double ComputeImageVectorMagnitude(int bf, DistanceType dt) = 0;
93  virtual bool SetConstantWeight(int bf) = 0;
94  virtual bool ComputeTFIDFWeight(int bf, size_t n) = 0;
95  virtual bool ComputeDatabaseMagnitude(int bf, DistanceType dis_type, size_t start_id, std::vector<float> &database_mag) = 0;
96  virtual bool NormalizeDatabase(int bf, size_t start_id, std::vector<float> &database_mag) = 0;
97  // member functions for querying database
98  virtual bool IndexLeaves(int branch_num) = 0;
99  virtual bool FillQueryVector(float *q, int branch_num, float normalize_factor) = 0;
100  virtual bool ScoreQuery(float *q, int branch_num, DistanceType dt, float *scores) = 0;
101 
103  size_t id;
104  };
105 
109  class TreeInNode: public TreeNode
110  {
111  public:
112  TreeInNode():children(NULL) {}
113  virtual ~TreeInNode();
114  virtual bool RecursiveBuild(size_t num_keys, int dim, int depth, int depth_curr, int bf, DTYPE **p, double *means, int *assign, int thread_num = 1);
115  virtual bool ClearNode(int bf);
116  virtual bool WriteNode(FILE *f, int branch_num, int dim) const;
117  virtual bool ReadNode(FILE *f, int branch_num, int dim);
118  virtual size_t CountNodes(int branch_num) const ;
119  virtual size_t CountLeaves(int branch_num) const ;
120  virtual bool Compare(TreeNode *in, int branch_num, int dim) const;
121  virtual bool ClearScores(int bf);
122  // function for build image database
123  virtual size_t DescendFeature(float *q, DTYPE *v, size_t image_index, int branch_num, int dim, bool add = true);
124  virtual double ComputeImageVectorMagnitude(int bf, DistanceType dt);
125  virtual bool SetConstantWeight(int bf);
126  virtual bool ComputeTFIDFWeight(int bf, size_t n);
127  virtual bool ComputeDatabaseMagnitude(int bf, DistanceType dis_type, size_t start_id, std::vector<float> &database_mag);
128  virtual bool NormalizeDatabase(int bf, size_t start_id, std::vector<float> &database_mag);
129  // member functions for querying database
130  virtual bool IndexLeaves(int branch_num);
131  virtual bool FillQueryVector(float *q, int branch_num, float normalize_factor);
132  virtual bool ScoreQuery(float *q, int branch_num, DistanceType dt, float *scores);
133 
135  };
136 
140  class TreeLeafNode: public TreeNode
141  {
142  public:
143  TreeLeafNode():score(0.0), weight(0.0) {}
144  virtual ~TreeLeafNode();
145  virtual bool RecursiveBuild(size_t num_keys, int dim, int depth, int depth_curr, int bf, DTYPE **p , double *means, int *assign, int thread_num = 1);
146  virtual bool ClearNode(int bf);
147  virtual bool WriteNode(FILE *f, int branch_num, int dim) const;
148  virtual bool ReadNode(FILE *f, int branch_num, int dim);
149  virtual size_t CountNodes(int branch_num) const;
150  virtual size_t CountLeaves(int branch_num) const;
151  virtual bool Compare(TreeNode *leaf, int branch_num, int dim) const;
152  virtual bool ClearScores(int bf);
153  // function for build image database
154  virtual size_t DescendFeature(float *q, DTYPE *v, size_t image_index, int branch_num, int dim, bool add = true);
155  virtual double ComputeImageVectorMagnitude(int bf, DistanceType dt);
156  virtual bool SetConstantWeight(int bf);
157  virtual bool ComputeTFIDFWeight(int bf, size_t n);
158  virtual bool ComputeDatabaseMagnitude(int bf, DistanceType dis_type, size_t start_id, std::vector<float> &database_mag);
159  virtual bool NormalizeDatabase(int bf, size_t start_id, std::vector<float> &database_mag);
160  // member functions for querying database
161  virtual bool IndexLeaves(int branch_num);
162  virtual bool FillQueryVector(float *q, int branch_num, float normalize_factor);
163  virtual bool ScoreQuery(float *q, int branch_num, DistanceType dt, float *scores);
164 
165  float score;
166  float weight;
167  std::vector<ImageCount> inv_list;
168  std::mutex add_lock;
169  };
170 
174  class VocabTree
175  {
176  public:
177  // constructors and destructors
178  VocabTree();
179  VocabTree(int depth_, int branch_num_, int dim_ = 128, DistanceType dis_type_ = L2);
180  ~VocabTree();
181 
182  // member function for building vocabulary tree
183  bool BuildTree(size_t num_keys, int dim, int depth, int bf, DTYPE **p, int thread_num = 1);
184  bool WriteTree(const char *filename) const;
185  bool ReadTree(const char *filename);
186  bool ClearTree();
187  bool Compare(VocabTree &v) const;
188  // member function for building image database
189  double AddImage2Tree(size_t image_index, tw::SiftData &sift, int thread_num);
190  void Show() const;
191  bool SetConstantWeight();
192  bool ComputeTFIDFWeight(size_t image_num);
193  bool NormalizeDatabase(size_t start_id, size_t image_num);
194  // member function for querying database
195  bool Query(tw::SiftData &sift, float *scores);
196  size_t IndexLeaves();
197 
198  // public data member
200  int depth;
201  int dim;
203  size_t num_nodes;
204  size_t num_leaves;
207  };
208 
209 } // end of namespace vot
210 
211 #endif //VOCAB_TREE_HEADER
virtual bool ComputeDatabaseMagnitude(int bf, DistanceType dis_type, size_t start_id, std::vector< float > &database_mag)
compute the vector magnitude of all images in the database
Definition: vocab_tree.cpp:888
ImageCount(size_t index_, float count_)
Definition: vocab_tree.h:60
virtual bool Compare(TreeNode *in, int branch_num, int dim) const =0
virtual bool NormalizeDatabase(int bf, size_t start_id, std::vector< float > &database_mag)=0
normalize the inverted list score by the magnitude of image vector
size_t id
the id of the node
Definition: vocab_tree.h:103
#define DTYPE
Definition: global_params.h:41
DTYPE * des
the descriptor vector
Definition: vocab_tree.h:102
virtual bool FillQueryVector(float *q, int branch_num, float normalize_factor)
fill the query vector
Definition: vocab_tree.cpp:1018
std::mutex add_lock
a mutex used for multithreaded AddImage2Tree
Definition: vocab_tree.h:168
virtual bool ClearScores(int bf)=0
refresh the temporary score for this tree
double AddImage2Tree(size_t image_index, tw::SiftData &sift, int thread_num)
add an image into the database (support multi-thread)
Definition: vocab_tree.cpp:671
virtual bool ClearNode(int bf)=0
virtual bool ScoreQuery(float *q, int branch_num, DistanceType dt, float *scores)=0
score each image in the database
virtual bool RecursiveBuild(size_t num_keys, int dim, int depth, int depth_curr, int bf, DTYPE **p, double *means, int *assign, int thread_num=1)=0
bool Query(tw::SiftData &sift, float *scores)
query database and return the scores
Definition: vocab_tree.cpp:949
virtual bool WriteNode(FILE *f, int branch_num, int dim) const =0
virtual bool SetConstantWeight(int bf)
set a constant weight to the leaf nodes
Definition: vocab_tree.cpp:817
virtual bool Compare(TreeNode *in, int branch_num, int dim) const
Definition: vocab_tree.cpp:610
virtual bool ClearScores(int bf)
refresh the temporary score for this tree
Definition: vocab_tree.cpp:119
float count
Definition: vocab_tree.h:63
virtual bool ComputeTFIDFWeight(int bf, size_t n)
compute TF-IDF weight and pre-apply weight adjusting to inverted lists
Definition: vocab_tree.cpp:840
virtual bool IndexLeaves(int branch_num)
Definition: vocab_tree.cpp:1012
DistanceType dis_type
the distance type
Definition: vocab_tree.h:205
TreeNode()
Definition: vocab_tree.h:72
TreeNode * root
the root of the tree
Definition: vocab_tree.h:206
TreeInNode()
Definition: vocab_tree.h:112
bool BuildTree(size_t num_keys, int dim, int depth, int bf, DTYPE **p, int thread_num=1)
build a vocabulary tree from a set of features
Definition: vocab_tree.cpp:143
The virtual class of tree node.
Definition: vocab_tree.h:69
virtual bool RecursiveBuild(size_t num_keys, int dim, int depth, int depth_curr, int bf, DTYPE **p, double *means, int *assign, int thread_num=1)
Definition: vocab_tree.cpp:331
The leaf node class of the tree.
Definition: vocab_tree.h:140
virtual bool ComputeDatabaseMagnitude(int bf, DistanceType dis_type, size_t start_id, std::vector< float > &database_mag)
compute the vector magnitude of all images in the database
Definition: vocab_tree.cpp:898
TreeLeafNode()
Definition: vocab_tree.h:143
virtual bool WriteNode(FILE *f, int branch_num, int dim) const
Definition: vocab_tree.cpp:367
virtual size_t DescendFeature(float *q, DTYPE *v, size_t image_index, int branch_num, int dim, bool add=true)
Definition: vocab_tree.cpp:746
~VocabTree()
Definition: vocab_tree.cpp:67
virtual size_t CountNodes(int branch_num) const
Definition: vocab_tree.cpp:558
float weight
weight for this node
Definition: vocab_tree.h:166
virtual double ComputeImageVectorMagnitude(int bf, DistanceType dt)
Definition: vocab_tree.cpp:782
bool ReadTree(const char *filename)
read a vocabulary tree from a file
Definition: vocab_tree.cpp:420
virtual size_t CountLeaves(int branch_num) const
Definition: vocab_tree.cpp:572
std::vector< ImageCount > inv_list
image inverted list
Definition: vocab_tree.h:167
void Show() const
a test function
Definition: vocab_tree.cpp:551
The vocabulary tree class. The depth of the root is 0. The depth of the leaf nodes is depth+1...
Definition: vocab_tree.h:174
int dim
the dimension of the descriptor
Definition: vocab_tree.h:201
virtual ~TreeLeafNode()
Definition: vocab_tree.cpp:78
virtual bool WriteNode(FILE *f, int branch_num, int dim) const
Definition: vocab_tree.cpp:399
bool ComputeTFIDFWeight(size_t image_num)
compute TF-IDF weight and pre-apply weight adjusting to inverted lists
Definition: vocab_tree.cpp:823
bool SetConstantWeight()
set a constant weight to the leaf nodes
Definition: vocab_tree.cpp:796
virtual size_t CountNodes(int branch_num) const
Definition: vocab_tree.cpp:569
DistanceType
Definition: vocab_tree.h:47
size_t IndexLeaves()
index the leaf nodes
Definition: vocab_tree.cpp:995
VocabTree()
Definition: vocab_tree.cpp:62
virtual bool ClearNode(int bf)
Definition: vocab_tree.cpp:102
virtual bool ComputeTFIDFWeight(int bf, size_t n)=0
compute TF-IDF weight and pre-apply weight adjusting to inverted lists
bool WriteTree(const char *filename) const
save the vocabulary tree in a file
Definition: vocab_tree.cpp:342
ImageCount()
Definition: vocab_tree.h:59
size_t num_leaves
the number of leaf nodes in the tree
Definition: vocab_tree.h:204
virtual bool FillQueryVector(float *q, int branch_num, float normalize_factor)
fill the query vector
Definition: vocab_tree.cpp:1030
int branch_num
the branch number of a node
Definition: vocab_tree.h:199
virtual bool RecursiveBuild(size_t num_keys, int dim, int depth, int depth_curr, int bf, DTYPE **p, double *means, int *assign, int thread_num=1)
Definition: vocab_tree.cpp:195
virtual bool ComputeDatabaseMagnitude(int bf, DistanceType dis_type, size_t start_id, std::vector< float > &database_mag)=0
compute the vector magnitude of all images in the database
virtual bool SetConstantWeight(int bf)=0
set a constant weight to the leaf nodes
bool NormalizeDatabase(size_t start_id, size_t image_num)
normalize the inverted list score by the magnitude of image vector
Definition: vocab_tree.cpp:861
virtual bool FillQueryVector(float *q, int branch_num, float normalize_factor)=0
fill the query vector
virtual bool SetConstantWeight(int bf)
set a constant weight to the leaf nodes
Definition: vocab_tree.cpp:805
float score
temporary score, for querying and computing magnitude use
Definition: vocab_tree.h:165
virtual ~TreeNode()
Definition: vocab_tree.h:73
size_t num_nodes
the number of nodes in the tree
Definition: vocab_tree.h:203
virtual bool IndexLeaves(int branch_num)
Definition: vocab_tree.cpp:1002
virtual bool ComputeTFIDFWeight(int bf, size_t n)
compute TF-IDF weight and pre-apply weight adjusting to inverted lists
Definition: vocab_tree.cpp:830
virtual bool ReadNode(FILE *f, int branch_num, int dim)
Definition: vocab_tree.cpp:460
virtual size_t DescendFeature(float *q, DTYPE *v, size_t image_index, int branch_num, int dim, bool add=true)=0
virtual bool ClearScores(int bf)
refresh the temporary score for this tree
Definition: vocab_tree.cpp:131
size_t database_image_num
the number of the database images
Definition: vocab_tree.h:202
virtual size_t DescendFeature(float *q, DTYPE *v, size_t image_index, int branch_num, int dim, bool add=true)
Definition: vocab_tree.cpp:724
Sift data structure used in libvot.
Definition: data_types.h:22
This class contains a index for an image and a count for image.
Definition: vocab_tree.h:56
virtual bool ClearNode(int bf)
Definition: vocab_tree.cpp:113
global parameters and utility functions
virtual bool Compare(TreeNode *leaf, int branch_num, int dim) const
Definition: vocab_tree.cpp:636
Definition: vocab_tree.h:49
virtual bool ScoreQuery(float *q, int branch_num, DistanceType dt, float *scores)
score each image in the database
Definition: vocab_tree.cpp:1048
size_t index
Definition: vocab_tree.h:62
namespace vot contains libvot functions and classes.
Definition: image_graph.cpp:49
virtual bool IndexLeaves(int branch_num)=0
TreeNode ** children
Definition: vocab_tree.h:134
int depth
the depth of the tree
Definition: vocab_tree.h:200
bool Compare(VocabTree &v) const
compare two vocabulary tree and returns whether they are the same
Definition: vocab_tree.cpp:586
The interior node class of the tree.
Definition: vocab_tree.h:109
virtual bool ReadNode(FILE *f, int branch_num, int dim)=0
virtual bool ReadNode(FILE *f, int branch_num, int dim)
Definition: vocab_tree.cpp:510
virtual bool ScoreQuery(float *q, int branch_num, DistanceType dt, float *scores)
score each image in the database
Definition: vocab_tree.cpp:1036
virtual size_t CountNodes(int branch_num) const =0
bool ClearTree()
release the memory
Definition: vocab_tree.cpp:91
virtual size_t CountLeaves(int branch_num) const =0
virtual bool NormalizeDatabase(int bf, size_t start_id, std::vector< float > &database_mag)
normalize the inverted list score by the magnitude of image vector
Definition: vocab_tree.cpp:931
virtual ~TreeInNode()
Definition: vocab_tree.cpp:69
virtual double ComputeImageVectorMagnitude(int bf, DistanceType dt)=0
virtual size_t CountLeaves(int branch_num) const
Definition: vocab_tree.cpp:583
virtual double ComputeImageVectorMagnitude(int bf, DistanceType dt)
Definition: vocab_tree.cpp:771
virtual bool NormalizeDatabase(int bf, size_t start_id, std::vector< float > &database_mag)
normalize the inverted list score by the magnitude of image vector
Definition: vocab_tree.cpp:921
Definition: vocab_tree.h:50
internal data types (structs and classes) used in libvot