1 | //---------------------------------------------------------------------- |
---|
2 | // File: kd_search.cpp |
---|
3 | // Programmer: Sunil Arya and David Mount |
---|
4 | // Description: Standard kd-tree search |
---|
5 | // Last modified: 01/04/05 (Version 1.0) |
---|
6 | //---------------------------------------------------------------------- |
---|
7 | // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and |
---|
8 | // David Mount. All Rights Reserved. |
---|
9 | // |
---|
10 | // This software and related documentation is part of the Approximate |
---|
11 | // Nearest Neighbor Library (ANN). This software is provided under |
---|
12 | // the provisions of the Lesser GNU Public License (LGPL). See the |
---|
13 | // file ../ReadMe.txt for further information. |
---|
14 | // |
---|
15 | // The University of Maryland (U.M.) and the authors make no |
---|
16 | // representations about the suitability or fitness of this software for |
---|
17 | // any purpose. It is provided "as is" without express or implied |
---|
18 | // warranty. |
---|
19 | //---------------------------------------------------------------------- |
---|
20 | // History: |
---|
21 | // Revision 0.1 03/04/98 |
---|
22 | // Initial release |
---|
23 | // Revision 1.0 04/01/05 |
---|
24 | // Changed names LO, HI to ANN_LO, ANN_HI |
---|
25 | //---------------------------------------------------------------------- |
---|
26 | |
---|
27 | #include "kd_search.h" // kd-search declarations |
---|
28 | |
---|
29 | //---------------------------------------------------------------------- |
---|
30 | // Approximate nearest neighbor searching by kd-tree search |
---|
31 | // The kd-tree is searched for an approximate nearest neighbor. |
---|
32 | // The point is returned through one of the arguments, and the |
---|
33 | // distance returned is the squared distance to this point. |
---|
34 | // |
---|
35 | // The method used for searching the kd-tree is an approximate |
---|
36 | // adaptation of the search algorithm described by Friedman, |
---|
37 | // Bentley, and Finkel, ``An algorithm for finding best matches |
---|
38 | // in logarithmic expected time,'' ACM Transactions on Mathematical |
---|
39 | // Software, 3(3):209-226, 1977). |
---|
40 | // |
---|
41 | // The algorithm operates recursively. When first encountering a |
---|
42 | // node of the kd-tree we first visit the child which is closest to |
---|
43 | // the query point. On return, we decide whether we want to visit |
---|
44 | // the other child. If the box containing the other child exceeds |
---|
45 | // 1/(1+eps) times the current best distance, then we skip it (since |
---|
46 | // any point found in this child cannot be closer to the query point |
---|
47 | // by more than this factor.) Otherwise, we visit it recursively. |
---|
48 | // The distance between a box and the query point is computed exactly |
---|
49 | // (not approximated as is often done in kd-tree), using incremental |
---|
50 | // distance updates, as described by Arya and Mount in ``Algorithms |
---|
51 | // for fast vector quantization,'' Proc. of DCC '93: Data Compression |
---|
52 | // Conference, eds. J. A. Storer and M. Cohn, IEEE Press, 1993, |
---|
53 | // 381-390. |
---|
54 | // |
---|
55 | // The main entry points is annkSearch() which sets things up and |
---|
56 | // then call the recursive routine ann_search(). This is a recursive |
---|
57 | // routine which performs the processing for one node in the kd-tree. |
---|
58 | // There are two versions of this virtual procedure, one for splitting |
---|
59 | // nodes and one for leaves. When a splitting node is visited, we |
---|
60 | // determine which child to visit first (the closer one), and visit |
---|
61 | // the other child on return. When a leaf is visited, we compute |
---|
62 | // the distances to the points in the buckets, and update information |
---|
63 | // on the closest points. |
---|
64 | // |
---|
65 | // Some trickery is used to incrementally update the distance from |
---|
66 | // a kd-tree rectangle to the query point. This comes about from |
---|
67 | // the fact that which each successive split, only one component |
---|
68 | // (along the dimension that is split) of the squared distance to |
---|
69 | // the child rectangle is different from the squared distance to |
---|
70 | // the parent rectangle. |
---|
71 | //---------------------------------------------------------------------- |
---|
72 | |
---|
73 | //---------------------------------------------------------------------- |
---|
74 | // To keep argument lists short, a number of global variables |
---|
75 | // are maintained which are common to all the recursive calls. |
---|
76 | // These are given below. |
---|
77 | //---------------------------------------------------------------------- |
---|
78 | |
---|
79 | int ANNkdDim; // dimension of space |
---|
80 | ANNpoint ANNkdQ; // query point |
---|
81 | double ANNkdMaxErr; // max tolerable squared error |
---|
82 | ANNpointArray ANNkdPts; // the points |
---|
83 | ANNmin_k *ANNkdPointMK; // set of k closest points |
---|
84 | |
---|
85 | //---------------------------------------------------------------------- |
---|
86 | // annkSearch - search for the k nearest neighbors |
---|
87 | //---------------------------------------------------------------------- |
---|
88 | |
---|
89 | void ANNkd_tree::annkSearch( |
---|
90 | ANNpoint q, // the query point |
---|
91 | int k, // number of near neighbors to return |
---|
92 | ANNidxArray nn_idx, // nearest neighbor indices (returned) |
---|
93 | ANNdistArray dd, // the approximate nearest neighbor |
---|
94 | double eps) // the error bound |
---|
95 | { |
---|
96 | |
---|
97 | ANNkdDim = dim; // copy arguments to static equivs |
---|
98 | ANNkdQ = q; |
---|
99 | ANNkdPts = pts; |
---|
100 | ANNptsVisited = 0; // initialize count of points visited |
---|
101 | |
---|
102 | if (k > n_pts) { // too many near neighbors? |
---|
103 | annError("Requesting more near neighbors than data points", ANNabort); |
---|
104 | } |
---|
105 | |
---|
106 | ANNkdMaxErr = ANN_POW(1.0 + eps); |
---|
107 | ANN_FLOP(2) // increment floating op count |
---|
108 | |
---|
109 | ANNkdPointMK = new ANNmin_k(k); // create set for closest k points |
---|
110 | // search starting at the root |
---|
111 | root->ann_search(annBoxDistance(q, bnd_box_lo, bnd_box_hi, dim)); |
---|
112 | |
---|
113 | for (int i = 0; i < k; i++) { // extract the k-th closest points |
---|
114 | dd[i] = ANNkdPointMK->ith_smallest_key(i); |
---|
115 | nn_idx[i] = ANNkdPointMK->ith_smallest_info(i); |
---|
116 | } |
---|
117 | delete ANNkdPointMK; // deallocate closest point set |
---|
118 | } |
---|
119 | |
---|
120 | //---------------------------------------------------------------------- |
---|
121 | // kd_split::ann_search - search a splitting node |
---|
122 | //---------------------------------------------------------------------- |
---|
123 | |
---|
124 | void ANNkd_split::ann_search(ANNdist box_dist) |
---|
125 | { |
---|
126 | // check dist calc term condition |
---|
127 | if (ANNmaxPtsVisited != 0 && ANNptsVisited > ANNmaxPtsVisited) return; |
---|
128 | |
---|
129 | // distance to cutting plane |
---|
130 | ANNcoord cut_diff = ANNkdQ[cut_dim] - cut_val; |
---|
131 | |
---|
132 | if (cut_diff < 0) { // left of cutting plane |
---|
133 | child[ANN_LO]->ann_search(box_dist);// visit closer child first |
---|
134 | |
---|
135 | ANNcoord box_diff = cd_bnds[ANN_LO] - ANNkdQ[cut_dim]; |
---|
136 | if (box_diff < 0) // within bounds - ignore |
---|
137 | box_diff = 0; |
---|
138 | // distance to further box |
---|
139 | box_dist = (ANNdist) ANN_SUM(box_dist, |
---|
140 | ANN_DIFF(ANN_POW(box_diff), ANN_POW(cut_diff))); |
---|
141 | |
---|
142 | // visit further child if close enough |
---|
143 | if (box_dist * ANNkdMaxErr < ANNkdPointMK->max_key()) |
---|
144 | child[ANN_HI]->ann_search(box_dist); |
---|
145 | |
---|
146 | } |
---|
147 | else { // right of cutting plane |
---|
148 | child[ANN_HI]->ann_search(box_dist);// visit closer child first |
---|
149 | |
---|
150 | ANNcoord box_diff = ANNkdQ[cut_dim] - cd_bnds[ANN_HI]; |
---|
151 | if (box_diff < 0) // within bounds - ignore |
---|
152 | box_diff = 0; |
---|
153 | // distance to further box |
---|
154 | box_dist = (ANNdist) ANN_SUM(box_dist, |
---|
155 | ANN_DIFF(ANN_POW(box_diff), ANN_POW(cut_diff))); |
---|
156 | |
---|
157 | // visit further child if close enough |
---|
158 | if (box_dist * ANNkdMaxErr < ANNkdPointMK->max_key()) |
---|
159 | child[ANN_LO]->ann_search(box_dist); |
---|
160 | |
---|
161 | } |
---|
162 | ANN_FLOP(10) // increment floating ops |
---|
163 | ANN_SPL(1) // one more splitting node visited |
---|
164 | } |
---|
165 | |
---|
166 | //---------------------------------------------------------------------- |
---|
167 | // kd_leaf::ann_search - search points in a leaf node |
---|
168 | // Note: The unreadability of this code is the result of |
---|
169 | // some fine tuning to replace indexing by pointer operations. |
---|
170 | //---------------------------------------------------------------------- |
---|
171 | |
---|
172 | void ANNkd_leaf::ann_search(ANNdist box_dist) |
---|
173 | { |
---|
174 | register ANNdist dist; // distance to data point |
---|
175 | register ANNcoord* pp; // data coordinate pointer |
---|
176 | register ANNcoord* qq; // query coordinate pointer |
---|
177 | register ANNdist min_dist; // distance to k-th closest point |
---|
178 | register ANNcoord t; |
---|
179 | register int d; |
---|
180 | |
---|
181 | min_dist = ANNkdPointMK->max_key(); // k-th smallest distance so far |
---|
182 | |
---|
183 | for (int i = 0; i < n_pts; i++) { // check points in bucket |
---|
184 | |
---|
185 | pp = ANNkdPts[bkt[i]]; // first coord of next data point |
---|
186 | qq = ANNkdQ; // first coord of query point |
---|
187 | dist = 0; |
---|
188 | |
---|
189 | for(d = 0; d < ANNkdDim; d++) { |
---|
190 | ANN_COORD(1) // one more coordinate hit |
---|
191 | ANN_FLOP(4) // increment floating ops |
---|
192 | |
---|
193 | t = *(qq++) - *(pp++); // compute length and adv coordinate |
---|
194 | // exceeds dist to k-th smallest? |
---|
195 | if( (dist = ANN_SUM(dist, ANN_POW(t))) > min_dist) { |
---|
196 | break; |
---|
197 | } |
---|
198 | } |
---|
199 | |
---|
200 | if (d >= ANNkdDim && // among the k best? |
---|
201 | (ANN_ALLOW_SELF_MATCH || dist!=0)) { // and no self-match problem |
---|
202 | // add it to the list |
---|
203 | ANNkdPointMK->insert(dist, bkt[i]); |
---|
204 | min_dist = ANNkdPointMK->max_key(); |
---|
205 | } |
---|
206 | } |
---|
207 | ANN_LEAF(1) // one more leaf node visited |
---|
208 | ANN_PTS(n_pts) // increment points visited |
---|
209 | ANNptsVisited += n_pts; // increment number of points visited |
---|
210 | } |
---|