[37] | 1 | //---------------------------------------------------------------------- |
---|
| 2 | // File: kd_search.cpp |
---|
| 3 | // Programmer: Sunil Arya and David Mount |
---|
| 4 | // Description: Standard kd-tree search |
---|
| 5 | // Last modified: 01/04/05 (Version 1.0) |
---|
| 6 | //---------------------------------------------------------------------- |
---|
| 7 | // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and |
---|
| 8 | // David Mount. All Rights Reserved. |
---|
| 9 | // |
---|
| 10 | // This software and related documentation is part of the Approximate |
---|
| 11 | // Nearest Neighbor Library (ANN). This software is provided under |
---|
| 12 | // the provisions of the Lesser GNU Public License (LGPL). See the |
---|
| 13 | // file ../ReadMe.txt for further information. |
---|
| 14 | // |
---|
| 15 | // The University of Maryland (U.M.) and the authors make no |
---|
| 16 | // representations about the suitability or fitness of this software for |
---|
| 17 | // any purpose. It is provided "as is" without express or implied |
---|
| 18 | // warranty. |
---|
| 19 | //---------------------------------------------------------------------- |
---|
| 20 | // History: |
---|
| 21 | // Revision 0.1 03/04/98 |
---|
| 22 | // Initial release |
---|
| 23 | // Revision 1.0 04/01/05 |
---|
| 24 | // Changed names LO, HI to ANN_LO, ANN_HI |
---|
| 25 | //---------------------------------------------------------------------- |
---|
| 26 | |
---|
| 27 | #include "kd_search.h" // kd-search declarations |
---|
| 28 | |
---|
| 29 | //---------------------------------------------------------------------- |
---|
| 30 | // Approximate nearest neighbor searching by kd-tree search |
---|
| 31 | // The kd-tree is searched for an approximate nearest neighbor. |
---|
| 32 | // The point is returned through one of the arguments, and the |
---|
| 33 | // distance returned is the squared distance to this point. |
---|
| 34 | // |
---|
| 35 | // The method used for searching the kd-tree is an approximate |
---|
| 36 | // adaptation of the search algorithm described by Friedman, |
---|
| 37 | // Bentley, and Finkel, ``An algorithm for finding best matches |
---|
| 38 | // in logarithmic expected time,'' ACM Transactions on Mathematical |
---|
| 39 | // Software, 3(3):209-226, 1977). |
---|
| 40 | // |
---|
| 41 | // The algorithm operates recursively. When first encountering a |
---|
| 42 | // node of the kd-tree we first visit the child which is closest to |
---|
| 43 | // the query point. On return, we decide whether we want to visit |
---|
| 44 | // the other child. If the box containing the other child exceeds |
---|
| 45 | // 1/(1+eps) times the current best distance, then we skip it (since |
---|
| 46 | // any point found in this child cannot be closer to the query point |
---|
| 47 | // by more than this factor.) Otherwise, we visit it recursively. |
---|
| 48 | // The distance between a box and the query point is computed exactly |
---|
| 49 | // (not approximated as is often done in kd-tree), using incremental |
---|
| 50 | // distance updates, as described by Arya and Mount in ``Algorithms |
---|
| 51 | // for fast vector quantization,'' Proc. of DCC '93: Data Compression |
---|
| 52 | // Conference, eds. J. A. Storer and M. Cohn, IEEE Press, 1993, |
---|
| 53 | // 381-390. |
---|
| 54 | // |
---|
| 55 | // The main entry points is annkSearch() which sets things up and |
---|
| 56 | // then call the recursive routine ann_search(). This is a recursive |
---|
| 57 | // routine which performs the processing for one node in the kd-tree. |
---|
| 58 | // There are two versions of this virtual procedure, one for splitting |
---|
| 59 | // nodes and one for leaves. When a splitting node is visited, we |
---|
| 60 | // determine which child to visit first (the closer one), and visit |
---|
| 61 | // the other child on return. When a leaf is visited, we compute |
---|
| 62 | // the distances to the points in the buckets, and update information |
---|
| 63 | // on the closest points. |
---|
| 64 | // |
---|
| 65 | // Some trickery is used to incrementally update the distance from |
---|
| 66 | // a kd-tree rectangle to the query point. This comes about from |
---|
| 67 | // the fact that which each successive split, only one component |
---|
| 68 | // (along the dimension that is split) of the squared distance to |
---|
| 69 | // the child rectangle is different from the squared distance to |
---|
| 70 | // the parent rectangle. |
---|
| 71 | //---------------------------------------------------------------------- |
---|
| 72 | |
---|
| 73 | //---------------------------------------------------------------------- |
---|
| 74 | // To keep argument lists short, a number of global variables |
---|
| 75 | // are maintained which are common to all the recursive calls. |
---|
| 76 | // These are given below. |
---|
| 77 | //---------------------------------------------------------------------- |
---|
| 78 | |
---|
| 79 | int ANNkdDim; // dimension of space |
---|
| 80 | ANNpoint ANNkdQ; // query point |
---|
| 81 | double ANNkdMaxErr; // max tolerable squared error |
---|
| 82 | ANNpointArray ANNkdPts; // the points |
---|
| 83 | ANNmin_k *ANNkdPointMK; // set of k closest points |
---|
| 84 | |
---|
| 85 | //---------------------------------------------------------------------- |
---|
| 86 | // annkSearch - search for the k nearest neighbors |
---|
| 87 | //---------------------------------------------------------------------- |
---|
| 88 | |
---|
| 89 | void ANNkd_tree::annkSearch( |
---|
| 90 | ANNpoint q, // the query point |
---|
| 91 | int k, // number of near neighbors to return |
---|
| 92 | ANNidxArray nn_idx, // nearest neighbor indices (returned) |
---|
| 93 | ANNdistArray dd, // the approximate nearest neighbor |
---|
| 94 | double eps) // the error bound |
---|
| 95 | { |
---|
| 96 | |
---|
| 97 | ANNkdDim = dim; // copy arguments to static equivs |
---|
| 98 | ANNkdQ = q; |
---|
| 99 | ANNkdPts = pts; |
---|
| 100 | ANNptsVisited = 0; // initialize count of points visited |
---|
| 101 | |
---|
| 102 | if (k > n_pts) { // too many near neighbors? |
---|
| 103 | annError("Requesting more near neighbors than data points", ANNabort); |
---|
| 104 | } |
---|
| 105 | |
---|
| 106 | ANNkdMaxErr = ANN_POW(1.0 + eps); |
---|
| 107 | ANN_FLOP(2) // increment floating op count |
---|
| 108 | |
---|
| 109 | ANNkdPointMK = new ANNmin_k(k); // create set for closest k points |
---|
| 110 | // search starting at the root |
---|
| 111 | root->ann_search(annBoxDistance(q, bnd_box_lo, bnd_box_hi, dim)); |
---|
| 112 | |
---|
| 113 | for (int i = 0; i < k; i++) { // extract the k-th closest points |
---|
| 114 | dd[i] = ANNkdPointMK->ith_smallest_key(i); |
---|
| 115 | nn_idx[i] = ANNkdPointMK->ith_smallest_info(i); |
---|
| 116 | } |
---|
| 117 | delete ANNkdPointMK; // deallocate closest point set |
---|
| 118 | } |
---|
| 119 | |
---|
| 120 | //---------------------------------------------------------------------- |
---|
| 121 | // kd_split::ann_search - search a splitting node |
---|
| 122 | //---------------------------------------------------------------------- |
---|
| 123 | |
---|
| 124 | void ANNkd_split::ann_search(ANNdist box_dist) |
---|
| 125 | { |
---|
| 126 | // check dist calc term condition |
---|
| 127 | if (ANNmaxPtsVisited != 0 && ANNptsVisited > ANNmaxPtsVisited) return; |
---|
| 128 | |
---|
| 129 | // distance to cutting plane |
---|
| 130 | ANNcoord cut_diff = ANNkdQ[cut_dim] - cut_val; |
---|
| 131 | |
---|
| 132 | if (cut_diff < 0) { // left of cutting plane |
---|
| 133 | child[ANN_LO]->ann_search(box_dist);// visit closer child first |
---|
| 134 | |
---|
| 135 | ANNcoord box_diff = cd_bnds[ANN_LO] - ANNkdQ[cut_dim]; |
---|
| 136 | if (box_diff < 0) // within bounds - ignore |
---|
| 137 | box_diff = 0; |
---|
| 138 | // distance to further box |
---|
| 139 | box_dist = (ANNdist) ANN_SUM(box_dist, |
---|
| 140 | ANN_DIFF(ANN_POW(box_diff), ANN_POW(cut_diff))); |
---|
| 141 | |
---|
| 142 | // visit further child if close enough |
---|
| 143 | if (box_dist * ANNkdMaxErr < ANNkdPointMK->max_key()) |
---|
| 144 | child[ANN_HI]->ann_search(box_dist); |
---|
| 145 | |
---|
| 146 | } |
---|
| 147 | else { // right of cutting plane |
---|
| 148 | child[ANN_HI]->ann_search(box_dist);// visit closer child first |
---|
| 149 | |
---|
| 150 | ANNcoord box_diff = ANNkdQ[cut_dim] - cd_bnds[ANN_HI]; |
---|
| 151 | if (box_diff < 0) // within bounds - ignore |
---|
| 152 | box_diff = 0; |
---|
| 153 | // distance to further box |
---|
| 154 | box_dist = (ANNdist) ANN_SUM(box_dist, |
---|
| 155 | ANN_DIFF(ANN_POW(box_diff), ANN_POW(cut_diff))); |
---|
| 156 | |
---|
| 157 | // visit further child if close enough |
---|
| 158 | if (box_dist * ANNkdMaxErr < ANNkdPointMK->max_key()) |
---|
| 159 | child[ANN_LO]->ann_search(box_dist); |
---|
| 160 | |
---|
| 161 | } |
---|
| 162 | ANN_FLOP(10) // increment floating ops |
---|
| 163 | ANN_SPL(1) // one more splitting node visited |
---|
| 164 | } |
---|
| 165 | |
---|
| 166 | //---------------------------------------------------------------------- |
---|
| 167 | // kd_leaf::ann_search - search points in a leaf node |
---|
| 168 | // Note: The unreadability of this code is the result of |
---|
| 169 | // some fine tuning to replace indexing by pointer operations. |
---|
| 170 | //---------------------------------------------------------------------- |
---|
| 171 | |
---|
| 172 | void ANNkd_leaf::ann_search(ANNdist box_dist) |
---|
| 173 | { |
---|
| 174 | register ANNdist dist; // distance to data point |
---|
| 175 | register ANNcoord* pp; // data coordinate pointer |
---|
| 176 | register ANNcoord* qq; // query coordinate pointer |
---|
| 177 | register ANNdist min_dist; // distance to k-th closest point |
---|
| 178 | register ANNcoord t; |
---|
| 179 | register int d; |
---|
| 180 | |
---|
| 181 | min_dist = ANNkdPointMK->max_key(); // k-th smallest distance so far |
---|
| 182 | |
---|
| 183 | for (int i = 0; i < n_pts; i++) { // check points in bucket |
---|
| 184 | |
---|
| 185 | pp = ANNkdPts[bkt[i]]; // first coord of next data point |
---|
| 186 | qq = ANNkdQ; // first coord of query point |
---|
| 187 | dist = 0; |
---|
| 188 | |
---|
| 189 | for(d = 0; d < ANNkdDim; d++) { |
---|
| 190 | ANN_COORD(1) // one more coordinate hit |
---|
| 191 | ANN_FLOP(4) // increment floating ops |
---|
| 192 | |
---|
| 193 | t = *(qq++) - *(pp++); // compute length and adv coordinate |
---|
| 194 | // exceeds dist to k-th smallest? |
---|
| 195 | if( (dist = ANN_SUM(dist, ANN_POW(t))) > min_dist) { |
---|
| 196 | break; |
---|
| 197 | } |
---|
| 198 | } |
---|
| 199 | |
---|
| 200 | if (d >= ANNkdDim && // among the k best? |
---|
| 201 | (ANN_ALLOW_SELF_MATCH || dist!=0)) { // and no self-match problem |
---|
| 202 | // add it to the list |
---|
| 203 | ANNkdPointMK->insert(dist, bkt[i]); |
---|
| 204 | min_dist = ANNkdPointMK->max_key(); |
---|
| 205 | } |
---|
| 206 | } |
---|
| 207 | ANN_LEAF(1) // one more leaf node visited |
---|
| 208 | ANN_PTS(n_pts) // increment points visited |
---|
| 209 | ANNptsVisited += n_pts; // increment number of points visited |
---|
| 210 | } |
---|