[37] | 1 | //---------------------------------------------------------------------- |
---|
| 2 | // File: ANN.h |
---|
| 3 | // Programmer: Sunil Arya and David Mount |
---|
| 4 | // Last modified: 05/03/05 (Release 1.1) |
---|
| 5 | // Description: Basic include file for approximate nearest |
---|
| 6 | // neighbor searching. |
---|
| 7 | //---------------------------------------------------------------------- |
---|
| 8 | // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and |
---|
| 9 | // David Mount. All Rights Reserved. |
---|
| 10 | // |
---|
| 11 | // This software and related documentation is part of the Approximate |
---|
| 12 | // Nearest Neighbor Library (ANN). This software is provided under |
---|
| 13 | // the provisions of the Lesser GNU Public License (LGPL). See the |
---|
| 14 | // file ../ReadMe.txt for further information. |
---|
| 15 | // |
---|
| 16 | // The University of Maryland (U.M.) and the authors make no |
---|
| 17 | // representations about the suitability or fitness of this software for |
---|
| 18 | // any purpose. It is provided "as is" without express or implied |
---|
| 19 | // warranty. |
---|
| 20 | //---------------------------------------------------------------------- |
---|
| 21 | // History: |
---|
| 22 | // Revision 0.1 03/04/98 |
---|
| 23 | // Initial release |
---|
| 24 | // Revision 1.0 04/01/05 |
---|
| 25 | // Added copyright and revision information |
---|
| 26 | // Added ANNcoordPrec for coordinate precision. |
---|
| 27 | // Added methods theDim, nPoints, maxPoints, thePoints to ANNpointSet. |
---|
| 28 | // Cleaned up C++ structure for modern compilers |
---|
| 29 | // Revision 1.1 05/03/05 |
---|
| 30 | // Added fixed-radius k-NN searching |
---|
| 31 | //---------------------------------------------------------------------- |
---|
| 32 | |
---|
| 33 | //---------------------------------------------------------------------- |
---|
| 34 | // ANN - approximate nearest neighbor searching |
---|
| 35 | // ANN is a library for approximate nearest neighbor searching, |
---|
| 36 | // based on the use of standard and priority search in kd-trees |
---|
| 37 | // and balanced box-decomposition (bbd) trees. Here are some |
---|
| 38 | // references to the main algorithmic techniques used here: |
---|
| 39 | // |
---|
| 40 | // kd-trees: |
---|
| 41 | // Friedman, Bentley, and Finkel, ``An algorithm for finding |
---|
| 42 | // best matches in logarithmic expected time,'' ACM |
---|
| 43 | // Transactions on Mathematical Software, 3(3):209-226, 1977. |
---|
| 44 | // |
---|
| 45 | // Priority search in kd-trees: |
---|
| 46 | // Arya and Mount, ``Algorithms for fast vector quantization,'' |
---|
| 47 | // Proc. of DCC '93: Data Compression Conference, eds. J. A. |
---|
| 48 | // Storer and M. Cohn, IEEE Press, 1993, 381-390. |
---|
| 49 | // |
---|
| 50 | // Approximate nearest neighbor search and bbd-trees: |
---|
| 51 | // Arya, Mount, Netanyahu, Silverman, and Wu, ``An optimal |
---|
| 52 | // algorithm for approximate nearest neighbor searching,'' |
---|
| 53 | // 5th Ann. ACM-SIAM Symposium on Discrete Algorithms, |
---|
| 54 | // 1994, 573-582. |
---|
| 55 | //---------------------------------------------------------------------- |
---|
| 56 | |
---|
| 57 | #ifndef ANN_H |
---|
| 58 | #define ANN_H |
---|
| 59 | |
---|
| 60 | #ifdef WIN32 |
---|
| 61 | //---------------------------------------------------------------------- |
---|
| 62 | // For Microsoft Visual C++, externally accessible symbols must be |
---|
| 63 | // explicitly indicated with DLL_API, which is somewhat like "extern." |
---|
| 64 | // |
---|
| 65 | // The following ifdef block is the standard way of creating macros |
---|
| 66 | // which make exporting from a DLL simpler. All files within this DLL |
---|
| 67 | // are compiled with the DLL_EXPORTS preprocessor symbol defined on the |
---|
| 68 | // command line. In contrast, projects that use (or import) the DLL |
---|
| 69 | // objects do not define the DLL_EXPORTS symbol. This way any other |
---|
| 70 | // project whose source files include this file see DLL_API functions as |
---|
| 71 | // being imported from a DLL, wheras this DLL sees symbols defined with |
---|
| 72 | // this macro as being exported. |
---|
| 73 | //---------------------------------------------------------------------- |
---|
| 74 | #ifdef DLL_EXPORTS |
---|
| 75 | #define DLL_API __declspec(dllexport) |
---|
| 76 | #else |
---|
| 77 | #define DLL_API __declspec(dllimport) |
---|
| 78 | #endif |
---|
| 79 | //---------------------------------------------------------------------- |
---|
| 80 | // DLL_API is ignored for all other systems |
---|
| 81 | //---------------------------------------------------------------------- |
---|
| 82 | #else |
---|
| 83 | #define DLL_API |
---|
| 84 | #endif |
---|
| 85 | |
---|
| 86 | //---------------------------------------------------------------------- |
---|
| 87 | // basic includes |
---|
| 88 | //---------------------------------------------------------------------- |
---|
| 89 | |
---|
| 90 | #include <cmath> // math includes |
---|
| 91 | #include <iostream> // I/O streams |
---|
| 92 | |
---|
| 93 | //---------------------------------------------------------------------- |
---|
| 94 | // Limits |
---|
| 95 | // There are a number of places where we use the maximum double value as |
---|
| 96 | // default initializers (and others may be used, depending on the |
---|
| 97 | // data/distance representation). These can usually be found in limits.h |
---|
| 98 | // (as LONG_MAX, INT_MAX) or in float.h (as DBL_MAX, FLT_MAX). |
---|
| 99 | // |
---|
| 100 | // Not all systems have these files. If you are using such a system, |
---|
| 101 | // you should set the preprocessor symbol ANN_NO_LIMITS_H when |
---|
| 102 | // compiling, and modify the statements below to generate the |
---|
| 103 | // appropriate value. For practical purposes, this does not need to be |
---|
| 104 | // the maximum double value. It is sufficient that it be at least as |
---|
| 105 | // large than the maximum squared distance between between any two |
---|
| 106 | // points. |
---|
| 107 | //---------------------------------------------------------------------- |
---|
| 108 | #ifdef ANN_NO_LIMITS_H // limits.h unavailable |
---|
| 109 | #include <cvalues> // replacement for limits.h |
---|
| 110 | const double ANN_DBL_MAX = MAXDOUBLE; // insert maximum double |
---|
| 111 | #else |
---|
| 112 | #include <climits> |
---|
| 113 | #include <cfloat> |
---|
| 114 | const double ANN_DBL_MAX = DBL_MAX; |
---|
| 115 | #endif |
---|
| 116 | |
---|
| 117 | #define ANNversion "1.1.1" // ANN version and information |
---|
| 118 | #define ANNversionCmt "" |
---|
| 119 | #define ANNcopyright "David M. Mount and Sunil Arya" |
---|
| 120 | #define ANNlatestRev "Aug 4, 2006" |
---|
| 121 | |
---|
| 122 | //---------------------------------------------------------------------- |
---|
| 123 | // ANNbool |
---|
| 124 | // This is a simple boolean type. Although ANSI C++ is supposed |
---|
| 125 | // to support the type bool, some compilers do not have it. |
---|
| 126 | //---------------------------------------------------------------------- |
---|
| 127 | |
---|
| 128 | enum ANNbool {ANNfalse = 0, ANNtrue = 1}; // ANN boolean type (non ANSI C++) |
---|
| 129 | |
---|
| 130 | //---------------------------------------------------------------------- |
---|
| 131 | // ANNcoord, ANNdist |
---|
| 132 | // ANNcoord and ANNdist are the types used for representing |
---|
| 133 | // point coordinates and distances. They can be modified by the |
---|
| 134 | // user, with some care. It is assumed that they are both numeric |
---|
| 135 | // types, and that ANNdist is generally of an equal or higher type |
---|
| 136 | // from ANNcoord. A variable of type ANNdist should be large |
---|
| 137 | // enough to store the sum of squared components of a variable |
---|
| 138 | // of type ANNcoord for the number of dimensions needed in the |
---|
| 139 | // application. For example, the following combinations are |
---|
| 140 | // legal: |
---|
| 141 | // |
---|
| 142 | // ANNcoord ANNdist |
---|
| 143 | // --------- ------------------------------- |
---|
| 144 | // short short, int, long, float, double |
---|
| 145 | // int int, long, float, double |
---|
| 146 | // long long, float, double |
---|
| 147 | // float float, double |
---|
| 148 | // double double |
---|
| 149 | // |
---|
| 150 | // It is the user's responsibility to make sure that overflow does |
---|
| 151 | // not occur in distance calculation. |
---|
| 152 | //---------------------------------------------------------------------- |
---|
| 153 | |
---|
| 154 | typedef double ANNcoord; // coordinate data type |
---|
| 155 | typedef double ANNdist; // distance data type |
---|
| 156 | |
---|
| 157 | //---------------------------------------------------------------------- |
---|
| 158 | // ANNidx |
---|
| 159 | // ANNidx is a point index. When the data structure is built, the |
---|
| 160 | // points are given as an array. Nearest neighbor results are |
---|
| 161 | // returned as an integer index into this array. To make it |
---|
| 162 | // clearer when this is happening, we define the integer type |
---|
| 163 | // ANNidx. Indexing starts from 0. |
---|
| 164 | // |
---|
| 165 | // For fixed-radius near neighbor searching, it is possible that |
---|
| 166 | // there are not k nearest neighbors within the search radius. To |
---|
| 167 | // indicate this, the algorithm returns ANN_NULL_IDX as its result. |
---|
| 168 | // It should be distinguishable from any valid array index. |
---|
| 169 | //---------------------------------------------------------------------- |
---|
| 170 | |
---|
| 171 | typedef int ANNidx; // point index |
---|
| 172 | const ANNidx ANN_NULL_IDX = -1; // a NULL point index |
---|
| 173 | |
---|
| 174 | //---------------------------------------------------------------------- |
---|
| 175 | // Infinite distance: |
---|
| 176 | // The code assumes that there is an "infinite distance" which it |
---|
| 177 | // uses to initialize distances before performing nearest neighbor |
---|
| 178 | // searches. It should be as larger or larger than any legitimate |
---|
| 179 | // nearest neighbor distance. |
---|
| 180 | // |
---|
| 181 | // On most systems, these should be found in the standard include |
---|
| 182 | // file <limits.h> or possibly <float.h>. If you do not have these |
---|
| 183 | // file, some suggested values are listed below, assuming 64-bit |
---|
| 184 | // long, 32-bit int and 16-bit short. |
---|
| 185 | // |
---|
| 186 | // ANNdist ANN_DIST_INF Values (see <limits.h> or <float.h>) |
---|
| 187 | // ------- ------------ ------------------------------------ |
---|
| 188 | // double DBL_MAX 1.79769313486231570e+308 |
---|
| 189 | // float FLT_MAX 3.40282346638528860e+38 |
---|
| 190 | // long LONG_MAX 0x7fffffffffffffff |
---|
| 191 | // int INT_MAX 0x7fffffff |
---|
| 192 | // short SHRT_MAX 0x7fff |
---|
| 193 | //---------------------------------------------------------------------- |
---|
| 194 | |
---|
| 195 | const ANNdist ANN_DIST_INF = ANN_DBL_MAX; |
---|
| 196 | |
---|
| 197 | //---------------------------------------------------------------------- |
---|
| 198 | // Significant digits for tree dumps: |
---|
| 199 | // When floating point coordinates are used, the routine that dumps |
---|
| 200 | // a tree needs to know roughly how many significant digits there |
---|
| 201 | // are in a ANNcoord, so it can output points to full precision. |
---|
| 202 | // This is defined to be ANNcoordPrec. On most systems these |
---|
| 203 | // values can be found in the standard include files <limits.h> or |
---|
| 204 | // <float.h>. For integer types, the value is essentially ignored. |
---|
| 205 | // |
---|
| 206 | // ANNcoord ANNcoordPrec Values (see <limits.h> or <float.h>) |
---|
| 207 | // -------- ------------ ------------------------------------ |
---|
| 208 | // double DBL_DIG 15 |
---|
| 209 | // float FLT_DIG 6 |
---|
| 210 | // long doesn't matter 19 |
---|
| 211 | // int doesn't matter 10 |
---|
| 212 | // short doesn't matter 5 |
---|
| 213 | //---------------------------------------------------------------------- |
---|
| 214 | |
---|
| 215 | #ifdef DBL_DIG // number of sig. bits in ANNcoord |
---|
| 216 | const int ANNcoordPrec = DBL_DIG; |
---|
| 217 | #else |
---|
| 218 | const int ANNcoordPrec = 15; // default precision |
---|
| 219 | #endif |
---|
| 220 | |
---|
| 221 | //---------------------------------------------------------------------- |
---|
| 222 | // Self match? |
---|
| 223 | // In some applications, the nearest neighbor of a point is not |
---|
| 224 | // allowed to be the point itself. This occurs, for example, when |
---|
| 225 | // computing all nearest neighbors in a set. By setting the |
---|
| 226 | // parameter ANN_ALLOW_SELF_MATCH to ANNfalse, the nearest neighbor |
---|
| 227 | // is the closest point whose distance from the query point is |
---|
| 228 | // strictly positive. |
---|
| 229 | //---------------------------------------------------------------------- |
---|
| 230 | |
---|
| 231 | const ANNbool ANN_ALLOW_SELF_MATCH = ANNtrue; |
---|
| 232 | |
---|
| 233 | //---------------------------------------------------------------------- |
---|
| 234 | // Norms and metrics: |
---|
| 235 | // ANN supports any Minkowski norm for defining distance. In |
---|
| 236 | // particular, for any p >= 1, the L_p Minkowski norm defines the |
---|
| 237 | // length of a d-vector (v0, v1, ..., v(d-1)) to be |
---|
| 238 | // |
---|
| 239 | // (|v0|^p + |v1|^p + ... + |v(d-1)|^p)^(1/p), |
---|
| 240 | // |
---|
| 241 | // (where ^ denotes exponentiation, and |.| denotes absolute |
---|
| 242 | // value). The distance between two points is defined to be the |
---|
| 243 | // norm of the vector joining them. Some common distance metrics |
---|
| 244 | // include |
---|
| 245 | // |
---|
| 246 | // Euclidean metric p = 2 |
---|
| 247 | // Manhattan metric p = 1 |
---|
| 248 | // Max metric p = infinity |
---|
| 249 | // |
---|
| 250 | // In the case of the max metric, the norm is computed by taking |
---|
| 251 | // the maxima of the absolute values of the components. ANN is |
---|
| 252 | // highly "coordinate-based" and does not support general distances |
---|
| 253 | // functions (e.g. those obeying just the triangle inequality). It |
---|
| 254 | // also does not support distance functions based on |
---|
| 255 | // inner-products. |
---|
| 256 | // |
---|
| 257 | // For the purpose of computing nearest neighbors, it is not |
---|
| 258 | // necessary to compute the final power (1/p). Thus the only |
---|
| 259 | // component that is used by the program is |v(i)|^p. |
---|
| 260 | // |
---|
| 261 | // ANN parameterizes the distance computation through the following |
---|
| 262 | // macros. (Macros are used rather than procedures for |
---|
| 263 | // efficiency.) Recall that the distance between two points is |
---|
| 264 | // given by the length of the vector joining them, and the length |
---|
| 265 | // or norm of a vector v is given by formula: |
---|
| 266 | // |
---|
| 267 | // |v| = ROOT(POW(v0) # POW(v1) # ... # POW(v(d-1))) |
---|
| 268 | // |
---|
| 269 | // where ROOT, POW are unary functions and # is an associative and |
---|
| 270 | // commutative binary operator mapping the following types: |
---|
| 271 | // |
---|
| 272 | // ** POW: ANNcoord --> ANNdist |
---|
| 273 | // ** #: ANNdist x ANNdist --> ANNdist |
---|
| 274 | // ** ROOT: ANNdist (>0) --> double |
---|
| 275 | // |
---|
| 276 | // For early termination in distance calculation (partial distance |
---|
| 277 | // calculation) we assume that POW and # together are monotonically |
---|
| 278 | // increasing on sequences of arguments, meaning that for all |
---|
| 279 | // v0..vk and y: |
---|
| 280 | // |
---|
| 281 | // POW(v0) #...# POW(vk) <= (POW(v0) #...# POW(vk)) # POW(y). |
---|
| 282 | // |
---|
| 283 | // Incremental Distance Calculation: |
---|
| 284 | // The program uses an optimized method of computing distances for |
---|
| 285 | // kd-trees and bd-trees, called incremental distance calculation. |
---|
| 286 | // It is used when distances are to be updated when only a single |
---|
| 287 | // coordinate of a point has been changed. In order to use this, |
---|
| 288 | // we assume that there is an incremental update function DIFF(x,y) |
---|
| 289 | // for #, such that if: |
---|
| 290 | // |
---|
| 291 | // s = x0 # ... # xi # ... # xk |
---|
| 292 | // |
---|
| 293 | // then if s' is equal to s but with xi replaced by y, that is, |
---|
| 294 | // |
---|
| 295 | // s' = x0 # ... # y # ... # xk |
---|
| 296 | // |
---|
| 297 | // then the length of s' can be computed by: |
---|
| 298 | // |
---|
| 299 | // |s'| = |s| # DIFF(xi,y). |
---|
| 300 | // |
---|
| 301 | // Thus, if # is + then DIFF(xi,y) is (yi-x). For the L_infinity |
---|
| 302 | // norm we make use of the fact that in the program this function |
---|
| 303 | // is only invoked when y > xi, and hence DIFF(xi,y)=y. |
---|
| 304 | // |
---|
| 305 | // Finally, for approximate nearest neighbor queries we assume |
---|
| 306 | // that POW and ROOT are related such that |
---|
| 307 | // |
---|
| 308 | // v*ROOT(x) = ROOT(POW(v)*x) |
---|
| 309 | // |
---|
| 310 | // Here are the values for the various Minkowski norms: |
---|
| 311 | // |
---|
| 312 | // L_p: p even: p odd: |
---|
| 313 | // ------------------------- ------------------------ |
---|
| 314 | // POW(v) = v^p POW(v) = |v|^p |
---|
| 315 | // ROOT(x) = x^(1/p) ROOT(x) = x^(1/p) |
---|
| 316 | // # = + # = + |
---|
| 317 | // DIFF(x,y) = y - x DIFF(x,y) = y - x |
---|
| 318 | // |
---|
| 319 | // L_inf: |
---|
| 320 | // POW(v) = |v| |
---|
| 321 | // ROOT(x) = x |
---|
| 322 | // # = max |
---|
| 323 | // DIFF(x,y) = y |
---|
| 324 | // |
---|
| 325 | // By default the Euclidean norm is assumed. To change the norm, |
---|
| 326 | // uncomment the appropriate set of macros below. |
---|
| 327 | //---------------------------------------------------------------------- |
---|
| 328 | |
---|
| 329 | //---------------------------------------------------------------------- |
---|
| 330 | // Use the following for the Euclidean norm |
---|
| 331 | //---------------------------------------------------------------------- |
---|
| 332 | #define ANN_POW(v) ((v)*(v)) |
---|
| 333 | #define ANN_ROOT(x) sqrt(x) |
---|
| 334 | #define ANN_SUM(x,y) ((x) + (y)) |
---|
| 335 | #define ANN_DIFF(x,y) ((y) - (x)) |
---|
| 336 | |
---|
| 337 | //---------------------------------------------------------------------- |
---|
| 338 | // Use the following for the L_1 (Manhattan) norm |
---|
| 339 | //---------------------------------------------------------------------- |
---|
| 340 | // #define ANN_POW(v) fabs(v) |
---|
| 341 | // #define ANN_ROOT(x) (x) |
---|
| 342 | // #define ANN_SUM(x,y) ((x) + (y)) |
---|
| 343 | // #define ANN_DIFF(x,y) ((y) - (x)) |
---|
| 344 | |
---|
| 345 | //---------------------------------------------------------------------- |
---|
| 346 | // Use the following for a general L_p norm |
---|
| 347 | //---------------------------------------------------------------------- |
---|
| 348 | // #define ANN_POW(v) pow(fabs(v),p) |
---|
| 349 | // #define ANN_ROOT(x) pow(fabs(x),1/p) |
---|
| 350 | // #define ANN_SUM(x,y) ((x) + (y)) |
---|
| 351 | // #define ANN_DIFF(x,y) ((y) - (x)) |
---|
| 352 | |
---|
| 353 | //---------------------------------------------------------------------- |
---|
| 354 | // Use the following for the L_infinity (Max) norm |
---|
| 355 | //---------------------------------------------------------------------- |
---|
| 356 | // #define ANN_POW(v) fabs(v) |
---|
| 357 | // #define ANN_ROOT(x) (x) |
---|
| 358 | // #define ANN_SUM(x,y) ((x) > (y) ? (x) : (y)) |
---|
| 359 | // #define ANN_DIFF(x,y) (y) |
---|
| 360 | |
---|
| 361 | //---------------------------------------------------------------------- |
---|
| 362 | // Array types |
---|
| 363 | // The following array types are of basic interest. A point is |
---|
| 364 | // just a dimensionless array of coordinates, a point array is a |
---|
| 365 | // dimensionless array of points. A distance array is a |
---|
| 366 | // dimensionless array of distances and an index array is a |
---|
| 367 | // dimensionless array of point indices. The latter two are used |
---|
| 368 | // when returning the results of k-nearest neighbor queries. |
---|
| 369 | //---------------------------------------------------------------------- |
---|
| 370 | |
---|
| 371 | typedef ANNcoord* ANNpoint; // a point |
---|
| 372 | typedef ANNpoint* ANNpointArray; // an array of points |
---|
| 373 | typedef ANNdist* ANNdistArray; // an array of distances |
---|
| 374 | typedef ANNidx* ANNidxArray; // an array of point indices |
---|
| 375 | |
---|
| 376 | //---------------------------------------------------------------------- |
---|
| 377 | // Basic point and array utilities: |
---|
| 378 | // The following procedures are useful supplements to ANN's nearest |
---|
| 379 | // neighbor capabilities. |
---|
| 380 | // |
---|
| 381 | // annDist(): |
---|
| 382 | // Computes the (squared) distance between a pair of points. |
---|
| 383 | // Note that this routine is not used internally by ANN for |
---|
| 384 | // computing distance calculations. For reasons of efficiency |
---|
| 385 | // this is done using incremental distance calculation. Thus, |
---|
| 386 | // this routine cannot be modified as a method of changing the |
---|
| 387 | // metric. |
---|
| 388 | // |
---|
| 389 | // Because points (somewhat like strings in C) are stored as |
---|
| 390 | // pointers. Consequently, creating and destroying copies of |
---|
| 391 | // points may require storage allocation. These procedures do |
---|
| 392 | // this. |
---|
| 393 | // |
---|
| 394 | // annAllocPt() and annDeallocPt(): |
---|
| 395 | // Allocate a deallocate storage for a single point, and |
---|
| 396 | // return a pointer to it. The argument to AllocPt() is |
---|
| 397 | // used to initialize all components. |
---|
| 398 | // |
---|
| 399 | // annAllocPts() and annDeallocPts(): |
---|
| 400 | // Allocate and deallocate an array of points as well a |
---|
| 401 | // place to store their coordinates, and initializes the |
---|
| 402 | // points to point to their respective coordinates. It |
---|
| 403 | // allocates point storage in a contiguous block large |
---|
| 404 | // enough to store all the points. It performs no |
---|
| 405 | // initialization. |
---|
| 406 | // |
---|
| 407 | // annCopyPt(): |
---|
| 408 | // Creates a copy of a given point, allocating space for |
---|
| 409 | // the new point. It returns a pointer to the newly |
---|
| 410 | // allocated copy. |
---|
| 411 | //---------------------------------------------------------------------- |
---|
| 412 | |
---|
| 413 | DLL_API ANNdist annDist( |
---|
| 414 | int dim, // dimension of space |
---|
| 415 | ANNpoint p, // points |
---|
| 416 | ANNpoint q); |
---|
| 417 | |
---|
| 418 | DLL_API ANNpoint annAllocPt( |
---|
| 419 | int dim, // dimension |
---|
| 420 | ANNcoord c = 0); // coordinate value (all equal) |
---|
| 421 | |
---|
| 422 | DLL_API ANNpointArray annAllocPts( |
---|
| 423 | int n, // number of points |
---|
| 424 | int dim); // dimension |
---|
| 425 | |
---|
| 426 | DLL_API void annDeallocPt( |
---|
| 427 | ANNpoint &p); // deallocate 1 point |
---|
| 428 | |
---|
| 429 | DLL_API void annDeallocPts( |
---|
| 430 | ANNpointArray &pa); // point array |
---|
| 431 | |
---|
| 432 | DLL_API ANNpoint annCopyPt( |
---|
| 433 | int dim, // dimension |
---|
| 434 | ANNpoint source); // point to copy |
---|
| 435 | |
---|
| 436 | //---------------------------------------------------------------------- |
---|
| 437 | //Overall structure: ANN supports a number of different data structures |
---|
| 438 | //for approximate and exact nearest neighbor searching. These are: |
---|
| 439 | // |
---|
| 440 | // ANNbruteForce A simple brute-force search structure. |
---|
| 441 | // ANNkd_tree A kd-tree tree search structure. ANNbd_tree |
---|
| 442 | // A bd-tree tree search structure (a kd-tree with shrink |
---|
| 443 | // capabilities). |
---|
| 444 | // |
---|
| 445 | // At a minimum, each of these data structures support k-nearest |
---|
| 446 | // neighbor queries. The nearest neighbor query, annkSearch, |
---|
| 447 | // returns an integer identifier and the distance to the nearest |
---|
| 448 | // neighbor(s) and annRangeSearch returns the nearest points that |
---|
| 449 | // lie within a given query ball. |
---|
| 450 | // |
---|
| 451 | // Each structure is built by invoking the appropriate constructor |
---|
| 452 | // and passing it (at a minimum) the array of points, the total |
---|
| 453 | // number of points and the dimension of the space. Each structure |
---|
| 454 | // is also assumed to support a destructor and member functions |
---|
| 455 | // that return basic information about the point set. |
---|
| 456 | // |
---|
| 457 | // Note that the array of points is not copied by the data |
---|
| 458 | // structure (for reasons of space efficiency), and it is assumed |
---|
| 459 | // to be constant throughout the lifetime of the search structure. |
---|
| 460 | // |
---|
| 461 | // The search algorithm, annkSearch, is given the query point (q), |
---|
| 462 | // and the desired number of nearest neighbors to report (k), and |
---|
| 463 | // the error bound (eps) (whose default value is 0, implying exact |
---|
| 464 | // nearest neighbors). It returns two arrays which are assumed to |
---|
| 465 | // contain at least k elements: one (nn_idx) contains the indices |
---|
| 466 | // (within the point array) of the nearest neighbors and the other |
---|
| 467 | // (dd) contains the squared distances to these nearest neighbors. |
---|
| 468 | // |
---|
| 469 | // The search algorithm, annkFRSearch, is a fixed-radius kNN |
---|
| 470 | // search. In addition to a query point, it is given a (squared) |
---|
| 471 | // radius bound. (This is done for consistency, because the search |
---|
| 472 | // returns distances as squared quantities.) It does two things. |
---|
| 473 | // First, it computes the k nearest neighbors within the radius |
---|
| 474 | // bound, and second, it returns the total number of points lying |
---|
| 475 | // within the radius bound. It is permitted to set k = 0, in which |
---|
| 476 | // case it effectively answers a range counting query. If the |
---|
| 477 | // error bound epsilon is positive, then the search is approximate |
---|
| 478 | // in the sense that it is free to ignore any point that lies |
---|
| 479 | // outside a ball of radius r/(1+epsilon), where r is the given |
---|
| 480 | // (unsquared) radius bound. |
---|
| 481 | // |
---|
| 482 | // The generic object from which all the search structures are |
---|
| 483 | // dervied is given below. It is a virtual object, and is useless |
---|
| 484 | // by itself. |
---|
| 485 | //---------------------------------------------------------------------- |
---|
| 486 | |
---|
| 487 | class DLL_API ANNpointSet { |
---|
| 488 | public: |
---|
| 489 | virtual ~ANNpointSet() {} // virtual distructor |
---|
| 490 | |
---|
| 491 | virtual void annkSearch( // approx k near neighbor search |
---|
| 492 | ANNpoint q, // query point |
---|
| 493 | int k, // number of near neighbors to return |
---|
| 494 | ANNidxArray nn_idx, // nearest neighbor array (modified) |
---|
| 495 | ANNdistArray dd, // dist to near neighbors (modified) |
---|
| 496 | double eps=0.0 // error bound |
---|
| 497 | ) = 0; // pure virtual (defined elsewhere) |
---|
| 498 | |
---|
| 499 | virtual int annkFRSearch( // approx fixed-radius kNN search |
---|
| 500 | ANNpoint q, // query point |
---|
| 501 | ANNdist sqRad, // squared radius |
---|
| 502 | int k = 0, // number of near neighbors to return |
---|
| 503 | ANNidxArray nn_idx = NULL, // nearest neighbor array (modified) |
---|
| 504 | ANNdistArray dd = NULL, // dist to near neighbors (modified) |
---|
| 505 | double eps=0.0 // error bound |
---|
| 506 | ) = 0; // pure virtual (defined elsewhere) |
---|
| 507 | |
---|
| 508 | virtual int theDim() = 0; // return dimension of space |
---|
| 509 | virtual int nPoints() = 0; // return number of points |
---|
| 510 | // return pointer to points |
---|
| 511 | virtual ANNpointArray thePoints() = 0; |
---|
| 512 | }; |
---|
| 513 | |
---|
| 514 | //---------------------------------------------------------------------- |
---|
| 515 | // Brute-force nearest neighbor search: |
---|
| 516 | // The brute-force search structure is very simple but inefficient. |
---|
| 517 | // It has been provided primarily for the sake of comparison with |
---|
| 518 | // and validation of the more complex search structures. |
---|
| 519 | // |
---|
| 520 | // Query processing is the same as described above, but the value |
---|
| 521 | // of epsilon is ignored, since all distance calculations are |
---|
| 522 | // performed exactly. |
---|
| 523 | // |
---|
| 524 | // WARNING: This data structure is very slow, and should not be |
---|
| 525 | // used unless the number of points is very small. |
---|
| 526 | // |
---|
| 527 | // Internal information: |
---|
| 528 | // --------------------- |
---|
| 529 | // This data structure bascially consists of the array of points |
---|
| 530 | // (each a pointer to an array of coordinates). The search is |
---|
| 531 | // performed by a simple linear scan of all the points. |
---|
| 532 | //---------------------------------------------------------------------- |
---|
| 533 | |
---|
| 534 | class DLL_API ANNbruteForce: public ANNpointSet { |
---|
| 535 | int dim; // dimension |
---|
| 536 | int n_pts; // number of points |
---|
| 537 | ANNpointArray pts; // point array |
---|
| 538 | public: |
---|
| 539 | ANNbruteForce( // constructor from point array |
---|
| 540 | ANNpointArray pa, // point array |
---|
| 541 | int n, // number of points |
---|
| 542 | int dd); // dimension |
---|
| 543 | |
---|
| 544 | ~ANNbruteForce(); // destructor |
---|
| 545 | |
---|
| 546 | void annkSearch( // approx k near neighbor search |
---|
| 547 | ANNpoint q, // query point |
---|
| 548 | int k, // number of near neighbors to return |
---|
| 549 | ANNidxArray nn_idx, // nearest neighbor array (modified) |
---|
| 550 | ANNdistArray dd, // dist to near neighbors (modified) |
---|
| 551 | double eps=0.0); // error bound |
---|
| 552 | |
---|
| 553 | int annkFRSearch( // approx fixed-radius kNN search |
---|
| 554 | ANNpoint q, // query point |
---|
| 555 | ANNdist sqRad, // squared radius |
---|
| 556 | int k = 0, // number of near neighbors to return |
---|
| 557 | ANNidxArray nn_idx = NULL, // nearest neighbor array (modified) |
---|
| 558 | ANNdistArray dd = NULL, // dist to near neighbors (modified) |
---|
| 559 | double eps=0.0); // error bound |
---|
| 560 | |
---|
| 561 | int theDim() // return dimension of space |
---|
| 562 | { return dim; } |
---|
| 563 | |
---|
| 564 | int nPoints() // return number of points |
---|
| 565 | { return n_pts; } |
---|
| 566 | |
---|
| 567 | ANNpointArray thePoints() // return pointer to points |
---|
| 568 | { return pts; } |
---|
| 569 | }; |
---|
| 570 | |
---|
| 571 | //---------------------------------------------------------------------- |
---|
| 572 | // kd- and bd-tree splitting and shrinking rules |
---|
| 573 | // kd-trees supports a collection of different splitting rules. |
---|
| 574 | // In addition to the standard kd-tree splitting rule proposed |
---|
| 575 | // by Friedman, Bentley, and Finkel, we have introduced a |
---|
| 576 | // number of other splitting rules, which seem to perform |
---|
| 577 | // as well or better (for the distributions we have tested). |
---|
| 578 | // |
---|
| 579 | // The splitting methods given below allow the user to tailor |
---|
| 580 | // the data structure to the particular data set. They are |
---|
| 581 | // are described in greater details in the kd_split.cc source |
---|
| 582 | // file. The method ANN_KD_SUGGEST is the method chosen (rather |
---|
| 583 | // subjectively) by the implementors as the one giving the |
---|
| 584 | // fastest performance, and is the default splitting method. |
---|
| 585 | // |
---|
| 586 | // As with splitting rules, there are a number of different |
---|
| 587 | // shrinking rules. The shrinking rule ANN_BD_NONE does no |
---|
| 588 | // shrinking (and hence produces a kd-tree tree). The rule |
---|
| 589 | // ANN_BD_SUGGEST uses the implementors favorite rule. |
---|
| 590 | //---------------------------------------------------------------------- |
---|
| 591 | |
---|
| 592 | enum ANNsplitRule { |
---|
| 593 | ANN_KD_STD = 0, // the optimized kd-splitting rule |
---|
| 594 | ANN_KD_MIDPT = 1, // midpoint split |
---|
| 595 | ANN_KD_FAIR = 2, // fair split |
---|
| 596 | ANN_KD_SL_MIDPT = 3, // sliding midpoint splitting method |
---|
| 597 | ANN_KD_SL_FAIR = 4, // sliding fair split method |
---|
| 598 | ANN_KD_SUGGEST = 5}; // the authors' suggestion for best |
---|
| 599 | const int ANN_N_SPLIT_RULES = 6; // number of split rules |
---|
| 600 | |
---|
| 601 | enum ANNshrinkRule { |
---|
| 602 | ANN_BD_NONE = 0, // no shrinking at all (just kd-tree) |
---|
| 603 | ANN_BD_SIMPLE = 1, // simple splitting |
---|
| 604 | ANN_BD_CENTROID = 2, // centroid splitting |
---|
| 605 | ANN_BD_SUGGEST = 3}; // the authors' suggested choice |
---|
| 606 | const int ANN_N_SHRINK_RULES = 4; // number of shrink rules |
---|
| 607 | |
---|
| 608 | //---------------------------------------------------------------------- |
---|
| 609 | // kd-tree: |
---|
| 610 | // The main search data structure supported by ANN is a kd-tree. |
---|
| 611 | // The main constructor is given a set of points and a choice of |
---|
| 612 | // splitting method to use in building the tree. |
---|
| 613 | // |
---|
| 614 | // Construction: |
---|
| 615 | // ------------- |
---|
| 616 | // The constructor is given the point array, number of points, |
---|
| 617 | // dimension, bucket size (default = 1), and the splitting rule |
---|
| 618 | // (default = ANN_KD_SUGGEST). The point array is not copied, and |
---|
| 619 | // is assumed to be kept constant throughout the lifetime of the |
---|
| 620 | // search structure. There is also a "load" constructor that |
---|
| 621 | // builds a tree from a file description that was created by the |
---|
| 622 | // Dump operation. |
---|
| 623 | // |
---|
| 624 | // Search: |
---|
| 625 | // ------- |
---|
| 626 | // There are two search methods: |
---|
| 627 | // |
---|
| 628 | // Standard search (annkSearch()): |
---|
| 629 | // Searches nodes in tree-traversal order, always visiting |
---|
| 630 | // the closer child first. |
---|
| 631 | // Priority search (annkPriSearch()): |
---|
| 632 | // Searches nodes in order of increasing distance of the |
---|
| 633 | // associated cell from the query point. For many |
---|
| 634 | // distributions the standard search seems to work just |
---|
| 635 | // fine, but priority search is safer for worst-case |
---|
| 636 | // performance. |
---|
| 637 | // |
---|
| 638 | // Printing: |
---|
| 639 | // --------- |
---|
| 640 | // There are two methods provided for printing the tree. Print() |
---|
| 641 | // is used to produce a "human-readable" display of the tree, with |
---|
| 642 | // indenation, which is handy for debugging. Dump() produces a |
---|
| 643 | // format that is suitable reading by another program. There is a |
---|
| 644 | // "load" constructor, which constructs a tree which is assumed to |
---|
| 645 | // have been saved by the Dump() procedure. |
---|
| 646 | // |
---|
| 647 | // Performance and Structure Statistics: |
---|
| 648 | // ------------------------------------- |
---|
| 649 | // The procedure getStats() collects statistics information on the |
---|
| 650 | // tree (its size, height, etc.) See ANNperf.h for information on |
---|
| 651 | // the stats structure it returns. |
---|
| 652 | // |
---|
| 653 | // Internal information: |
---|
| 654 | // --------------------- |
---|
| 655 | // The data structure consists of three major chunks of storage. |
---|
| 656 | // The first (implicit) storage are the points themselves (pts), |
---|
| 657 | // which have been provided by the users as an argument to the |
---|
| 658 | // constructor, or are allocated dynamically if the tree is built |
---|
| 659 | // using the load constructor). These should not be changed during |
---|
| 660 | // the lifetime of the search structure. It is the user's |
---|
| 661 | // responsibility to delete these after the tree is destroyed. |
---|
| 662 | // |
---|
| 663 | // The second is the tree itself (which is dynamically allocated in |
---|
| 664 | // the constructor) and is given as a pointer to its root node |
---|
| 665 | // (root). These nodes are automatically deallocated when the tree |
---|
| 666 | // is deleted. See the file src/kd_tree.h for further information |
---|
| 667 | // on the structure of the tree nodes. |
---|
| 668 | // |
---|
| 669 | // Each leaf of the tree does not contain a pointer directly to a |
---|
| 670 | // point, but rather contains a pointer to a "bucket", which is an |
---|
| 671 | // array consisting of point indices. The third major chunk of |
---|
| 672 | // storage is an array (pidx), which is a large array in which all |
---|
| 673 | // these bucket subarrays reside. (The reason for storing them |
---|
| 674 | // separately is the buckets are typically small, but of varying |
---|
| 675 | // sizes. This was done to avoid fragmentation.) This array is |
---|
| 676 | // also deallocated when the tree is deleted. |
---|
| 677 | // |
---|
| 678 | // In addition to this, the tree consists of a number of other |
---|
| 679 | // pieces of information which are used in searching and for |
---|
| 680 | // subsequent tree operations. These consist of the following: |
---|
| 681 | // |
---|
| 682 | // dim Dimension of space |
---|
| 683 | // n_pts Number of points currently in the tree |
---|
| 684 | // n_max Maximum number of points that are allowed |
---|
| 685 | // in the tree |
---|
| 686 | // bkt_size Maximum bucket size (no. of points per leaf) |
---|
| 687 | // bnd_box_lo Bounding box low point |
---|
| 688 | // bnd_box_hi Bounding box high point |
---|
| 689 | // splitRule Splitting method used |
---|
| 690 | // |
---|
| 691 | //---------------------------------------------------------------------- |
---|
| 692 | |
---|
| 693 | //---------------------------------------------------------------------- |
---|
| 694 | // Some types and objects used by kd-tree functions |
---|
| 695 | // See src/kd_tree.h and src/kd_tree.cpp for definitions |
---|
| 696 | //---------------------------------------------------------------------- |
---|
| 697 | class ANNkdStats; // stats on kd-tree |
---|
| 698 | class ANNkd_node; // generic node in a kd-tree |
---|
| 699 | typedef ANNkd_node* ANNkd_ptr; // pointer to a kd-tree node |
---|
| 700 | |
---|
| 701 | class DLL_API ANNkd_tree: public ANNpointSet { |
---|
| 702 | protected: |
---|
| 703 | int dim; // dimension of space |
---|
| 704 | int n_pts; // number of points in tree |
---|
| 705 | int bkt_size; // bucket size |
---|
| 706 | ANNpointArray pts; // the points |
---|
| 707 | ANNidxArray pidx; // point indices (to pts array) |
---|
| 708 | ANNkd_ptr root; // root of kd-tree |
---|
| 709 | ANNpoint bnd_box_lo; // bounding box low point |
---|
| 710 | ANNpoint bnd_box_hi; // bounding box high point |
---|
| 711 | |
---|
| 712 | void SkeletonTree( // construct skeleton tree |
---|
| 713 | int n, // number of points |
---|
| 714 | int dd, // dimension |
---|
| 715 | int bs, // bucket size |
---|
| 716 | ANNpointArray pa = NULL, // point array (optional) |
---|
| 717 | ANNidxArray pi = NULL); // point indices (optional) |
---|
| 718 | |
---|
| 719 | public: |
---|
| 720 | ANNkd_tree( // build skeleton tree |
---|
| 721 | int n = 0, // number of points |
---|
| 722 | int dd = 0, // dimension |
---|
| 723 | int bs = 1); // bucket size |
---|
| 724 | |
---|
| 725 | ANNkd_tree( // build from point array |
---|
| 726 | ANNpointArray pa, // point array |
---|
| 727 | int n, // number of points |
---|
| 728 | int dd, // dimension |
---|
| 729 | int bs = 1, // bucket size |
---|
| 730 | ANNsplitRule split = ANN_KD_SUGGEST); // splitting method |
---|
| 731 | |
---|
| 732 | ANNkd_tree( // build from dump file |
---|
| 733 | std::istream& in); // input stream for dump file |
---|
| 734 | |
---|
| 735 | ~ANNkd_tree(); // tree destructor |
---|
| 736 | |
---|
| 737 | void annkSearch( // approx k near neighbor search |
---|
| 738 | ANNpoint q, // query point |
---|
| 739 | int k, // number of near neighbors to return |
---|
| 740 | ANNidxArray nn_idx, // nearest neighbor array (modified) |
---|
| 741 | ANNdistArray dd, // dist to near neighbors (modified) |
---|
| 742 | double eps=0.0); // error bound |
---|
| 743 | |
---|
| 744 | void annkPriSearch( // priority k near neighbor search |
---|
| 745 | ANNpoint q, // query point |
---|
| 746 | int k, // number of near neighbors to return |
---|
| 747 | ANNidxArray nn_idx, // nearest neighbor array (modified) |
---|
| 748 | ANNdistArray dd, // dist to near neighbors (modified) |
---|
| 749 | double eps=0.0); // error bound |
---|
| 750 | |
---|
| 751 | int annkFRSearch( // approx fixed-radius kNN search |
---|
| 752 | ANNpoint q, // the query point |
---|
| 753 | ANNdist sqRad, // squared radius of query ball |
---|
| 754 | int k, // number of neighbors to return |
---|
| 755 | ANNidxArray nn_idx = NULL, // nearest neighbor array (modified) |
---|
| 756 | ANNdistArray dd = NULL, // dist to near neighbors (modified) |
---|
| 757 | double eps=0.0); // error bound |
---|
| 758 | |
---|
| 759 | int theDim() // return dimension of space |
---|
| 760 | { return dim; } |
---|
| 761 | |
---|
| 762 | int nPoints() // return number of points |
---|
| 763 | { return n_pts; } |
---|
| 764 | |
---|
| 765 | ANNpointArray thePoints() // return pointer to points |
---|
| 766 | { return pts; } |
---|
| 767 | |
---|
| 768 | virtual void Print( // print the tree (for debugging) |
---|
| 769 | ANNbool with_pts, // print points as well? |
---|
| 770 | std::ostream& out); // output stream |
---|
| 771 | |
---|
| 772 | virtual void Dump( // dump entire tree |
---|
| 773 | ANNbool with_pts, // print points as well? |
---|
| 774 | std::ostream& out); // output stream |
---|
| 775 | |
---|
| 776 | virtual void getStats( // compute tree statistics |
---|
| 777 | ANNkdStats& st); // the statistics (modified) |
---|
| 778 | }; |
---|
| 779 | |
---|
| 780 | //---------------------------------------------------------------------- |
---|
| 781 | // Box decomposition tree (bd-tree) |
---|
| 782 | // The bd-tree is inherited from a kd-tree. The main difference |
---|
| 783 | // in the bd-tree and the kd-tree is a new type of internal node |
---|
| 784 | // called a shrinking node (in the kd-tree there is only one type |
---|
| 785 | // of internal node, a splitting node). The shrinking node |
---|
| 786 | // makes it possible to generate balanced trees in which the |
---|
| 787 | // cells have bounded aspect ratio, by allowing the decomposition |
---|
| 788 | // to zoom in on regions of dense point concentration. Although |
---|
| 789 | // this is a nice idea in theory, few point distributions are so |
---|
| 790 | // densely clustered that this is really needed. |
---|
| 791 | //---------------------------------------------------------------------- |
---|
| 792 | |
---|
| 793 | class DLL_API ANNbd_tree: public ANNkd_tree { |
---|
| 794 | public: |
---|
| 795 | ANNbd_tree( // build skeleton tree |
---|
| 796 | int n, // number of points |
---|
| 797 | int dd, // dimension |
---|
| 798 | int bs = 1) // bucket size |
---|
| 799 | : ANNkd_tree(n, dd, bs) {} // build base kd-tree |
---|
| 800 | |
---|
| 801 | ANNbd_tree( // build from point array |
---|
| 802 | ANNpointArray pa, // point array |
---|
| 803 | int n, // number of points |
---|
| 804 | int dd, // dimension |
---|
| 805 | int bs = 1, // bucket size |
---|
| 806 | ANNsplitRule split = ANN_KD_SUGGEST, // splitting rule |
---|
| 807 | ANNshrinkRule shrink = ANN_BD_SUGGEST); // shrinking rule |
---|
| 808 | |
---|
| 809 | ANNbd_tree( // build from dump file |
---|
| 810 | std::istream& in); // input stream for dump file |
---|
| 811 | }; |
---|
| 812 | |
---|
| 813 | //---------------------------------------------------------------------- |
---|
| 814 | // Other functions |
---|
| 815 | // annMaxPtsVisit Sets a limit on the maximum number of points |
---|
| 816 | // to visit in the search. |
---|
| 817 | // annClose Can be called when all use of ANN is finished. |
---|
| 818 | // It clears up a minor memory leak. |
---|
| 819 | //---------------------------------------------------------------------- |
---|
| 820 | |
---|
| 821 | DLL_API void annMaxPtsVisit( // max. pts to visit in search |
---|
| 822 | int maxPts); // the limit |
---|
| 823 | |
---|
| 824 | DLL_API void annClose(); // called to end use of ANN |
---|
| 825 | |
---|
| 826 | #endif |
---|