1 | //---------------------------------------------------------------------- |
---|
2 | // File: rand.cpp |
---|
3 | // Programmer: Sunil Arya and David Mount |
---|
4 | // Description: Routines for random point generation |
---|
5 | // Last modified: 08/04/06 (Version 1.1.1) |
---|
6 | //---------------------------------------------------------------------- |
---|
7 | // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and |
---|
8 | // David Mount. All Rights Reserved. |
---|
9 | // |
---|
10 | // This software and related documentation is part of the Approximate |
---|
11 | // Nearest Neighbor Library (ANN). This software is provided under |
---|
12 | // the provisions of the Lesser GNU Public License (LGPL). See the |
---|
13 | // file ../ReadMe.txt for further information. |
---|
14 | // |
---|
15 | // The University of Maryland (U.M.) and the authors make no |
---|
16 | // representations about the suitability or fitness of this software for |
---|
17 | // any purpose. It is provided "as is" without express or implied |
---|
18 | // warranty. |
---|
19 | //---------------------------------------------------------------------- |
---|
20 | // History: |
---|
21 | // Revision 0.1 03/04/98 |
---|
22 | // Initial release |
---|
23 | // Revision 0.2 03/26/98 |
---|
24 | // Changed random/srandom declarations for SGI's. |
---|
25 | // Revision 1.0 04/01/05 |
---|
26 | // annClusGauss centers distributed over [-1,1] rather than [0,1] |
---|
27 | // Added annClusOrthFlats distribution |
---|
28 | // Changed procedure names to avoid namespace conflicts |
---|
29 | // Added annClusFlats distribution |
---|
30 | // Added rand/srand option and fixed annRan0() initialization. |
---|
31 | // Revision 1.1.1 08/04/06 |
---|
32 | // Added planted distribution |
---|
33 | //---------------------------------------------------------------------- |
---|
34 | |
---|
35 | #include "rand.h" // random generator declarations |
---|
36 | |
---|
37 | using namespace std; // make std:: accessible |
---|
38 | |
---|
39 | //---------------------------------------------------------------------- |
---|
40 | // Globals |
---|
41 | //---------------------------------------------------------------------- |
---|
42 | int annIdum = 0; // used for random number generation |
---|
43 | |
---|
44 | //------------------------------------------------------------------------ |
---|
45 | // annRan0 - (safer) uniform random number generator |
---|
46 | // |
---|
47 | // The code given here is taken from "Numerical Recipes in C" by |
---|
48 | // William Press, Brian Flannery, Saul Teukolsky, and William |
---|
49 | // Vetterling. The task of the code is to do an additional randomizing |
---|
50 | // shuffle on the system-supplied random number generator to make it |
---|
51 | // safer to use. |
---|
52 | // |
---|
53 | // Returns a uniform deviate between 0.0 and 1.0 using the |
---|
54 | // system-supplied routine random() or rand(). Set the global |
---|
55 | // annIdum to any negative value to initialise or reinitialise |
---|
56 | // the sequence. |
---|
57 | //------------------------------------------------------------------------ |
---|
58 | |
---|
59 | double annRan0() |
---|
60 | { |
---|
61 | const int TAB_SIZE = 97; // table size: any large number |
---|
62 | int j; |
---|
63 | |
---|
64 | static double y, v[TAB_SIZE]; |
---|
65 | static int iff = 0; |
---|
66 | const double RAN_DIVISOR = double(ANN_RAND_MAX + 1UL); |
---|
67 | if (RAN_DIVISOR < 0) { |
---|
68 | cout << "RAN_DIVISOR " << RAN_DIVISOR << endl; |
---|
69 | exit(0); |
---|
70 | } |
---|
71 | |
---|
72 | //-------------------------------------------------------------------- |
---|
73 | // As a precaution against misuse, we will always initialize on the |
---|
74 | // first call, even if "annIdum" is not set negative. Determine |
---|
75 | // "maxran", the next integer after the largest representable value |
---|
76 | // of type int. We assume this is a factor of 2 smaller than the |
---|
77 | // corresponding value of type unsigned int. |
---|
78 | //-------------------------------------------------------------------- |
---|
79 | |
---|
80 | if (annIdum < 0 || iff == 0) { // initialize |
---|
81 | iff = 1; |
---|
82 | ANN_SRAND(annIdum); // (re)seed the generator |
---|
83 | annIdum = 1; |
---|
84 | |
---|
85 | for (j = 0; j < TAB_SIZE; j++) // exercise the system routine |
---|
86 | ANN_RAND(); // (values intentionally ignored) |
---|
87 | |
---|
88 | for (j = 0; j < TAB_SIZE; j++) // then save TAB_SIZE-1 values |
---|
89 | v[j] = ANN_RAND(); |
---|
90 | y = ANN_RAND(); // generate starting value |
---|
91 | } |
---|
92 | |
---|
93 | //-------------------------------------------------------------------- |
---|
94 | // This is where we start if not initializing. Use the previously |
---|
95 | // saved random number y to get an index j between 1 and TAB_SIZE-1. |
---|
96 | // Then use the corresponding v[j] for both the next j and as the |
---|
97 | // output number. |
---|
98 | //-------------------------------------------------------------------- |
---|
99 | |
---|
100 | j = int(TAB_SIZE * (y / RAN_DIVISOR)); |
---|
101 | y = v[j]; |
---|
102 | v[j] = ANN_RAND(); // refill the table entry |
---|
103 | return y / RAN_DIVISOR; |
---|
104 | } |
---|
105 | |
---|
106 | //------------------------------------------------------------------------ |
---|
107 | // annRanInt - generate a random integer from {0,1,...,n-1} |
---|
108 | // |
---|
109 | // If n == 0, then -1 is returned. |
---|
110 | //------------------------------------------------------------------------ |
---|
111 | |
---|
112 | static int annRanInt( |
---|
113 | int n) |
---|
114 | { |
---|
115 | int r = (int) (annRan0()*n); |
---|
116 | if (r == n) r--; // (in case annRan0() == 1 or n == 0) |
---|
117 | return r; |
---|
118 | } |
---|
119 | |
---|
120 | //------------------------------------------------------------------------ |
---|
121 | // annRanUnif - generate a random uniform in [lo,hi] |
---|
122 | //------------------------------------------------------------------------ |
---|
123 | |
---|
124 | static double annRanUnif( |
---|
125 | double lo, |
---|
126 | double hi) |
---|
127 | { |
---|
128 | return annRan0()*(hi-lo) + lo; |
---|
129 | } |
---|
130 | |
---|
131 | //------------------------------------------------------------------------ |
---|
132 | // annRanGauss - Gaussian random number generator |
---|
133 | // Returns a normally distributed deviate with zero mean and unit |
---|
134 | // variance, using annRan0() as the source of uniform deviates. |
---|
135 | //------------------------------------------------------------------------ |
---|
136 | |
---|
137 | static double annRanGauss() |
---|
138 | { |
---|
139 | static int iset=0; |
---|
140 | static double gset; |
---|
141 | |
---|
142 | if (iset == 0) { // we don't have a deviate handy |
---|
143 | double v1, v2; |
---|
144 | double r = 2.0; |
---|
145 | while (r >= 1.0) { |
---|
146 | //------------------------------------------------------------ |
---|
147 | // Pick two uniform numbers in the square extending from -1 to |
---|
148 | // +1 in each direction, see if they are in the circle of radius |
---|
149 | // 1. If not, try again |
---|
150 | //------------------------------------------------------------ |
---|
151 | v1 = annRanUnif(-1, 1); |
---|
152 | v2 = annRanUnif(-1, 1); |
---|
153 | r = v1 * v1 + v2 * v2; |
---|
154 | } |
---|
155 | double fac = sqrt(-2.0 * log(r) / r); |
---|
156 | //----------------------------------------------------------------- |
---|
157 | // Now make the Box-Muller transformation to get two normal |
---|
158 | // deviates. Return one and save the other for next time. |
---|
159 | //----------------------------------------------------------------- |
---|
160 | gset = v1 * fac; |
---|
161 | iset = 1; // set flag |
---|
162 | return v2 * fac; |
---|
163 | } |
---|
164 | else { // we have an extra deviate handy |
---|
165 | iset = 0; // so unset the flag |
---|
166 | return gset; // and return it |
---|
167 | } |
---|
168 | } |
---|
169 | |
---|
170 | //------------------------------------------------------------------------ |
---|
171 | // annRanLaplace - Laplacian random number generator |
---|
172 | // Returns a Laplacian distributed deviate with zero mean and |
---|
173 | // unit variance, using annRan0() as the source of uniform deviates. |
---|
174 | // |
---|
175 | // prob(x) = b/2 * exp(-b * |x|). |
---|
176 | // |
---|
177 | // b is chosen to be sqrt(2.0) so that the variance of the Laplacian |
---|
178 | // distribution [2/(b^2)] becomes 1. |
---|
179 | //------------------------------------------------------------------------ |
---|
180 | |
---|
181 | static double annRanLaplace() |
---|
182 | { |
---|
183 | const double b = 1.4142136; |
---|
184 | |
---|
185 | double laprand = -log(annRan0()) / b; |
---|
186 | double sign = annRan0(); |
---|
187 | if (sign < 0.5) laprand = -laprand; |
---|
188 | return(laprand); |
---|
189 | } |
---|
190 | |
---|
191 | //---------------------------------------------------------------------- |
---|
192 | // annUniformPts - Generate uniformly distributed points |
---|
193 | // A uniform distribution over [-1,1]. |
---|
194 | //---------------------------------------------------------------------- |
---|
195 | |
---|
196 | void annUniformPts( // uniform distribution |
---|
197 | ANNpointArray pa, // point array (modified) |
---|
198 | int n, // number of points |
---|
199 | int dim) // dimension |
---|
200 | { |
---|
201 | for (int i = 0; i < n; i++) { |
---|
202 | for (int d = 0; d < dim; d++) { |
---|
203 | pa[i][d] = (ANNcoord) (annRanUnif(-1,1)); |
---|
204 | } |
---|
205 | } |
---|
206 | } |
---|
207 | |
---|
208 | //---------------------------------------------------------------------- |
---|
209 | // annGaussPts - Generate Gaussian distributed points |
---|
210 | // A Gaussian distribution with zero mean and the given standard |
---|
211 | // deviation. |
---|
212 | //---------------------------------------------------------------------- |
---|
213 | |
---|
214 | void annGaussPts( // Gaussian distribution |
---|
215 | ANNpointArray pa, // point array (modified) |
---|
216 | int n, // number of points |
---|
217 | int dim, // dimension |
---|
218 | double std_dev) // standard deviation |
---|
219 | { |
---|
220 | for (int i = 0; i < n; i++) { |
---|
221 | for (int d = 0; d < dim; d++) { |
---|
222 | pa[i][d] = (ANNcoord) (annRanGauss() * std_dev); |
---|
223 | } |
---|
224 | } |
---|
225 | } |
---|
226 | |
---|
227 | //---------------------------------------------------------------------- |
---|
228 | // annLaplacePts - Generate Laplacian distributed points |
---|
229 | // Generates a Laplacian distribution (zero mean and unit variance). |
---|
230 | //---------------------------------------------------------------------- |
---|
231 | |
---|
232 | void annLaplacePts( // Laplacian distribution |
---|
233 | ANNpointArray pa, // point array (modified) |
---|
234 | int n, // number of points |
---|
235 | int dim) // dimension |
---|
236 | { |
---|
237 | for (int i = 0; i < n; i++) { |
---|
238 | for (int d = 0; d < dim; d++) { |
---|
239 | pa[i][d] = (ANNcoord) annRanLaplace(); |
---|
240 | } |
---|
241 | } |
---|
242 | } |
---|
243 | |
---|
244 | //---------------------------------------------------------------------- |
---|
245 | // annCoGaussPts - Generate correlated Gaussian distributed points |
---|
246 | // Generates a Gauss-Markov distribution of zero mean and unit |
---|
247 | // variance. |
---|
248 | //---------------------------------------------------------------------- |
---|
249 | |
---|
250 | void annCoGaussPts( // correlated-Gaussian distribution |
---|
251 | ANNpointArray pa, // point array (modified) |
---|
252 | int n, // number of points |
---|
253 | int dim, // dimension |
---|
254 | double correlation) // correlation |
---|
255 | { |
---|
256 | double std_dev_w = sqrt(1.0 - correlation * correlation); |
---|
257 | for (int i = 0; i < n; i++) { |
---|
258 | double previous = annRanGauss(); |
---|
259 | pa[i][0] = (ANNcoord) previous; |
---|
260 | for (int d = 1; d < dim; d++) { |
---|
261 | previous = correlation*previous + std_dev_w*annRanGauss(); |
---|
262 | pa[i][d] = (ANNcoord) previous; |
---|
263 | } |
---|
264 | } |
---|
265 | } |
---|
266 | |
---|
267 | //---------------------------------------------------------------------- |
---|
268 | // annCoLaplacePts - Generate correlated Laplacian distributed points |
---|
269 | // Generates a Laplacian-Markov distribution of zero mean and unit |
---|
270 | // variance. |
---|
271 | //---------------------------------------------------------------------- |
---|
272 | |
---|
273 | void annCoLaplacePts( // correlated-Laplacian distribution |
---|
274 | ANNpointArray pa, // point array (modified) |
---|
275 | int n, // number of points |
---|
276 | int dim, // dimension |
---|
277 | double correlation) // correlation |
---|
278 | { |
---|
279 | double wn; |
---|
280 | double corr_sq = correlation * correlation; |
---|
281 | |
---|
282 | for (int i = 0; i < n; i++) { |
---|
283 | double previous = annRanLaplace(); |
---|
284 | pa[i][0] = (ANNcoord) previous; |
---|
285 | for (int d = 1; d < dim; d++) { |
---|
286 | double temp = annRan0(); |
---|
287 | if (temp < corr_sq) |
---|
288 | wn = 0.0; |
---|
289 | else |
---|
290 | wn = annRanLaplace(); |
---|
291 | previous = correlation * previous + wn; |
---|
292 | pa[i][d] = (ANNcoord) previous; |
---|
293 | } |
---|
294 | } |
---|
295 | } |
---|
296 | |
---|
297 | //---------------------------------------------------------------------- |
---|
298 | // annClusGaussPts - Generate clusters of Gaussian distributed points |
---|
299 | // Cluster centers are uniformly distributed over [-1,1], and the |
---|
300 | // standard deviation within each cluster is fixed. |
---|
301 | // |
---|
302 | // Note: Once cluster centers have been set, they are not changed, |
---|
303 | // unless new_clust = true. This is so that subsequent calls generate |
---|
304 | // points from the same distribution. It follows, of course, that any |
---|
305 | // attempt to change the dimension or number of clusters without |
---|
306 | // generating new clusters is asking for trouble. |
---|
307 | // |
---|
308 | // Note: Cluster centers are not generated by a call to uniformPts(). |
---|
309 | // Although this could be done, it has been omitted for |
---|
310 | // compatibility with annClusGaussPts() in the colored version, |
---|
311 | // rand_c.cc. |
---|
312 | //---------------------------------------------------------------------- |
---|
313 | |
---|
314 | void annClusGaussPts( // clustered-Gaussian distribution |
---|
315 | ANNpointArray pa, // point array (modified) |
---|
316 | int n, // number of points |
---|
317 | int dim, // dimension |
---|
318 | int n_clus, // number of colors |
---|
319 | ANNbool new_clust, // generate new clusters. |
---|
320 | double std_dev) // standard deviation within clusters |
---|
321 | { |
---|
322 | static ANNpointArray clusters = NULL;// cluster storage |
---|
323 | |
---|
324 | if (clusters == NULL || new_clust) {// need new cluster centers |
---|
325 | if (clusters != NULL) // clusters already exist |
---|
326 | annDeallocPts(clusters); // get rid of them |
---|
327 | clusters = annAllocPts(n_clus, dim); |
---|
328 | // generate cluster center coords |
---|
329 | for (int i = 0; i < n_clus; i++) { |
---|
330 | for (int d = 0; d < dim; d++) { |
---|
331 | clusters[i][d] = (ANNcoord) annRanUnif(-1,1); |
---|
332 | } |
---|
333 | } |
---|
334 | } |
---|
335 | |
---|
336 | for (int i = 0; i < n; i++) { |
---|
337 | int c = annRanInt(n_clus); // generate cluster index |
---|
338 | for (int d = 0; d < dim; d++) { |
---|
339 | pa[i][d] = (ANNcoord) (std_dev*annRanGauss() + clusters[c][d]); |
---|
340 | } |
---|
341 | } |
---|
342 | } |
---|
343 | |
---|
344 | //---------------------------------------------------------------------- |
---|
345 | // annClusOrthFlats - points clustered along orthogonal flats |
---|
346 | // |
---|
347 | // This distribution consists of a collection points clustered |
---|
348 | // among a collection of axis-aligned low dimensional flats in |
---|
349 | // the hypercube [-1,1]^d. A set of n_clus orthogonal flats are |
---|
350 | // generated, each whose dimension is a random number between 1 |
---|
351 | // and max_dim. The points are evenly distributed among the clusters. |
---|
352 | // For each cluster, we generate points uniformly distributed along |
---|
353 | // the flat within the hypercube. |
---|
354 | // |
---|
355 | // This is done as follows. Each cluster is defined by a d-element |
---|
356 | // control vector whose components are either: |
---|
357 | // |
---|
358 | // CO_FLAG indicating that this component is to be generated |
---|
359 | // uniformly in [-1,1], |
---|
360 | // x a value other than CO_FLAG in the range [-1,1], |
---|
361 | // which indicates that this coordinate is to be |
---|
362 | // generated as x plus a Gaussian random deviation |
---|
363 | // with the given standard deviation. |
---|
364 | // |
---|
365 | // The number of zero components is the dimension of the flat, which |
---|
366 | // is a random integer in the range from 1 to max_dim. The points |
---|
367 | // are disributed between clusters in nearly equal sized groups. |
---|
368 | // |
---|
369 | // Note: Once cluster centers have been set, they are not changed, |
---|
370 | // unless new_clust = true. This is so that subsequent calls generate |
---|
371 | // points from the same distribution. It follows, of course, that any |
---|
372 | // attempt to change the dimension or number of clusters without |
---|
373 | // generating new clusters is asking for trouble. |
---|
374 | // |
---|
375 | // To make this a bad scenario at query time, query points should be |
---|
376 | // selected from a different distribution, e.g. uniform or Gaussian. |
---|
377 | // |
---|
378 | // We use a little programming trick to generate groups of roughly |
---|
379 | // equal size. If n is the total number of points, and n_clus is |
---|
380 | // the number of clusters, then the c-th cluster (0 <= c < n_clus) |
---|
381 | // is given floor((n+c)/n_clus) points. It can be shown that this |
---|
382 | // will exactly consume all n points. |
---|
383 | // |
---|
384 | // This procedure makes use of the utility procedure, genOrthFlat |
---|
385 | // which generates points in one orthogonal flat, according to |
---|
386 | // the given control vector. |
---|
387 | // |
---|
388 | //---------------------------------------------------------------------- |
---|
389 | const double CO_FLAG = 999; // special flag value |
---|
390 | |
---|
391 | static void genOrthFlat( // generate points on an orthog flat |
---|
392 | ANNpointArray pa, // point array |
---|
393 | int n, // number of points |
---|
394 | int dim, // dimension |
---|
395 | double *control, // control vector |
---|
396 | double std_dev) // standard deviation |
---|
397 | { |
---|
398 | for (int i = 0; i < n; i++) { // generate each point |
---|
399 | for (int d = 0; d < dim; d++) { // generate each coord |
---|
400 | if (control[d] == CO_FLAG) // dimension on flat |
---|
401 | pa[i][d] = (ANNcoord) annRanUnif(-1,1); |
---|
402 | else // dimension off flat |
---|
403 | pa[i][d] = (ANNcoord) (std_dev*annRanGauss() + control[d]); |
---|
404 | } |
---|
405 | } |
---|
406 | } |
---|
407 | |
---|
408 | void annClusOrthFlats( // clustered along orthogonal flats |
---|
409 | ANNpointArray pa, // point array (modified) |
---|
410 | int n, // number of points |
---|
411 | int dim, // dimension |
---|
412 | int n_clus, // number of colors |
---|
413 | ANNbool new_clust, // generate new clusters. |
---|
414 | double std_dev, // standard deviation within clusters |
---|
415 | int max_dim) // maximum dimension of the flats |
---|
416 | { |
---|
417 | static ANNpointArray control = NULL; // control vectors |
---|
418 | |
---|
419 | if (control == NULL || new_clust) { // need new cluster centers |
---|
420 | if (control != NULL) { // clusters already exist |
---|
421 | annDeallocPts(control); // get rid of them |
---|
422 | } |
---|
423 | control = annAllocPts(n_clus, dim); |
---|
424 | |
---|
425 | for (int c = 0; c < n_clus; c++) { // generate clusters |
---|
426 | int n_dim = 1 + annRanInt(max_dim); // number of dimensions in flat |
---|
427 | for (int d = 0; d < dim; d++) { // generate side locations |
---|
428 | // prob. of picking next dim |
---|
429 | double Prob = ((double) n_dim)/((double) (dim-d)); |
---|
430 | if (annRan0() < Prob) { // add this one to flat |
---|
431 | control[c][d] = CO_FLAG; // flag this entry |
---|
432 | n_dim--; // one fewer dim to fill |
---|
433 | } |
---|
434 | else { // don't take this one |
---|
435 | control[c][d] = annRanUnif(-1,1);// random value in [-1,1] |
---|
436 | } |
---|
437 | } |
---|
438 | } |
---|
439 | } |
---|
440 | int offset = 0; // offset in pa array |
---|
441 | for (int c = 0; c < n_clus; c++) { // generate clusters |
---|
442 | int pick = (n+c)/n_clus; // number of points to pick |
---|
443 | // generate the points |
---|
444 | genOrthFlat(pa+offset, pick, dim, control[c], std_dev); |
---|
445 | offset += pick; // increment offset |
---|
446 | } |
---|
447 | } |
---|
448 | |
---|
449 | //---------------------------------------------------------------------- |
---|
450 | // annClusEllipsoids - points clustered around axis-aligned ellipsoids |
---|
451 | // |
---|
452 | // This distribution consists of a collection points clustered |
---|
453 | // among a collection of low dimensional ellipsoids whose axes |
---|
454 | // are alligned with the coordinate axes in the hypercube [-1,1]^d. |
---|
455 | // The objective is to model distributions in which the points are |
---|
456 | // distributed in lower dimensional subspaces, and within this |
---|
457 | // lower dimensional space the points are distributed with a |
---|
458 | // Gaussian distribution (with no correlation between the |
---|
459 | // dimensions). |
---|
460 | // |
---|
461 | // The distribution is given the number of clusters or "colors" |
---|
462 | // (n_clus), maximum number of dimensions (max_dim) of the lower |
---|
463 | // dimensional subspace, a "small" standard deviation |
---|
464 | // (std_dev_small), and a "large" standard deviation range |
---|
465 | // (std_dev_lo, std_dev_hi). |
---|
466 | // |
---|
467 | // The algorithm generates n_clus cluster centers uniformly from |
---|
468 | // the hypercube [-1,1]^d. For each cluster, it selects the |
---|
469 | // dimension of the subspace as a random number r between 1 and |
---|
470 | // max_dim. These are the dimensions of the ellipsoid. Then it |
---|
471 | // generates a d-element std dev vector whose entries are the |
---|
472 | // standard deviation for the coordinates of each cluster in the |
---|
473 | // distribution. Among the d-element control vector, r randomly |
---|
474 | // chosen values are chosen uniformly from the range [std_dev_lo, |
---|
475 | // std_dev_hi]. The remaining values are set to std_dev_small. |
---|
476 | // |
---|
477 | // Note that annClusGaussPts is a special case of this in which |
---|
478 | // max_dim = 0, and std_dev = std_dev_small. |
---|
479 | // |
---|
480 | // If the flag new_clust is set, then new cluster centers are |
---|
481 | // generated. |
---|
482 | // |
---|
483 | // This procedure makes use of the utility procedure genGauss |
---|
484 | // which generates points distributed according to a Gaussian |
---|
485 | // distribution. |
---|
486 | // |
---|
487 | //---------------------------------------------------------------------- |
---|
488 | |
---|
489 | static void genGauss( // generate points on a general Gaussian |
---|
490 | ANNpointArray pa, // point array |
---|
491 | int n, // number of points |
---|
492 | int dim, // dimension |
---|
493 | double *center, // center vector |
---|
494 | double *std_dev) // standard deviation vector |
---|
495 | { |
---|
496 | for (int i = 0; i < n; i++) { |
---|
497 | for (int d = 0; d < dim; d++) { |
---|
498 | pa[i][d] = (ANNcoord) (std_dev[d]*annRanGauss() + center[d]); |
---|
499 | } |
---|
500 | } |
---|
501 | } |
---|
502 | |
---|
503 | void annClusEllipsoids( // clustered around ellipsoids |
---|
504 | ANNpointArray pa, // point array (modified) |
---|
505 | int n, // number of points |
---|
506 | int dim, // dimension |
---|
507 | int n_clus, // number of colors |
---|
508 | ANNbool new_clust, // generate new clusters. |
---|
509 | double std_dev_small, // small standard deviation |
---|
510 | double std_dev_lo, // low standard deviation for ellipses |
---|
511 | double std_dev_hi, // high standard deviation for ellipses |
---|
512 | int max_dim) // maximum dimension of the flats |
---|
513 | { |
---|
514 | static ANNpointArray centers = NULL; // cluster centers |
---|
515 | static ANNpointArray std_dev = NULL; // standard deviations |
---|
516 | |
---|
517 | if (centers == NULL || new_clust) { // need new cluster centers |
---|
518 | if (centers != NULL) // clusters already exist |
---|
519 | annDeallocPts(centers); // get rid of them |
---|
520 | if (std_dev != NULL) // std deviations already exist |
---|
521 | annDeallocPts(std_dev); // get rid of them |
---|
522 | |
---|
523 | centers = annAllocPts(n_clus, dim); // alloc new clusters and devs |
---|
524 | std_dev = annAllocPts(n_clus, dim); |
---|
525 | |
---|
526 | for (int i = 0; i < n_clus; i++) { // gen cluster center coords |
---|
527 | for (int d = 0; d < dim; d++) { |
---|
528 | centers[i][d] = (ANNcoord) annRanUnif(-1,1); |
---|
529 | } |
---|
530 | } |
---|
531 | for (int c = 0; c < n_clus; c++) { // generate cluster std dev |
---|
532 | int n_dim = 1 + annRanInt(max_dim); // number of dimensions in flat |
---|
533 | for (int d = 0; d < dim; d++) { // generate std dev's |
---|
534 | // prob. of picking next dim |
---|
535 | double Prob = ((double) n_dim)/((double) (dim-d)); |
---|
536 | if (annRan0() < Prob) { // add this one to ellipse |
---|
537 | // generate random std dev |
---|
538 | std_dev[c][d] = annRanUnif(std_dev_lo, std_dev_hi); |
---|
539 | n_dim--; // one fewer dim to fill |
---|
540 | } |
---|
541 | else { // don't take this one |
---|
542 | std_dev[c][d] = std_dev_small;// use small std dev |
---|
543 | } |
---|
544 | } |
---|
545 | } |
---|
546 | } |
---|
547 | |
---|
548 | int offset = 0; // next slot to fill |
---|
549 | for (int c = 0; c < n_clus; c++) { // generate clusters |
---|
550 | int pick = (n+c)/n_clus; // number of points to pick |
---|
551 | // generate the points |
---|
552 | genGauss(pa+offset, pick, dim, centers[c], std_dev[c]); |
---|
553 | offset += pick; // increment offset in array |
---|
554 | } |
---|
555 | } |
---|
556 | |
---|
557 | //---------------------------------------------------------------------- |
---|
558 | // annPlanted - Generates points from a "planted" distribution |
---|
559 | // In high dimensional spaces, interpoint distances tend to be |
---|
560 | // highly clustered around the mean value. Approximate nearest |
---|
561 | // neighbor searching makes little sense in this context, unless it |
---|
562 | // is the case that each query point is significantly closer to its |
---|
563 | // nearest neighbor than to other points. Thus, the query points |
---|
564 | // should be planted close to the data points. Given a source data |
---|
565 | // set, this procedure generates a set of query points having this |
---|
566 | // property. |
---|
567 | // |
---|
568 | // We are given a source data array and a standard deviation. We |
---|
569 | // generate points as follows. We select a random point from the |
---|
570 | // source data set, and we generate a Gaussian point centered about |
---|
571 | // this random point and perturbed by a normal distributed random |
---|
572 | // variable with mean zero and the given standard deviation along |
---|
573 | // each coordinate. |
---|
574 | // |
---|
575 | // Note that this essentially the same a clustered Gaussian |
---|
576 | // distribution, but where the cluster centers are given by the |
---|
577 | // source data set. |
---|
578 | //---------------------------------------------------------------------- |
---|
579 | |
---|
580 | void annPlanted( // planted nearest neighbors |
---|
581 | ANNpointArray pa, // point array (modified) |
---|
582 | int n, // number of points |
---|
583 | int dim, // dimension |
---|
584 | ANNpointArray src, // source point array |
---|
585 | int n_src, // source size |
---|
586 | double std_dev) // standard deviation about source |
---|
587 | { |
---|
588 | for (int i = 0; i < n; i++) { |
---|
589 | int c = annRanInt(n_src); // generate source index |
---|
590 | for (int d = 0; d < dim; d++) { |
---|
591 | pa[i][d] = (ANNcoord) (std_dev*annRanGauss() + src[c][d]); |
---|
592 | } |
---|
593 | } |
---|
594 | } |
---|