1 | // example1.cpp : Defines the entry point for the console application. |
---|
2 | // |
---|
3 | |
---|
4 | //#include "stdafx.h" |
---|
5 | |
---|
6 | #include <vector> |
---|
7 | #include <stdio.h> |
---|
8 | #include <stdlib.h> |
---|
9 | #include <float.h> |
---|
10 | #include <cuda.h> |
---|
11 | #include <cudpp.h> |
---|
12 | #include <cutil.h> |
---|
13 | #include <math_functions.h> |
---|
14 | #include <image.h> |
---|
15 | #include <misc.h> |
---|
16 | #include <pnmfile.h> |
---|
17 | #include <cutil_math.h> |
---|
18 | #include <sys/time.h> |
---|
19 | #include <unistd.h> |
---|
20 | |
---|
21 | |
---|
22 | #include "segment-image.h" |
---|
23 | |
---|
24 | |
---|
25 | #define BLOCK_DIM_X 12 |
---|
26 | #define BLOCK_DIM_Y 8 |
---|
27 | |
---|
28 | #define max FLT_MAX |
---|
29 | |
---|
30 | |
---|
31 | |
---|
32 | __global__ void convolve_even_gpu(imagef *src, imagef *dst, float *mask, int dim_mask){ |
---|
33 | int w = src->w; |
---|
34 | int h = src->h; |
---|
35 | int x = blockIdx.x * blockDim.x + threadIdx.x; |
---|
36 | int y = blockIdx.y * blockDim.y + threadIdx.y; |
---|
37 | int idx = x + y*w; |
---|
38 | int i; |
---|
39 | float sum; |
---|
40 | int mmax,mmin; |
---|
41 | |
---|
42 | if (x>=w || y>=h) |
---|
43 | return; |
---|
44 | |
---|
45 | sum = mask[0] * src->data[idx]; |
---|
46 | |
---|
47 | for (i = 1; i < dim_mask; i++) { |
---|
48 | mmax = (x-i > 0 ? x-i : 0); |
---|
49 | //mmax = max(x-i,0);// > 0 ? x-i : 0) |
---|
50 | mmin = (x+i > w-1 ? x+i : w-1); |
---|
51 | sum += mask[i] * (src->data[mmax + y * w]) + src->data[mmin + y * w]; |
---|
52 | } |
---|
53 | dst->data[y + h * x] = sum; |
---|
54 | |
---|
55 | } |
---|
56 | |
---|
57 | |
---|
58 | __global__ void create_rgb_smooth(imagef *r, imagef *g, imagef *b, image *input) |
---|
59 | { |
---|
60 | int w = input->w; |
---|
61 | int h = input->h; |
---|
62 | int x = blockIdx.x * blockDim.x + threadIdx.x; |
---|
63 | int y = blockIdx.y * blockDim.y + threadIdx.y; |
---|
64 | int idx = x + y*w; |
---|
65 | |
---|
66 | if (x>=w || y>=h) |
---|
67 | return; |
---|
68 | |
---|
69 | r->data[idx] = input->data[idx].r; |
---|
70 | g->data[idx] = input->data[idx].g; |
---|
71 | b->data[idx] = input->data[idx].b; |
---|
72 | |
---|
73 | } |
---|
74 | |
---|
75 | #define diffgpu(r, g, b, x1, y1, x2, y2) \ |
---|
76 | sqrtf(powf(r->data[x1+w*y1]-r->data[x2+w*y2],2) + \ |
---|
77 | powf(g->data[x1+w*y1]-g->data[x2+w*y2],2) + \ |
---|
78 | powf(b->data[x1+w*y1]-b->data[x2+w*y2],2)); \ |
---|
79 | |
---|
80 | |
---|
81 | // Kernel that executes on the CUDA device |
---|
82 | __global__ void compute_edges(image *input, edge *edges, edge **edges_location,float *weights,imagef *smooth_r, |
---|
83 | imagef *smooth_g,imagef *smooth_b ) |
---|
84 | { |
---|
85 | //int idx = blockIdx.x * blockDim.x + threadIdx.x; |
---|
86 | int w = input->w; |
---|
87 | int h = input->h; |
---|
88 | int x = blockIdx.x * blockDim.x + threadIdx.x; |
---|
89 | int y = blockIdx.y * blockDim.y + threadIdx.y; |
---|
90 | int idx = x + y*w; |
---|
91 | |
---|
92 | // indx[idx] = idx; |
---|
93 | |
---|
94 | /* if (blockIdx.x == 0 && blockIdx.y == 0 && threadIdx.x == 0 && threadIdx.y == 0) |
---|
95 | input->w = input->w + 1; |
---|
96 | |
---|
97 | edges[0].a = 23; |
---|
98 | edges[0].b = 24; |
---|
99 | */ |
---|
100 | |
---|
101 | if (x>=w || y>=h) |
---|
102 | return; |
---|
103 | |
---|
104 | if (x < w-1) |
---|
105 | { |
---|
106 | edges[idx*4].a = y * w + x; |
---|
107 | edges[idx*4].b = y * w + (x+1); |
---|
108 | weights[idx*4] = diffgpu(smooth_r, smooth_g, smooth_b, x, y, x+1, y); |
---|
109 | } |
---|
110 | else |
---|
111 | { |
---|
112 | edges[idx*4].a = -1; |
---|
113 | edges[idx*4].b = -1; |
---|
114 | weights[idx*4] = max; |
---|
115 | } |
---|
116 | |
---|
117 | |
---|
118 | if (y < h-1) |
---|
119 | { |
---|
120 | edges[idx*4+1].a = y * w + x; |
---|
121 | edges[idx*4+1].b = (y+1) * w + x; |
---|
122 | weights[idx*4+1] = diffgpu(smooth_r, smooth_g, smooth_b, x, y, x, y+1); |
---|
123 | } |
---|
124 | else |
---|
125 | { |
---|
126 | edges[idx*4+1].a = -1; |
---|
127 | edges[idx*4+1].b = -1; |
---|
128 | weights[idx*4+1] = max; |
---|
129 | } |
---|
130 | |
---|
131 | if ((x < w-1) && (y < h-1)) |
---|
132 | { |
---|
133 | edges[idx*4+2].a = y * w + x; |
---|
134 | edges[idx*4+2].b = (y+1) * w + (x+1); |
---|
135 | weights[idx*4+2] = diffgpu(smooth_r, smooth_g, smooth_b, x, y, x+1, y+1); |
---|
136 | } |
---|
137 | else |
---|
138 | { |
---|
139 | edges[idx*4+2].a = -1; |
---|
140 | edges[idx*4+2].b = -1; |
---|
141 | weights[idx*4+2] = max; |
---|
142 | |
---|
143 | } |
---|
144 | |
---|
145 | if ((x < w-1) && (y > 0)) |
---|
146 | { |
---|
147 | edges[idx*4+3].a = y * w + x; |
---|
148 | edges[idx*4+3].b = (y-1) * w + (x+1); |
---|
149 | weights[idx*4+3] = diffgpu(smooth_r, smooth_g, smooth_b, x, y, x+1, y-1); |
---|
150 | } |
---|
151 | else |
---|
152 | { |
---|
153 | edges[idx*4+3].a = -1; |
---|
154 | edges[idx*4+3].b = -1; |
---|
155 | weights[idx*4+3] = max; |
---|
156 | } |
---|
157 | |
---|
158 | edges_location[idx*4] = &(edges[idx*4]); |
---|
159 | edges_location[idx*4+1] = &(edges[idx*4+1]); |
---|
160 | edges_location[idx*4+2] = &(edges[idx*4+2]); |
---|
161 | edges_location[idx*4+3] = &(edges[idx*4+3]); |
---|
162 | |
---|
163 | |
---|
164 | /* if (idx == 0) |
---|
165 | input->h = input->h + 1; |
---|
166 | */ |
---|
167 | |
---|
168 | } |
---|
169 | |
---|
170 | // main routine that executes on the host |
---|
171 | int main(int argc, char **argv) |
---|
172 | { |
---|
173 | if (argc != 6) |
---|
174 | { |
---|
175 | fprintf(stderr, "usage: %s sigma k min input(ppm) output(ppm)\n", argv[0]); |
---|
176 | return 1; |
---|
177 | } |
---|
178 | |
---|
179 | |
---|
180 | dim3 n_blocks, block_size; |
---|
181 | //int n_blocks, block_size; |
---|
182 | float sigma = atof(argv[1]); |
---|
183 | float k = atof(argv[2]); |
---|
184 | int min_size = atoi(argv[3]); |
---|
185 | rgb *array; |
---|
186 | edge *edges; |
---|
187 | edge *ed_temp; |
---|
188 | |
---|
189 | float* weights; |
---|
190 | edge** edges_location; |
---|
191 | |
---|
192 | |
---|
193 | struct timeval start, end; |
---|
194 | long mtime, seconds, useconds; |
---|
195 | |
---|
196 | gettimeofday(&start, NULL); |
---|
197 | |
---|
198 | printf("loading input image.\n"); |
---|
199 | image *input = loadPPM(argv[4]); |
---|
200 | |
---|
201 | /* Alloc image on device*/ |
---|
202 | image *input_cuda; |
---|
203 | |
---|
204 | image in; |
---|
205 | |
---|
206 | cudaMalloc((void **) &array, input->h * input->w * sizeof(rgb)); |
---|
207 | cudaMemcpy((void *) array, (void *) input->data, input->h * input->w * sizeof(rgb), cudaMemcpyHostToDevice); |
---|
208 | |
---|
209 | |
---|
210 | in.h = input->h; |
---|
211 | in.w = input->w; |
---|
212 | in.data = (rgb*)array; |
---|
213 | |
---|
214 | cudaMalloc((void **) &input_cuda, sizeof(image)); |
---|
215 | cudaMemcpy((void *) input_cuda, (void *) &in, sizeof(image), cudaMemcpyHostToDevice); |
---|
216 | |
---|
217 | |
---|
218 | /* Alocare matrci r,g,b de image_float*/ |
---|
219 | imagef *r; |
---|
220 | imagef *g; |
---|
221 | imagef *b; |
---|
222 | // imagef *tmp; |
---|
223 | imagef tmp; |
---|
224 | |
---|
225 | imagef *tmp_img; |
---|
226 | imagef tmp_img_cpu; |
---|
227 | |
---|
228 | float *arrayftmp; |
---|
229 | |
---|
230 | |
---|
231 | float *arrayf1, *arrayf2, *arrayf3; |
---|
232 | std::vector<float> vmask = make_fgauss(sigma); |
---|
233 | float *mask = (float *)malloc(vmask.size()*sizeof(float)) + 3 * sizeof(imagef); |
---|
234 | for (int i=0; i<vmask.size(); i++) |
---|
235 | mask[i]=vmask[i]; |
---|
236 | float *maski; |
---|
237 | // tmp = (imagef *)(((char *)mask) + vmask.size()*sizeof(float)); |
---|
238 | |
---|
239 | |
---|
240 | |
---|
241 | unsigned int rgb_size = 4 * input->h * input->w * sizeof(float) + 4 * sizeof(imagef) + sizeof(float) * vmask.size(); |
---|
242 | void *bigMem; |
---|
243 | if (cudaMalloc(&bigMem, rgb_size) != cudaSuccess) |
---|
244 | printf("ERROR big_malloc 1\n"); |
---|
245 | char *initial = (char *)bigMem; |
---|
246 | maski = (float *)initial; |
---|
247 | initial += sizeof(float) * vmask.size(); |
---|
248 | r = (imagef *)initial; |
---|
249 | initial+=sizeof(imagef); |
---|
250 | g = (imagef *)initial; |
---|
251 | initial+=sizeof(imagef); |
---|
252 | b = (imagef *)initial; |
---|
253 | initial+=sizeof(imagef); |
---|
254 | tmp_img = (imagef *)initial; |
---|
255 | initial+=sizeof(imagef); |
---|
256 | arrayf1 = (float *)initial; |
---|
257 | initial += input->h * input->w * sizeof(float); |
---|
258 | arrayf2 = (float *)initial; |
---|
259 | initial += input->h * input->w * sizeof(float); |
---|
260 | arrayf3 = (float *)initial; |
---|
261 | initial += input->h * input->w * sizeof(float); |
---|
262 | arrayftmp = (float *)initial; |
---|
263 | /* |
---|
264 | for (int i=0; i<3; i++){ |
---|
265 | tmp[i].w=input->w; |
---|
266 | tmp[i].h=input->h; |
---|
267 | } |
---|
268 | tmp[0].data = arrayf1; |
---|
269 | tmp[1].data = arrayf2; |
---|
270 | tmp[2].data = arrayf3; |
---|
271 | CUDA_SAFE_CALL(cudaMemcpy((void *)maski, (void *)mask, 3*sizeof(imagef) + vmask.size()*sizeof(float), cudaMemcpyHostToDevice)); |
---|
272 | */ |
---|
273 | // exit(0); |
---|
274 | /* |
---|
275 | if (cudaMalloc((void **) &r, sizeof(imagef)) != cudaSuccess) |
---|
276 | printf("ERROR11\n"); |
---|
277 | if (cudaMalloc((void **) &g, sizeof(imagef)) != cudaSuccess) |
---|
278 | printf("ERROR12\n"); |
---|
279 | |
---|
280 | if (cudaMalloc((void **) &b, sizeof(imagef)) != cudaSuccess) |
---|
281 | printf("ERROR13\n"); |
---|
282 | |
---|
283 | cudaMalloc((void **) &arrayf1, input->h * input->w * sizeof(float)); |
---|
284 | */ |
---|
285 | |
---|
286 | tmp.w = input->w; |
---|
287 | tmp.h = input->h; |
---|
288 | tmp.data = arrayf1; |
---|
289 | |
---|
290 | CUDA_SAFE_CALL(cudaMemcpy((void *) r, (void *) &tmp, sizeof(imagef), cudaMemcpyHostToDevice)); |
---|
291 | |
---|
292 | // cudaMalloc((void **) &arrayf2, input->h * input->w * sizeof(float)); |
---|
293 | tmp.data = arrayf2; |
---|
294 | if (cudaMemcpy((void *) g, (void *) &tmp, sizeof(imagef), cudaMemcpyHostToDevice)!=cudaSuccess) |
---|
295 | printf("Error memcpy g\n"); |
---|
296 | |
---|
297 | // cudaMalloc((void **) &arrayf3, input->h * input->w * sizeof(float)); |
---|
298 | tmp.data = arrayf3; |
---|
299 | if (cudaMemcpy((void *) b, (void *) &tmp, sizeof(imagef), cudaMemcpyHostToDevice)!=cudaSuccess) |
---|
300 | printf("Error memcpy b\n"); |
---|
301 | |
---|
302 | if (cudaMemcpy((void *) maski, (void *) mask, vmask.size() * sizeof(float), cudaMemcpyHostToDevice) != cudaSuccess) |
---|
303 | printf("Error memcpy maski\n"); |
---|
304 | |
---|
305 | |
---|
306 | //smooth alloc |
---|
307 | |
---|
308 | imagef *smooth_r; |
---|
309 | imagef *smooth_g; |
---|
310 | imagef *smooth_b; |
---|
311 | imagef smooth_tmp; |
---|
312 | |
---|
313 | |
---|
314 | float *arrayfs1, *arrayfs2, *arrayfs3; |
---|
315 | |
---|
316 | |
---|
317 | unsigned int smooth_size = 3 * input->h * input->w * sizeof(float) + 3 * sizeof(imagef); |
---|
318 | void *bigMemSmooth; |
---|
319 | CUDA_SAFE_CALL(cudaMalloc(&bigMemSmooth, smooth_size)); |
---|
320 | |
---|
321 | initial = (char *)bigMemSmooth; |
---|
322 | smooth_r = (imagef *)initial; |
---|
323 | initial+=sizeof(imagef); |
---|
324 | smooth_g = (imagef *)initial; |
---|
325 | initial+=sizeof(imagef); |
---|
326 | smooth_b = (imagef *)initial; |
---|
327 | initial+=sizeof(imagef); |
---|
328 | arrayfs1 = (float *)initial; |
---|
329 | initial += input->h * input->w * sizeof(float); |
---|
330 | arrayfs2 = (float *)initial; |
---|
331 | initial += input->h * input->w * sizeof(float); |
---|
332 | arrayfs3 = (float *)initial; |
---|
333 | |
---|
334 | /* |
---|
335 | if (cudaMalloc((void **) &smooth_r, sizeof(imagef)) != cudaSuccess) |
---|
336 | printf("ERROR14\n"); |
---|
337 | if (cudaMalloc((void **) &smooth_g, sizeof(imagef)) != cudaSuccess) |
---|
338 | printf("ERROR15\n"); |
---|
339 | |
---|
340 | if (cudaMalloc((void **) &smooth_b, sizeof(imagef)) != cudaSuccess) |
---|
341 | printf("ERROR16\n"); |
---|
342 | |
---|
343 | if (cudaMalloc((void **) &arrayfs1, input->h * input->w * sizeof(float))!=cudaSuccess) |
---|
344 | printf("Error malloc arrayfs1\n"); |
---|
345 | */ |
---|
346 | smooth_tmp.w = input->w; |
---|
347 | smooth_tmp.h = input->h; |
---|
348 | smooth_tmp.data = arrayfs1; |
---|
349 | if (cudaMemcpy((void *) smooth_r, (void *) &smooth_tmp, sizeof(imagef), cudaMemcpyHostToDevice)!=cudaSuccess) |
---|
350 | printf("Error memcpy smoothr\n"); |
---|
351 | |
---|
352 | // if (cudaMalloc((void **) &arrayfs2, input->h * input->w * sizeof(float))!=cudaSuccess) |
---|
353 | // printf("Error malloc arrayfs2\n"); |
---|
354 | smooth_tmp.data = arrayfs2; |
---|
355 | if (cudaMemcpy((void *) smooth_g, (void *) &smooth_tmp, sizeof(imagef), cudaMemcpyHostToDevice)!=cudaSuccess) |
---|
356 | printf("Error memcpy smoothg\n"); |
---|
357 | |
---|
358 | // if (cudaMalloc((void **) &arrayfs3, input->h * input->w * sizeof(float))!=cudaSuccess) |
---|
359 | // printf("Error malloc arrayfs3\n"); |
---|
360 | smooth_tmp.data = arrayfs3; |
---|
361 | if (cudaMemcpy((void *) smooth_b, (void *) &smooth_tmp, sizeof(imagef), cudaMemcpyHostToDevice)!=cudaSuccess) |
---|
362 | printf("Error memcpy smoothb\n"); |
---|
363 | |
---|
364 | //alloc tmp matrix for smooth |
---|
365 | // if (cudaMalloc((void **) &tmp_img, sizeof(imagef)) != cudaSuccess) |
---|
366 | // printf("ERROR17\n"); |
---|
367 | // cudaMalloc((void **) &arrayftmp, input->w * input->h * sizeof(float)); |
---|
368 | tmp_img_cpu.w = input->h; |
---|
369 | tmp_img_cpu.h = input->w; |
---|
370 | tmp_img_cpu.data = arrayftmp; |
---|
371 | if (cudaMemcpy((void *) tmp_img, (void *) &tmp_img_cpu, sizeof(imagef), cudaMemcpyHostToDevice)!=cudaSuccess) |
---|
372 | printf("Error memcpy tmp_img\n"); |
---|
373 | |
---|
374 | // if (cudaMalloc((void **) &maski, sizeof(float)*vmask.size()) != cudaSuccess) |
---|
375 | // printf("ERROR18\n"); |
---|
376 | |
---|
377 | |
---|
378 | create_rgb_smooth <<< n_blocks, block_size >>> (r, g, b, input_cuda); |
---|
379 | |
---|
380 | convolve_even_gpu <<< n_blocks, block_size >>> (r, tmp_img, maski, vmask.size()); |
---|
381 | convolve_even_gpu <<< n_blocks, block_size >>> (tmp_img, smooth_r, maski, vmask.size()); |
---|
382 | convolve_even_gpu <<< n_blocks, block_size >>> (g, tmp_img, maski, vmask.size()); |
---|
383 | convolve_even_gpu <<< n_blocks, block_size >>> (tmp_img, smooth_g, maski, vmask.size()); |
---|
384 | convolve_even_gpu <<< n_blocks, block_size >>> (b, tmp_img, maski, vmask.size()); |
---|
385 | convolve_even_gpu <<< n_blocks, block_size >>> (tmp_img, smooth_b, maski, vmask.size()); |
---|
386 | |
---|
387 | |
---|
388 | //free(mask); |
---|
389 | /* |
---|
390 | cudaFree(maski); |
---|
391 | cudaFree(tmp_img); |
---|
392 | cudaFree(r); |
---|
393 | cudaFree(g); |
---|
394 | cudaFree(b); |
---|
395 | cudaFree(arrayf1); |
---|
396 | cudaFree(arrayf2); |
---|
397 | cudaFree(arrayf3); |
---|
398 | cudaFree(arrayftmp); |
---|
399 | */ |
---|
400 | CUDA_SAFE_CALL(cudaFree(input_cuda)); |
---|
401 | CUDA_SAFE_CALL(cudaFree(array)); |
---|
402 | |
---|
403 | CUDA_SAFE_CALL(cudaFree(bigMem)); |
---|
404 | |
---|
405 | |
---|
406 | /* Alloc edges' vector on device */ |
---|
407 | //CUDA_SAFE_CALL(cudaMalloc((void **) &edges, (4 * input->h * input->w )* sizeof(edge))); |
---|
408 | //CUDA_SAFE_CALL(cudaMalloc((void **) &weights, (4 * input->h * input->w )* sizeof(float))); |
---|
409 | unsigned int size_edges = 4 * input->h * input->w * sizeof(edge) + 4 * input->h * input->w * sizeof(edge*) + 4 * input->h * input->w * sizeof(float); |
---|
410 | CUDA_SAFE_CALL(cudaMalloc((void **) &bigMem, size_edges)); |
---|
411 | |
---|
412 | //exit(0); |
---|
413 | |
---|
414 | /* |
---|
415 | if (cudaMalloc((void **) &edges, 4 * input->h * input->w * sizeof(edge)) != cudaSuccess) |
---|
416 | printf("ERROR19\n"); |
---|
417 | if (cudaMalloc((void **) &edges_location, (4 * input->h * input->w )* sizeof(edge*)) != cudaSuccess) |
---|
418 | printf("ERROR1A\n"); |
---|
419 | |
---|
420 | if (cudaMalloc((void **) &weights, (4 * input->h * input->w )* sizeof(float)) != cudaSuccess) |
---|
421 | printf("ERROR1B\n"); |
---|
422 | */ |
---|
423 | initial = (char *)bigMem; |
---|
424 | edges = (edge*)initial; |
---|
425 | initial += 4 * input->h * input->w * sizeof(edge); |
---|
426 | edges_location = (edge **)initial; |
---|
427 | initial += 4 * input->h * input->w * sizeof(edge*); |
---|
428 | weights = (float *)initial; |
---|
429 | |
---|
430 | /* Define the grid */ |
---|
431 | n_blocks.x = input->w / BLOCK_DIM_X + (input->w % BLOCK_DIM_X == 0?0:1); |
---|
432 | n_blocks.y = input->h / BLOCK_DIM_Y + (input->h % BLOCK_DIM_Y == 0?0:1); |
---|
433 | n_blocks.z = 1; |
---|
434 | |
---|
435 | printf("n_blocks.x = %d\n", n_blocks.x); |
---|
436 | printf("n_blocks.y = %d\n", n_blocks.y); |
---|
437 | |
---|
438 | |
---|
439 | block_size.x = BLOCK_DIM_X; |
---|
440 | block_size.y = BLOCK_DIM_Y; |
---|
441 | block_size.z = 1; |
---|
442 | |
---|
443 | /* Compute edges */ |
---|
444 | compute_edges <<< n_blocks, block_size >>> (input_cuda, edges, edges_location, weights,smooth_r,smooth_g,smooth_b); |
---|
445 | /* |
---|
446 | cudaFree(smooth_r); |
---|
447 | cudaFree(smooth_g); |
---|
448 | cudaFree(smooth_b); |
---|
449 | cudaFree(arrayfs1); |
---|
450 | cudaFree(arrayfs2); |
---|
451 | cudaFree(arrayfs3); |
---|
452 | */ |
---|
453 | CUDA_SAFE_CALL(cudaFree(bigMemSmooth)); |
---|
454 | |
---|
455 | ed_temp = (edge *)malloc((4 * input->h * input->w )* sizeof(edge)); |
---|
456 | if (cudaMemcpy((void *) ed_temp, (void *) edges, 4 * input->h * input->w * sizeof(edge), cudaMemcpyDeviceToHost) != cudaSuccess) |
---|
457 | printf("ERROR2\n"); |
---|
458 | |
---|
459 | /* if (cudaMemcpy((void *) indx_temp, (void *) indx, input->h * input->w * sizeof(int), cudaMemcpyDeviceToHost)) |
---|
460 | printf("ERROR3\n"); |
---|
461 | */ |
---|
462 | |
---|
463 | printf("a0 = %d\n", ed_temp[0].a); |
---|
464 | printf("b0 = %d\n", ed_temp[0].b); |
---|
465 | |
---|
466 | //sort edges |
---|
467 | /* |
---|
468 | CUDPPConfiguration config; |
---|
469 | config.op = CUDPP_MIN; |
---|
470 | config.datatype = CUDPP_FLOAT; |
---|
471 | config.algorithm = CUDPP_SORT_RADIX; |
---|
472 | config.options = CUDPP_OPTION_KEY_VALUE_PAIRS; |
---|
473 | |
---|
474 | CUDPPHandle scanplan = 0; |
---|
475 | CUDPPResult result = cudppPlan(&scanplan, config, 4 * input->h * input->w, 1, 0); |
---|
476 | |
---|
477 | result = cudppSort ( scanplan, |
---|
478 | weights, |
---|
479 | edges_location, |
---|
480 | 32, |
---|
481 | 4 * input->h * input->w |
---|
482 | ); |
---|
483 | |
---|
484 | result = cudppDestroyPlan(scanplan); |
---|
485 | if (CUDPP_SUCCESS != result) |
---|
486 | { |
---|
487 | printf("Error destroying CUDPPPlan\n"); |
---|
488 | exit(-1); |
---|
489 | } |
---|
490 | |
---|
491 | |
---|
492 | if (CUDPP_SUCCESS != result) |
---|
493 | { |
---|
494 | printf("Error creating CUDPPPlan\n"); |
---|
495 | exit(-1); |
---|
496 | } |
---|
497 | */ |
---|
498 | gettimeofday(&end, NULL); |
---|
499 | |
---|
500 | seconds = end.tv_sec - start.tv_sec; |
---|
501 | useconds = end.tv_usec - start.tv_usec; |
---|
502 | |
---|
503 | mtime = ((seconds) * 1000 + useconds/1000.0) + 0.5; |
---|
504 | |
---|
505 | printf("Elapsed time: %ld milliseconds\n", mtime); |
---|
506 | |
---|
507 | // Cleanup |
---|
508 | //exit(0); |
---|
509 | //cudaFree(input_cuda); |
---|
510 | CUDA_SAFE_CALL(cudaFree(bigMem)); |
---|
511 | //cudaFree(array); |
---|
512 | //cudaFree(edges); |
---|
513 | //cudaFree(edges_location); |
---|
514 | //cudaFree(weights); |
---|
515 | |
---|
516 | |
---|
517 | return 0; |
---|
518 | |
---|
519 | } |
---|