printf problem

Discussion and support for the CUDA Multiforcers (Windows and Linux)
  • Ads

printf problem

Postby ev66 » Wed Dec 05, 2012 9:53 am

Having problem with prinf statement. NVidia drivers upto date.

I'm basically executing:

myGPU<<<50000,500>>>(pthashToFind, ptfound, ptsofar, ptmyint, ptstep, pttrigger); //runs on GPU for approx 1second

printf("Round 1);

myGPU<<<50000,500>>>(pthashToFind, ptfound, ptsofar, ptmyint, ptstep, pttrigger); //runs on GPU for approx 1second

printf("Round 2);


The problem is that it does not print anything (i.e. "Round 1") to the screen until the end of the program. Then it prints everything.

Can you help me please.

Thanks.

Code below:


Code: Select all
#include "common/book.h"
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <windows.h>





int main(void){

system("cls");

__global__ void myGPU( char *toFind, char *ptgot, char *ptsofar, int *thenum, int *ptstep, unsigned long long  *abab);




char hashToFind[33] = "134A6F1411B9379E72039BF26D5CC605"; // zzzzzz 6
//char hashToFind2[33] = "F705E6FBA2A396336AEE679A0E5203BF"; // xxxxxx  6
char sofar[] ="xxxxxx";
char found[] = "xxxxxx";
int myint[1]={0};
unsigned long long trigger[1]={0};
int step[1]={2};

char *pthashToFind;
char *ptfound;
char *ptsofar;
int *ptmyint;
unsigned long long  *pttrigger;
int *ptstep;



// ROUND 1
HANDLE_ERROR( cudaMalloc( (void**)&pthashToFind, 33 * sizeof(char)));
HANDLE_ERROR( cudaMalloc( (void**)&ptfound, 7 * sizeof(char)));
HANDLE_ERROR( cudaMalloc( (void**)&ptsofar, 11 * sizeof(char)));
HANDLE_ERROR( cudaMalloc( (void**)&ptmyint, sizeof(int)));
HANDLE_ERROR( cudaMalloc( (void**)&ptstep, sizeof(int)));
HANDLE_ERROR( cudaMalloc( (void**)&pttrigger, sizeof(unsigned long long )));



HANDLE_ERROR( cudaMemcpy(pthashToFind, hashToFind, 33 * sizeof(char), cudaMemcpyHostToDevice) );
HANDLE_ERROR( cudaMemcpy(ptfound, found, 7 * sizeof(char), cudaMemcpyHostToDevice) );
HANDLE_ERROR( cudaMemcpy(ptsofar, sofar, 11 * sizeof(char), cudaMemcpyHostToDevice) );
HANDLE_ERROR( cudaMemcpy(ptmyint, myint, sizeof(int), cudaMemcpyHostToDevice) );
HANDLE_ERROR( cudaMemcpy(ptstep, step, sizeof(int), cudaMemcpyHostToDevice) );
HANDLE_ERROR( cudaMemcpy(pttrigger, trigger, sizeof(unsigned long long ), cudaMemcpyHostToDevice) );

cudaEvent_t start, stop;
HANDLE_ERROR( cudaEventCreate( &start ));
HANDLE_ERROR( cudaEventCreate( &stop ));
HANDLE_ERROR( cudaEventRecord( start, 0 ));

//dim3 grid(200,200);



myGPU<<<50000,500>>>(pthashToFind, ptfound, ptsofar, ptmyint, ptstep, pttrigger);
//myGPU<<<500,2>>>(pthashToFind, ptplText, myint);

HANDLE_ERROR( cudaEventRecord( stop, 0));
HANDLE_ERROR( cudaEventSynchronize( stop));

HANDLE_ERROR( cudaMemcpy (hashToFind, pthashToFind,33 * sizeof(char), cudaMemcpyDeviceToHost) );
HANDLE_ERROR( cudaMemcpy (found, ptfound,7 * sizeof(char), cudaMemcpyDeviceToHost) );
HANDLE_ERROR( cudaMemcpy (sofar, ptsofar,11 * sizeof(char), cudaMemcpyDeviceToHost) );
HANDLE_ERROR( cudaMemcpy (myint, ptmyint, sizeof(int), cudaMemcpyDeviceToHost) );
HANDLE_ERROR( cudaMemcpy (step, ptstep, sizeof(int), cudaMemcpyDeviceToHost) );
HANDLE_ERROR( cudaMemcpy (trigger, pttrigger, sizeof(unsigned long long ), cudaMemcpyDeviceToHost) );


printf("\n\ntrigger %d\n\n", trigger[0]);

printf("\nfound %s\n\n", found);
printf("\n\nsofar: %s\n\n", sofar);
printf("\n\nmyint %d\n\n", myint[0]);
printf("\nRound 1n");
//cudaFree( b );

HANDLE_ERROR( cudaEventRecord( stop, 0));
HANDLE_ERROR( cudaEventSynchronize( stop));

float elapsedTime;

int persec;

HANDLE_ERROR( cudaEventElapsedTime( &elapsedTime, start, stop));

persec = myint[0] / elapsedTime / 1000;

printf( "Time gone: %3.1f ms\n", elapsedTime );
printf("\n\n%d million per second\n\n", persec);

//HANDLE_ERROR( cudaFree( myint));
HANDLE_ERROR( cudaEventDestroy( start ));
HANDLE_ERROR( cudaEventDestroy( stop));
// END ROUND 1


// ROUND 2



HANDLE_ERROR( cudaMemcpy(pthashToFind, hashToFind, 33 * sizeof(char), cudaMemcpyHostToDevice) );
HANDLE_ERROR( cudaMemcpy(ptfound, found, 7 * sizeof(char), cudaMemcpyHostToDevice) );
HANDLE_ERROR( cudaMemcpy(ptsofar, sofar, 11 * sizeof(char), cudaMemcpyHostToDevice) );
HANDLE_ERROR( cudaMemcpy(ptmyint, myint, sizeof(int), cudaMemcpyHostToDevice) );
HANDLE_ERROR( cudaMemcpy(ptstep, step, sizeof(int), cudaMemcpyHostToDevice) );
HANDLE_ERROR( cudaMemcpy(pttrigger, trigger, sizeof(unsigned long long ), cudaMemcpyHostToDevice) );

//cudaEvent_t start, stop;
HANDLE_ERROR( cudaEventCreate( &start ));
HANDLE_ERROR( cudaEventCreate( &stop ));
HANDLE_ERROR( cudaEventRecord( start, 0 ));

//dim3 grid(200,200);



myGPU<<<50000,500>>>(pthashToFind, ptfound, ptsofar, ptmyint, ptstep, pttrigger);
//myGPU<<<500,2>>>(pthashToFind, ptplText, myint);

HANDLE_ERROR( cudaEventRecord( stop, 0));
HANDLE_ERROR( cudaEventSynchronize( stop));

HANDLE_ERROR( cudaMemcpy (hashToFind, pthashToFind,33 * sizeof(char), cudaMemcpyDeviceToHost) );
HANDLE_ERROR( cudaMemcpy (found, ptfound,7 * sizeof(char), cudaMemcpyDeviceToHost) );
HANDLE_ERROR( cudaMemcpy (sofar, ptsofar,11 * sizeof(char), cudaMemcpyDeviceToHost) );
HANDLE_ERROR( cudaMemcpy (myint, ptmyint, sizeof(int), cudaMemcpyDeviceToHost) );
HANDLE_ERROR( cudaMemcpy (step, ptstep, sizeof(int), cudaMemcpyDeviceToHost) );
HANDLE_ERROR( cudaMemcpy (trigger, pttrigger, sizeof(unsigned long long ), cudaMemcpyDeviceToHost) );


printf("\n\ntrigger %d\n\n", trigger[0]);

printf("\nfound %s\n\n", found);
printf("\n\nsofar: %s\n\n", sofar);
printf("\n\nmyint %d\n\n", myint[0]);
printf("\nRound 2n");
//cudaFree( b );

HANDLE_ERROR( cudaEventRecord( stop, 0));
HANDLE_ERROR( cudaEventSynchronize( stop));

//float elapsedTime;

//int persec;

HANDLE_ERROR( cudaEventElapsedTime( &elapsedTime, start, stop));

persec = myint[0] / elapsedTime / 1000;

printf( "Time gone: %3.1f ms\n", elapsedTime );
printf("\n\n%d million per second\n\n", persec);

//HANDLE_ERROR( cudaFree( myint));
HANDLE_ERROR( cudaEventDestroy( start ));
HANDLE_ERROR( cudaEventDestroy( stop));
// END ROUND 2
return 0;

}
ev66
 
Posts: 1
Joined: Wed Dec 05, 2012 9:51 am

Re: printf problem

Postby Bitweasil » Mon Dec 10, 2012 3:31 am

Try adding

fflush(stdout);

after each kernel invocation. Kernel messages are usually queued and output in a block.
Bitweasil
Site Admin
 
Posts: 912
Joined: Tue Jan 20, 2009 4:26 pm


Return to CUDA Multiforcers

Who is online

Users browsing this forum: No registered users and 1 guest

cron