Page 1 of 1

printf problem

PostPosted: Wed Dec 05, 2012 9:53 am
by ev66
I'm having a problem with a printf statement. My NVIDIA drivers are up to date.

I'm basically executing:

myGPU<<<50000,500>>>(pthashToFind, ptfound, ptsofar, ptmyint, ptstep, pttrigger); //runs on GPU for approx 1second

printf("Round 1");

myGPU<<<50000,500>>>(pthashToFind, ptfound, ptsofar, ptmyint, ptstep, pttrigger); //runs on GPU for approx 1second

printf("Round 2");


The problem is that it does not print anything (i.e. "Round 1") to the screen until the end of the program. Then it prints everything.

Can you help me please.

Thanks.

Code below:


Code: Select all
#include "common/book.h"
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <windows.h>





/*
 * Host driver: uploads the search state to the GPU, launches myGPU twice
 * ("Round 1" and "Round 2"), and after each launch copies the state back,
 * prints it together with the kernel time, and flushes stdout so the text
 * appears immediately instead of at program exit.
 *
 * The kernel myGPU is defined elsewhere; what it reads from or writes into
 * these buffers is not visible from this function.
 *
 * Returns 0 on success. CUDA failures are handled by HANDLE_ERROR
 * (from common/book.h).
 */
int main(void){

    system("cls");

    __global__ void myGPU( char *toFind, char *ptgot, char *ptsofar, int *thenum, int *ptstep, unsigned long long  *abab);

    /* Byte counts used for BOTH the host arrays and the device buffers.
     * Keeping them in one place guarantees every cudaMemcpy stays in bounds. */
    enum {
        HASH_BYTES  = 33,   /* 32 hex digits + NUL */
        FOUND_BYTES = 7,    /* 6 characters + NUL  */
        SOFAR_BYTES = 11,   /* size of the device-side "sofar" buffer */
        NUM_ROUNDS  = 2
    };

    char hashToFind[HASH_BYTES] = "134A6F1411B9379E72039BF26D5CC605"; // zzzzzz 6
    //char hashToFind2[33] = "F705E6FBA2A396336AEE679A0E5203BF"; // xxxxxx  6

    /* The host array must be as large as the 11-byte device buffer: 11 bytes
     * are copied in both directions, and a 7-byte array would be over-read on
     * upload and overrun on download. Unused tail bytes are zero-initialized. */
    char sofar[SOFAR_BYTES] = "xxxxxx";
    char found[FOUND_BYTES] = "xxxxxx";
    int myint[1]={0};
    unsigned long long trigger[1]={0};
    int step[1]={2};

    char *pthashToFind;
    char *ptfound;
    char *ptsofar;
    int *ptmyint;
    unsigned long long  *pttrigger;
    int *ptstep;

    /* Device buffers are allocated once and reused by every round. */
    HANDLE_ERROR( cudaMalloc( (void**)&pthashToFind, HASH_BYTES * sizeof(char)));
    HANDLE_ERROR( cudaMalloc( (void**)&ptfound, FOUND_BYTES * sizeof(char)));
    HANDLE_ERROR( cudaMalloc( (void**)&ptsofar, SOFAR_BYTES * sizeof(char)));
    HANDLE_ERROR( cudaMalloc( (void**)&ptmyint, sizeof(int)));
    HANDLE_ERROR( cudaMalloc( (void**)&ptstep, sizeof(int)));
    HANDLE_ERROR( cudaMalloc( (void**)&pttrigger, sizeof(unsigned long long )));

    /* One pair of timing events is enough; they are re-recorded each round. */
    cudaEvent_t start, stop;
    HANDLE_ERROR( cudaEventCreate( &start ));
    HANDLE_ERROR( cudaEventCreate( &stop ));

    float elapsedTime;
    int persec;

    for (int round = 1; round <= NUM_ROUNDS; ++round) {

        /* Upload the current host state; round 2 starts from whatever
         * round 1 copied back. */
        HANDLE_ERROR( cudaMemcpy(pthashToFind, hashToFind, HASH_BYTES * sizeof(char), cudaMemcpyHostToDevice) );
        HANDLE_ERROR( cudaMemcpy(ptfound, found, FOUND_BYTES * sizeof(char), cudaMemcpyHostToDevice) );
        HANDLE_ERROR( cudaMemcpy(ptsofar, sofar, SOFAR_BYTES * sizeof(char), cudaMemcpyHostToDevice) );
        HANDLE_ERROR( cudaMemcpy(ptmyint, myint, sizeof(int), cudaMemcpyHostToDevice) );
        HANDLE_ERROR( cudaMemcpy(ptstep, step, sizeof(int), cudaMemcpyHostToDevice) );
        HANDLE_ERROR( cudaMemcpy(pttrigger, trigger, sizeof(unsigned long long ), cudaMemcpyHostToDevice) );

        HANDLE_ERROR( cudaEventRecord( start, 0 ));

        myGPU<<<50000,500>>>(pthashToFind, ptfound, ptsofar, ptmyint, ptstep, pttrigger);
        /* A launch does not return a status itself; a bad launch
         * configuration only shows up through cudaGetLastError(). */
        HANDLE_ERROR( cudaGetLastError() );

        /* Stop the clock right after the kernel so the reported time does
         * not include the device-to-host copies or console output. */
        HANDLE_ERROR( cudaEventRecord( stop, 0));
        HANDLE_ERROR( cudaEventSynchronize( stop));
        HANDLE_ERROR( cudaEventElapsedTime( &elapsedTime, start, stop));

        HANDLE_ERROR( cudaMemcpy (hashToFind, pthashToFind, HASH_BYTES * sizeof(char), cudaMemcpyDeviceToHost) );
        HANDLE_ERROR( cudaMemcpy (found, ptfound, FOUND_BYTES * sizeof(char), cudaMemcpyDeviceToHost) );
        HANDLE_ERROR( cudaMemcpy (sofar, ptsofar, SOFAR_BYTES * sizeof(char), cudaMemcpyDeviceToHost) );
        HANDLE_ERROR( cudaMemcpy (myint, ptmyint, sizeof(int), cudaMemcpyDeviceToHost) );
        HANDLE_ERROR( cudaMemcpy (step, ptstep, sizeof(int), cudaMemcpyDeviceToHost) );
        HANDLE_ERROR( cudaMemcpy (trigger, pttrigger, sizeof(unsigned long long ), cudaMemcpyDeviceToHost) );

        /* trigger is unsigned long long, so it needs %llu rather than %d. */
        printf("\n\ntrigger %llu\n\n", trigger[0]);

        printf("\nfound %s\n\n", found);
        printf("\n\nsofar: %s\n\n", sofar);
        printf("\n\nmyint %d\n\n", myint[0]);
        printf("\nRound %d\n", round);

        /* elapsedTime is in milliseconds: count / ms / 1000 == millions per
         * second. Guard against a zero interval before converting to int. */
        persec = (elapsedTime > 0.0f) ? (int)(myint[0] / elapsedTime / 1000) : 0;

        printf( "Time gone: %3.1f ms\n", elapsedTime );
        printf("\n\n%d million per second\n\n", persec);

        /* stdout may be fully buffered (e.g. when redirected or run from an
         * IDE); flush so this round's output is visible before the next
         * kernel starts. */
        fflush(stdout);
    }

    HANDLE_ERROR( cudaEventDestroy( start ));
    HANDLE_ERROR( cudaEventDestroy( stop));

    /* Release the device buffers allocated above. */
    HANDLE_ERROR( cudaFree( pthashToFind ));
    HANDLE_ERROR( cudaFree( ptfound ));
    HANDLE_ERROR( cudaFree( ptsofar ));
    HANDLE_ERROR( cudaFree( ptmyint ));
    HANDLE_ERROR( cudaFree( ptstep ));
    HANDLE_ERROR( cudaFree( pttrigger ));

    return 0;

}

Re: printf problem

PostPosted: Mon Dec 10, 2012 3:31 am
by Bitweasil
Try adding

fflush(stdout);

after each kernel invocation. Kernel messages are usually queued and output in a block.