1-((1-odds_of_matching_16bits_on_any_hash) * (1-odds_of_matching_32bits_on_any_hash))
instead of
odds_of_matching_32bits_on_any_hash
This is the way I would do it:
- check 15 bits on sharedBitmap (4 KiB)
- if this only weeds out about 1 in 50 candidates (or, obviously, if the bitmap is all ones), it should not be used
- 4 KiB is better than 8 KiB because 3 thread blocks can fit in shared memory per multiprocessor instead of just 1
- check 29 to 33 bits on DEVICE_Bitmap (64 MiB to 1024 MiB)
- use 24 to 27 bits on DEVICE_ChainedHashTable (64 MiB + 4 bytes to 512 MiB + 4 bytes)
- linear search
Here's a nice little function:
- Code: Select all
/*
 * Find the largest bitmap that fits in memory, then the largest hash table that fits in what is left.
 *
 * All sizes are powers of two, expressed as a bit count:
 *   - a bitmap of N bits occupies 2^(N-23) MiB (29 bits = 64 MiB),
 *   - a hash table of N bits holds 2^N 4-byte entries, i.e. 2^(N-18) MiB (24 bits = 64 MiB).
 * Minimum size for both the bitmap and the hash table is 64 MiB.
 * The bitmap is capped at 34 bits (2 GiB) and the hash table at 31 bits (8 GiB).
 * This function should be changed when cards have over 8 GiB. Also change the way to do a look up
 * in the bitmap and hash table.
 *
 * The bitmap is sized first, always leaving 64 MiB for the hash table; the hash table then grows
 * into whatever remains. Budget arithmetic is done in 64 bits so that a card too small for the fixed
 * overhead (or an oversized reserveRam) is rejected instead of wrapping around.
 *
 * [in] totalRam is the total amount of video ram in MiB
 * [in] reserveRam is the total of video ram in MiB to not use (should be around 16 MiB to be safe)
 * [in] numHashes is the total number of hashes trying to crack
 * [out] deviceBitmapBits is the number of bits for the bitmap on the device
 * [out] deviceHashTableBits is the number of bits for the hash table on the device
 * returns true on success; on failure both outputs are set to their minimums (29 and 24) and
 *         should not be used
 */
bool calcBitmapAndHashTableBits(uint32_t totalRam, uint32_t reserveRam, uint32_t numHashes, uint32_t &deviceBitmapBits, uint32_t &deviceHashTableBits)
{
    const uint32_t HASH_SIZE = 16 + 16 + 4; // hash size + password size + hash index
    const uint64_t MIN_STRUCT_MIB = 64;     // smallest bitmap / hash table, in MiB
    const uint32_t MIN_BITMAP_BITS = 29;    // 2^29 bits     = 64 MiB
    const uint32_t MAX_BITMAP_BITS = 34;    // 2^34 bits     = 2 GiB
    const uint32_t MIN_HASH_TABLE_BITS = 24; // 2^24 entries = 64 MiB
    const uint32_t MAX_HASH_TABLE_BITS = 31; // 2^31 entries = 8 GiB

    deviceBitmapBits = MIN_BITMAP_BITS;
    deviceHashTableBits = MIN_HASH_TABLE_BITS;

    // MiB needed to store the hashes themselves: numHashes * HASH_SIZE / 2^20, rounded up by one.
    const uint64_t hashRam = ((static_cast<uint64_t>(HASH_SIZE) * (numHashes >> 8)) >> 12) + 1;
    const uint64_t fixedRam = static_cast<uint64_t>(reserveRam) + hashRam;

    // Not even the two minimum-sized structures fit: bail out before any subtraction can wrap.
    if (fixedRam + 2 * MIN_STRUCT_MIB > totalRam)
    {
        return false;
    }
    const uint64_t availableRam = totalRam - fixedRam; // >= 128 MiB from here on

    // Grow the bitmap by doubling while it still leaves 64 MiB for the hash table.
    uint64_t bitmapRam = MIN_STRUCT_MIB;
    while (deviceBitmapBits < MAX_BITMAP_BITS && (bitmapRam << 1) <= availableRam - MIN_STRUCT_MIB)
    {
        bitmapRam <<= 1;
        ++deviceBitmapBits;
    }

    // Grow the hash table by doubling into whatever the bitmap left over.
    const uint64_t hashTableBudget = availableRam - bitmapRam;
    uint64_t hashTableRam = MIN_STRUCT_MIB;
    while (deviceHashTableBits < MAX_HASH_TABLE_BITS && (hashTableRam << 1) <= hashTableBudget)
    {
        hashTableRam <<= 1;
        ++deviceHashTableBits;
    }

    // Final sanity check of the complete layout against the card's memory.
    return fixedRam + bitmapRam + hashTableRam <= totalRam;
}
- Code: Select all
/*
 * Checks one candidate hash, given as the 32-bit words a, b, c, d, against the loaded hashes and
 * records a hit. This is a sketch: the parameter list is elided, and a, b, c, d, curPw, the bitmaps,
 * the tables and the result buffers are all defined outside this snippet.
 *
 * Stages:
 *   1. filter through s_bitmap (indexed by bits of a) and d_bitmap (indexed by bits of b and c),
 *   2. scan the chain selected by the low deviceHashTableBits bits of c for an entry whose first
 *      64-bit word equals (a:d),
 *   3. among consecutive entries with that same first word, find one whose second 64-bit word
 *      equals (b:c) and report it.
 *
 * NOTE(review): d_hashes appears to store two 64-bit words per hash, (a:d) at 2*i and (b:c) at
 * 2*i+1 - confirm against the code that builds it.
 */
__device__ inline void checkHash(...) // :)
{
    bool goodToGo = false;
    // Declared at function scope: the confirmation stage after the barrier needs all three.
    uint32_t x = 0;
    uint32_t end = 0;
    const uint64_t ad = (((uint64_t)a) << 32) | d;

    if ((s_bitmap[a >> 20] & (1 << (a & 0x7))) && // grab the highest 12 bits from a and grab the lowest 3 bits from a
        (d_bitmap[b >> (35 - deviceBitmapBits)] & (1 << (c >> 29)))) // grab the highest deviceBitmapBits - 3 bits from b and grab the highest 3 bits from c
    {
        // Unsigned literal: with deviceHashTableBits == 31 a signed (1 << 31) - 1 would overflow.
        const uint32_t bucket = c & ((1u << deviceHashTableBits) - 1u); // grab the lowest deviceHashTableBits bits from c
        const uint32_t start = d_chainedHashTable[bucket];
        end = d_chainedHashTable[bucket + 1]; // chain for this bucket is [start, end)
        for (x = start; x < end; x++)
        {
            if (d_hashes[2*x] == ad)
            {
                // I never decided whether to:
                // stop here at 88 to 91 bits matching and report partial match somehow (and have the CPU check the rest)
                // or continue and check the full hash
                goodToGo = true;
                break;
            }
        }
    }
    __syncthreads();
    if (goodToGo)
    {
        const uint64_t bc = (((uint64_t)b) << 32) | c;
        do
        {
            if (d_hashes[2*x+1] == bc)
            {
                // Report successful
                // Atomic functions require at least compute capability 1.1 :(
                uint32_t y;
                if (compute_capability_1_0) // :)
                {
                    // No atomics available: flag the hash by its own index instead.
                    y = x;
                    d_crackedHash[y] = 1;
                }
                else
                {
                    // Reserve the next free result slot and remember which hash it belongs to.
                    y = atomicAdd(d_crackedCount, 1);
                    d_crackedHashIndex[y] = x;
                }
                d_crackedPw[y] = curPw; // :)
                break;
            }
            x++;
            // Stay inside this bucket's chain; without the bound the scan could run past the
            // chain and read beyond the entries it is meant to cover.
        } while (x < end && d_hashes[2*x] == ad);
    }
    __syncthreads();
}
Oh right, in your code numberOfPasswords should be called numberOfHashes.
Hopefully that helps/works. I haven't even tested it to see whether there are any syntax errors.