Diffstat (limited to 'src/common/hash.cpp')
-rw-r--r--   src/common/hash.cpp   520
1 file changed, 520 insertions, 0 deletions
diff --git a/src/common/hash.cpp b/src/common/hash.cpp
new file mode 100644
index 000000000..5303f07b5
--- /dev/null
+++ b/src/common/hash.cpp
@@ -0,0 +1,520 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.


#include "hash.h"
#if _M_SSE >= 0x402
#include "cpu_detect.h"
#include <nmmintrin.h>
#endif

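// All 64-bit hashing for the texture cache is dispatched through this
// function pointer, so the backend (MurmurHash3, the SSE4.2 CRC32 path, or
// the legacy hires-texture hash) can be selected once at runtime by
// SetHash64Function() below.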
static u64 (*ptrHashFunction)(const u8 *src, int len, u32 samples) = &GetMurmurHash3;

// Fletcher-32 checksum, returned as a u32.
// WARNING - may read one byte past the end of odd-length input!
// Implementation from Wikipedia.
u32 HashFletcher(const u8* data_u8, size_t length)
{
	const u16* data = (const u16*)data_u8; /* Pointer to the data to be summed */
	size_t len = (length + 1) / 2;         /* Length in 16-bit words */
	u32 sum1 = 0xffff, sum2 = 0xffff;

	while (len)
	{
		size_t tlen = len > 360 ? 360 : len;
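		// 360 is the largest number of 16-bit additions that can be
		// accumulated before sum2 might overflow a u32; after each such
		// block the sums are folded back down below.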
		len -= tlen;

		do {
			sum1 += *data++;
			sum2 += sum1;
		}
		while (--tlen);

		sum1 = (sum1 & 0xffff) + (sum1 >> 16);
		sum2 = (sum2 & 0xffff) + (sum2 >> 16);
	}

	// Second reduction step to reduce sums to 16 bits
	sum1 = (sum1 & 0xffff) + (sum1 >> 16);
	sum2 = (sum2 & 0xffff) + (sum2 >> 16);
	return (sum2 << 16) | sum1;
}


// Implementation from Wikipedia.
// Slightly slower than Fletcher above, but slightly more reliable.
#define MOD_ADLER 65521
// data: Pointer to the data to be summed; len is in bytes
u32 HashAdler32(const u8* data, size_t len)
{
	u32 a = 1, b = 0;

	while (len)
	{
		size_t tlen = len > 5550 ? 5550 : len;
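		// As in HashFletcher, the block size (5550 bytes here) is chosen
		// so the 32-bit sums cannot overflow before the deferred
		// mod-65521 reduction.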
		len -= tlen;

		do
		{
			a += *data++;
			b += a;
		}
		while (--tlen);

		a = (a & 0xffff) + (a >> 16) * (65536 - MOD_ADLER);
		b = (b & 0xffff) + (b >> 16) * (65536 - MOD_ADLER);
	}

	// It can be shown that a <= 0x1013a here, so a single subtract will do.
	if (a >= MOD_ADLER)
	{
		a -= MOD_ADLER;
	}

	// It can be shown that b can reach 0xfff87 here.
	b = (b & 0xffff) + (b >> 16) * (65536 - MOD_ADLER);

	if (b >= MOD_ADLER)
	{
		b -= MOD_ADLER;
	}

	return (b << 16) | a;
}

// Stupid hash - but can't go back now :)
// Don't use for new things. At least it's reasonably fast.
u32 HashEctor(const u8* ptr, int length)
{
	u32 crc = 0;

	for (int i = 0; i < length; i++)
	{
		crc ^= ptr[i];
		crc = (crc << 3) | (crc >> 29);
	}

	return crc;
}


#ifdef _M_X64

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

inline u64 getblock(const u64 * p, int i)
{
	return p[i];
}

//----------
// Block mix - combine the key bits with the hash bits and scramble everything

inline void bmix64(u64 & h1, u64 & h2, u64 & k1, u64 & k2, u64 & c1, u64 & c2)
{
	k1 *= c1;
	k1 = _rotl64(k1, 23);
	k1 *= c2;
	h1 ^= k1;
	h1 += h2;

	h2 = _rotl64(h2, 41);

	k2 *= c2;
	k2 = _rotl64(k2, 23);
	k2 *= c1;
	h2 ^= k2;
	h2 += h1;

	h1 = h1*3 + 0x52dce729;
	h2 = h2*3 + 0x38495ab5;

	c1 = c1*5 + 0x7b7d159c;
	c2 = c2*5 + 0x6bce6396;
}

//----------
// Finalization mix - avalanches all bits to within 0.05% bias

inline u64 fmix64(u64 k)
{
	k ^= k >> 33;
	k *= 0xff51afd7ed558ccdULL;
	k ^= k >> 33;
	k *= 0xc4ceb9fe1a85ec53ULL;
	k ^= k >> 33;

	return k;
}
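// (The two multiplier constants above are the standard MurmurHash3 64-bit
// finalizer constants.)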

u64 GetMurmurHash3(const u8 *src, int len, u32 samples)
{
	const u8 * data = (const u8*)src;
	const int nblocks = len / 16;
	u32 Step = (len / 8);
	if (samples == 0) samples = max(Step, 1u);
	Step = Step / samples;
	if (Step < 1) Step = 1;

	u64 h1 = 0x9368e53c2f6af274ULL;
	u64 h2 = 0x586dcd208f7cd3fdULL;

	u64 c1 = 0x87c37b91114253d5ULL;
	u64 c2 = 0x4cf5ad432745937fULL;


	//----------
	// body

	const u64 * blocks = (const u64 *)(data);

	for (int i = 0; i < nblocks; i += Step)
	{
		u64 k1 = getblock(blocks, i*2 + 0);
		u64 k2 = getblock(blocks, i*2 + 1);

		bmix64(h1, h2, k1, k2, c1, c2);
	}

	//----------
	// tail: mix in the last (len % 16) bytes; every case falls through
	// intentionally so all remaining bytes end up in k1/k2

	const u8 * tail = (const u8*)(data + nblocks*16);

	u64 k1 = 0;
	u64 k2 = 0;

	switch (len & 15)
	{
	case 15: k2 ^= u64(tail[14]) << 48;
	case 14: k2 ^= u64(tail[13]) << 40;
	case 13: k2 ^= u64(tail[12]) << 32;
	case 12: k2 ^= u64(tail[11]) << 24;
	case 11: k2 ^= u64(tail[10]) << 16;
	case 10: k2 ^= u64(tail[ 9]) << 8;
	case  9: k2 ^= u64(tail[ 8]) << 0;

	case  8: k1 ^= u64(tail[ 7]) << 56;
	case  7: k1 ^= u64(tail[ 6]) << 48;
	case  6: k1 ^= u64(tail[ 5]) << 40;
	case  5: k1 ^= u64(tail[ 4]) << 32;
	case  4: k1 ^= u64(tail[ 3]) << 24;
	case  3: k1 ^= u64(tail[ 2]) << 16;
	case  2: k1 ^= u64(tail[ 1]) << 8;
	case  1: k1 ^= u64(tail[ 0]) << 0;
	         bmix64(h1, h2, k1, k2, c1, c2);
	};

	//----------
	// finalization

	h2 ^= len;

	h1 += h2;
	h2 += h1;

	h1 = fmix64(h1);
	h2 = fmix64(h2);

	h1 += h2;

	return h1;
}
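// Example of the sampling behaviour of GetMurmurHash3 above (illustrative
// numbers only): with len = 1024 and samples = 4, Step = (1024 / 8) / 4 = 32,
// so the body loop mixes in only blocks 0 and 32 of the 64 sixteen-byte
// blocks rather than hashing every one.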


// CRC32 hash using the SSE4.2 instruction
u64 GetCRC32(const u8 *src, int len, u32 samples)
{
#if _M_SSE >= 0x402
	u64 h = len;
	u32 Step = (len / 8);
	const u64 *data = (const u64 *)src;
	const u64 *end = data + Step;
	if (samples == 0) samples = max(Step, 1u);
	Step = Step / samples;
	if (Step < 1) Step = 1;
	while (data < end)
	{
		h = _mm_crc32_u64(h, data[0]);
		data += Step;
	}

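	// Note: when len is an exact multiple of 8, end (and therefore data2)
	// points one past the final input byte, so this last step reads one
	// byte out of bounds - the same caveat as HashFletcher above.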
	const u8 *data2 = (const u8*)end;
	return _mm_crc32_u64(h, u64(data2[0]));
#else
	return 0;
#endif
}


/*
 * NOTE: This hash function is used for custom texture loading/dumping, so
 * it should not be changed; doing so would require recalculating the hash
 * values of all existing custom textures. If the hashing function is
 * changed anyway, make sure this one is still used when the legacy
 * parameter is true.
 */
u64 GetHashHiresTexture(const u8 *src, int len, u32 samples)
{
	const u64 m = 0xc6a4a7935bd1e995ULL;
	u64 h = len * m;
	const int r = 47;
	u32 Step = (len / 8);
	const u64 *data = (const u64 *)src;
	const u64 *end = data + Step;
	if (samples == 0) samples = max(Step, 1u);
	Step = Step / samples;
	if (Step < 1) Step = 1;
	while (data < end)
	{
		u64 k = data[0];
		data += Step;
		k *= m;
		k ^= k >> r;
		k *= m;
		h ^= k;
		h *= m;
	}

	const u8 * data2 = (const u8*)end;

	switch (len & 7)
	{
	case 7: h ^= u64(data2[6]) << 48;
	case 6: h ^= u64(data2[5]) << 40;
	case 5: h ^= u64(data2[4]) << 32;
	case 4: h ^= u64(data2[3]) << 24;
	case 3: h ^= u64(data2[2]) << 16;
	case 2: h ^= u64(data2[1]) << 8;
	case 1: h ^= u64(data2[0]);
	        h *= m;
	};

	h ^= h >> r;
	h *= m;
	h ^= h >> r;

	return h;
}
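// (The constant m and shift r above are those of Appleby's 64-bit
// MurmurHash2; only the Step-based sampling differs from the original.)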
#else
// CRC32 hash using the SSE4.2 instruction (32-bit build)
u64 GetCRC32(const u8 *src, int len, u32 samples)
{
#if _M_SSE >= 0x402
	u32 h = len;
	u32 Step = (len / 4);
	const u32 *data = (const u32 *)src;
	const u32 *end = data + Step;
	if (samples == 0) samples = max(Step, 1u);
	Step = Step / samples;
	if (Step < 1) Step = 1;
	while (data < end)
	{
		h = _mm_crc32_u32(h, data[0]);
		data += Step;
	}

	const u8 *data2 = (const u8*)end;
	return (u64)_mm_crc32_u32(h, u32(data2[0]));
#else
	return 0;
#endif
}

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

inline u32 getblock(const u32 * p, int i)
{
	return p[i];
}

//----------
// Finalization mix - force all bits of a hash block to avalanche
// (avalanches all bits to within 0.25% bias)

inline u32 fmix32(u32 h)
{
	h ^= h >> 16;
	h *= 0x85ebca6b;
	h ^= h >> 13;
	h *= 0xc2b2ae35;
	h ^= h >> 16;

	return h;
}

inline void bmix32(u32 & h1, u32 & h2, u32 & k1, u32 & k2, u32 & c1, u32 & c2)
{
	k1 *= c1;
	k1 = _rotl(k1, 11);
	k1 *= c2;
	h1 ^= k1;
	h1 += h2;

	h2 = _rotl(h2, 17);

	k2 *= c2;
	k2 = _rotl(k2, 11);
	k2 *= c1;
	h2 ^= k2;
	h2 += h1;

	h1 = h1*3 + 0x52dce729;
	h2 = h2*3 + 0x38495ab5;

	c1 = c1*5 + 0x7b7d159c;
	c2 = c2*5 + 0x6bce6396;
}

//----------

u64 GetMurmurHash3(const u8* src, int len, u32 samples)
{
	const u8 * data = (const u8*)src;
	u32 out[2];
	const int nblocks = len / 8;
	u32 Step = (len / 4);
	if (samples == 0) samples = max(Step, 1u);
	Step = Step / samples;
	if (Step < 1) Step = 1;

	u32 h1 = 0x8de1c3ac;
	u32 h2 = 0xbab98226;

	u32 c1 = 0x95543787;
	u32 c2 = 0x2ad7eb25;

	//----------
	// body

	const u32 * blocks = (const u32 *)(data + nblocks*8);

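	// The 32-bit variant walks the full-block region with negative indices
	// relative to its end; getblock(blocks, i*2) with i in [-nblocks, 0)
	// touches the same words a forward loop would, sampled every Step blocks.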
	for (int i = -nblocks; i < 0; i += Step)
	{
		u32 k1 = getblock(blocks, i*2 + 0);
		u32 k2 = getblock(blocks, i*2 + 1);

		bmix32(h1, h2, k1, k2, c1, c2);
	}

	//----------
	// tail: every case falls through intentionally, as in the 64-bit version

	const u8 * tail = (const u8*)(data + nblocks*8);

	u32 k1 = 0;
	u32 k2 = 0;

	switch (len & 7)
	{
	case 7: k2 ^= tail[6] << 16;
	case 6: k2 ^= tail[5] << 8;
	case 5: k2 ^= tail[4] << 0;
	case 4: k1 ^= tail[3] << 24;
	case 3: k1 ^= tail[2] << 16;
	case 2: k1 ^= tail[1] << 8;
	case 1: k1 ^= tail[0] << 0;
	        bmix32(h1, h2, k1, k2, c1, c2);
	};

	//----------
	// finalization

	h2 ^= len;

	h1 += h2;
	h2 += h1;

	h1 = fmix32(h1);
	h2 = fmix32(h2);

	h1 += h2;
	h2 += h1;

	// Pack the two 32-bit halves into the 64-bit result.
	out[0] = h1;
	out[1] = h2;

	return *((u64 *)&out);
}

/*
 * FIXME: The old 32-bit version of this hash produced different hashes than
 * the 64-bit version. Until someone can write a new 32-bit version that
 * produces identical hashes, this is just a copy/paste of the 64-bit one.
 */
u64 GetHashHiresTexture(const u8 *src, int len, u32 samples)
{
	const u64 m = 0xc6a4a7935bd1e995ULL;
	u64 h = len * m;
	const int r = 47;
	u32 Step = (len / 8);
	const u64 *data = (const u64 *)src;
	const u64 *end = data + Step;
	if (samples == 0) samples = max(Step, 1u);
	Step = Step / samples;
	if (Step < 1) Step = 1;
	while (data < end)
	{
		u64 k = data[0];
		data += Step;
		k *= m;
		k ^= k >> r;
		k *= m;
		h ^= k;
		h *= m;
	}

	const u8 * data2 = (const u8*)end;

	switch (len & 7)
	{
	case 7: h ^= u64(data2[6]) << 48;
	case 6: h ^= u64(data2[5]) << 40;
	case 5: h ^= u64(data2[4]) << 32;
	case 4: h ^= u64(data2[3]) << 24;
	case 3: h ^= u64(data2[2]) << 16;
	case 2: h ^= u64(data2[1]) << 8;
	case 1: h ^= u64(data2[0]);
	        h *= m;
	};

	h ^= h >> r;
	h *= m;
	h ^= h >> r;

	return h;
}
#endif

u64 GetHash64(const u8 *src, int len, u32 samples)
{
	return ptrHashFunction(src, len, samples);
}

// Sets the hash function used for the texture cache.
void SetHash64Function(bool useHiresTextures)
{
	if (useHiresTextures)
	{
		ptrHashFunction = &GetHashHiresTexture;
	}
#if _M_SSE >= 0x402
	else if (cpu_info.bSSE4_2) // SSE4.2 CRC32 version
	{
		ptrHashFunction = &GetCRC32;
	}
#endif
	else
	{
		ptrHashFunction = &GetMurmurHash3;
	}
}
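
// Typical use (a minimal sketch - `texture`, `tex_size`, and the sample
// count are hypothetical caller-side values, not defined in this file):
//
//   SetHash64Function(false);                    // CRC32 or MurmurHash3
//   u64 id = GetHash64(texture, tex_size, 512);  // sampled 64-bit hash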