Diffstat (limited to 'src/common/hash.cpp')
-rw-r--r--   src/common/hash.cpp   520
1 file changed, 520 insertions, 0 deletions
diff --git a/src/common/hash.cpp b/src/common/hash.cpp
new file mode 100644
index 000000000..5303f07b5
--- /dev/null
+++ b/src/common/hash.cpp
@@ -0,0 +1,520 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.


#include "hash.h"
#if _M_SSE >= 0x402
#include "cpu_detect.h"
#include <nmmintrin.h>
#endif

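// All 64-bit hashing for the texture cache is dispatched through this
// function pointer, so the backend (MurmurHash3, the SSE4.2 CRC32 path, or
// the legacy hires-texture hash) can be selected once at runtime by
// SetHash64Function() below.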
static u64 (*ptrHashFunction)(const u8 *src, int len, u32 samples) = &GetMurmurHash3;

// Fletcher-32 checksum, returned as a u32.
// WARNING - may read one byte past the end of odd-length input!
// Implementation from Wikipedia.
u32 HashFletcher(const u8* data_u8, size_t length)
{
	const u16* data = (const u16*)data_u8; /* Pointer to the data to be summed */
	size_t len = (length + 1) / 2;         /* Length in 16-bit words */
	u32 sum1 = 0xffff, sum2 = 0xffff;

	while (len)
	{
		size_t tlen = len > 360 ? 360 : len;
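		// 360 is the largest number of 16-bit additions that can be
		// accumulated before sum2 might overflow a u32; after each such
		// block the sums are folded back down below.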
		len -= tlen;

		do {
			sum1 += *data++;
			sum2 += sum1;
		}
		while (--tlen);

		sum1 = (sum1 & 0xffff) + (sum1 >> 16);
		sum2 = (sum2 & 0xffff) + (sum2 >> 16);
	}

	// Second reduction step to reduce sums to 16 bits
	sum1 = (sum1 & 0xffff) + (sum1 >> 16);
	sum2 = (sum2 & 0xffff) + (sum2 >> 16);
	return (sum2 << 16) | sum1;
}


// Implementation from Wikipedia.
// Slightly slower than Fletcher above, but slightly more reliable.
#define MOD_ADLER 65521
// data: Pointer to the data to be summed; len is in bytes
u32 HashAdler32(const u8* data, size_t len)
{
	u32 a = 1, b = 0;

	while (len)
	{
		size_t tlen = len > 5550 ? 5550 : len;
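		// As in HashFletcher, the block size (5550 bytes here) is chosen
		// so the 32-bit sums cannot overflow before the deferred
		// mod-65521 reduction.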
		len -= tlen;

		do
		{
			a += *data++;
			b += a;
		}
		while (--tlen);

		a = (a & 0xffff) + (a >> 16) * (65536 - MOD_ADLER);
		b = (b & 0xffff) + (b >> 16) * (65536 - MOD_ADLER);
	}

	// It can be shown that a <= 0x1013a here, so a single subtract will do.
	if (a >= MOD_ADLER)
	{
		a -= MOD_ADLER;
	}

	// It can be shown that b can reach 0xfff87 here.
	b = (b & 0xffff) + (b >> 16) * (65536 - MOD_ADLER);

	if (b >= MOD_ADLER)
	{
		b -= MOD_ADLER;
	}

	return (b << 16) | a;
}

// Stupid hash - but can't go back now :)
// Don't use for new things. At least it's reasonably fast.
u32 HashEctor(const u8* ptr, int length)
{
	u32 crc = 0;

	for (int i = 0; i < length; i++)
	{
		crc ^= ptr[i];
		crc = (crc << 3) | (crc >> 29);
	}

	return crc;
}


#ifdef _M_X64

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

inline u64 getblock(const u64 * p, int i)
{
	return p[i];
}

//----------
// Block mix - combine the key bits with the hash bits and scramble everything

inline void bmix64(u64 & h1, u64 & h2, u64 & k1, u64 & k2, u64 & c1, u64 & c2)
{
	k1 *= c1;
	k1 = _rotl64(k1, 23);
	k1 *= c2;
	h1 ^= k1;
	h1 += h2;

	h2 = _rotl64(h2, 41);

	k2 *= c2;
	k2 = _rotl64(k2, 23);
	k2 *= c1;
	h2 ^= k2;
	h2 += h1;

	h1 = h1*3 + 0x52dce729;
	h2 = h2*3 + 0x38495ab5;

	c1 = c1*5 + 0x7b7d159c;
	c2 = c2*5 + 0x6bce6396;
}

//----------
// Finalization mix - avalanches all bits to within 0.05% bias

inline u64 fmix64(u64 k)
{
	k ^= k >> 33;
	k *= 0xff51afd7ed558ccdULL;
	k ^= k >> 33;
	k *= 0xc4ceb9fe1a85ec53ULL;
	k ^= k >> 33;

	return k;
}
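// (The two multiplier constants above are the standard MurmurHash3 64-bit
// finalizer constants.)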

u64 GetMurmurHash3(const u8 *src, int len, u32 samples)
{
	const u8 * data = (const u8*)src;
	const int nblocks = len / 16;
	u32 Step = (len / 8);
	if (samples == 0) samples = max(Step, 1u);
	Step = Step / samples;
	if (Step < 1) Step = 1;

	u64 h1 = 0x9368e53c2f6af274ULL;
	u64 h2 = 0x586dcd208f7cd3fdULL;

	u64 c1 = 0x87c37b91114253d5ULL;
	u64 c2 = 0x4cf5ad432745937fULL;


	//----------
	// body

	const u64 * blocks = (const u64 *)(data);

	for (int i = 0; i < nblocks; i += Step)
	{
		u64 k1 = getblock(blocks, i*2 + 0);
		u64 k2 = getblock(blocks, i*2 + 1);

		bmix64(h1, h2, k1, k2, c1, c2);
	}

	//----------
	// tail: mix in the last (len % 16) bytes; every case falls through
	// intentionally so all remaining bytes end up in k1/k2

	const u8 * tail = (const u8*)(data + nblocks*16);

	u64 k1 = 0;
	u64 k2 = 0;

	switch (len & 15)
	{
	case 15: k2 ^= u64(tail[14]) << 48;
	case 14: k2 ^= u64(tail[13]) << 40;
	case 13: k2 ^= u64(tail[12]) << 32;
	case 12: k2 ^= u64(tail[11]) << 24;
	case 11: k2 ^= u64(tail[10]) << 16;
	case 10: k2 ^= u64(tail[ 9]) << 8;
	case  9: k2 ^= u64(tail[ 8]) << 0;

	case  8: k1 ^= u64(tail[ 7]) << 56;
	case  7: k1 ^= u64(tail[ 6]) << 48;
	case  6: k1 ^= u64(tail[ 5]) << 40;
	case  5: k1 ^= u64(tail[ 4]) << 32;
	case  4: k1 ^= u64(tail[ 3]) << 24;
	case  3: k1 ^= u64(tail[ 2]) << 16;
	case  2: k1 ^= u64(tail[ 1]) << 8;
	case  1: k1 ^= u64(tail[ 0]) << 0;
	         bmix64(h1, h2, k1, k2, c1, c2);
	};

	//----------
	// finalization

	h2 ^= len;

	h1 += h2;
	h2 += h1;

	h1 = fmix64(h1);
	h2 = fmix64(h2);

	h1 += h2;

	return h1;
}
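// Example of the sampling behaviour of GetMurmurHash3 above (illustrative
// numbers only): with len = 1024 and samples = 4, Step = (1024 / 8) / 4 = 32,
// so the body loop mixes in only blocks 0 and 32 of the 64 sixteen-byte
// blocks rather than hashing every one.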


// CRC32 hash using the SSE4.2 instruction
u64 GetCRC32(const u8 *src, int len, u32 samples)
{
#if _M_SSE >= 0x402
	u64 h = len;
	u32 Step = (len / 8);
	const u64 *data = (const u64 *)src;
	const u64 *end = data + Step;
	if (samples == 0) samples = max(Step, 1u);
	Step = Step / samples;
	if (Step < 1) Step = 1;
	while (data < end)
	{
		h = _mm_crc32_u64(h, data[0]);
		data += Step;
	}

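	// Note: when len is an exact multiple of 8, end (and therefore data2)
	// points one past the final input byte, so this last step reads one
	// byte out of bounds - the same caveat as HashFletcher above.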
	const u8 *data2 = (const u8*)end;
	return _mm_crc32_u64(h, u64(data2[0]));
#else
	return 0;
#endif
}


/*
 * NOTE: This hash function is used for custom texture loading/dumping, so
 * it should not be changed; doing so would require recalculating the hash
 * values of all existing custom textures. If the hashing function is
 * changed anyway, make sure this one is still used when the legacy
 * parameter is true.
 */
u64 GetHashHiresTexture(const u8 *src, int len, u32 samples)
{
	const u64 m = 0xc6a4a7935bd1e995ULL;
	u64 h = len * m;
	const int r = 47;
	u32 Step = (len / 8);
	const u64 *data = (const u64 *)src;
	const u64 *end = data + Step;
	if (samples == 0) samples = max(Step, 1u);
	Step = Step / samples;
	if (Step < 1) Step = 1;
	while (data < end)
	{
		u64 k = data[0];
		data += Step;
		k *= m;
		k ^= k >> r;
		k *= m;
		h ^= k;
		h *= m;
	}

	const u8 * data2 = (const u8*)end;

	switch (len & 7)
	{
	case 7: h ^= u64(data2[6]) << 48;
	case 6: h ^= u64(data2[5]) << 40;
	case 5: h ^= u64(data2[4]) << 32;
	case 4: h ^= u64(data2[3]) << 24;
	case 3: h ^= u64(data2[2]) << 16;
	case 2: h ^= u64(data2[1]) << 8;
	case 1: h ^= u64(data2[0]);
	        h *= m;
	};

	h ^= h >> r;
	h *= m;
	h ^= h >> r;

	return h;
}
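// (The constant m and shift r above are those of Appleby's 64-bit
// MurmurHash2; only the Step-based sampling differs from the original.)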
#else
// CRC32 hash using the SSE4.2 instruction (32-bit build)
u64 GetCRC32(const u8 *src, int len, u32 samples)
{
#if _M_SSE >= 0x402
	u32 h = len;
	u32 Step = (len / 4);
	const u32 *data = (const u32 *)src;
	const u32 *end = data + Step;
	if (samples == 0) samples = max(Step, 1u);
	Step = Step / samples;
	if (Step < 1) Step = 1;
	while (data < end)
	{
		h = _mm_crc32_u32(h, data[0]);
		data += Step;
	}

	const u8 *data2 = (const u8*)end;
	return (u64)_mm_crc32_u32(h, u32(data2[0]));
#else
	return 0;
#endif
}

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

inline u32 getblock(const u32 * p, int i)
{
	return p[i];
}

//----------
// Finalization mix - force all bits of a hash block to avalanche
// (avalanches all bits to within 0.25% bias)

inline u32 fmix32(u32 h)
{
	h ^= h >> 16;
	h *= 0x85ebca6b;
	h ^= h >> 13;
	h *= 0xc2b2ae35;
	h ^= h >> 16;

	return h;
}

inline void bmix32(u32 & h1, u32 & h2, u32 & k1, u32 & k2, u32 & c1, u32 & c2)
{
	k1 *= c1;
	k1 = _rotl(k1, 11);
	k1 *= c2;
	h1 ^= k1;
	h1 += h2;

	h2 = _rotl(h2, 17);

	k2 *= c2;
	k2 = _rotl(k2, 11);
	k2 *= c1;
	h2 ^= k2;
	h2 += h1;

	h1 = h1*3 + 0x52dce729;
	h2 = h2*3 + 0x38495ab5;

	c1 = c1*5 + 0x7b7d159c;
	c2 = c2*5 + 0x6bce6396;
}

//----------

u64 GetMurmurHash3(const u8* src, int len, u32 samples)
{
	const u8 * data = (const u8*)src;
	u32 out[2];
	const int nblocks = len / 8;
	u32 Step = (len / 4);
	if (samples == 0) samples = max(Step, 1u);
	Step = Step / samples;
	if (Step < 1) Step = 1;

	u32 h1 = 0x8de1c3ac;
	u32 h2 = 0xbab98226;

	u32 c1 = 0x95543787;
	u32 c2 = 0x2ad7eb25;

	//----------
	// body

	const u32 * blocks = (const u32 *)(data + nblocks*8);

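	// The 32-bit variant walks the full-block region with negative indices
	// relative to its end; getblock(blocks, i*2) with i in [-nblocks, 0)
	// touches the same words a forward loop would, sampled every Step blocks.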
	for (int i = -nblocks; i < 0; i += Step)
	{
		u32 k1 = getblock(blocks, i*2 + 0);
		u32 k2 = getblock(blocks, i*2 + 1);

		bmix32(h1, h2, k1, k2, c1, c2);
	}

	//----------
	// tail: every case falls through intentionally, as in the 64-bit version

	const u8 * tail = (const u8*)(data + nblocks*8);

	u32 k1 = 0;
	u32 k2 = 0;

	switch (len & 7)
	{
	case 7: k2 ^= tail[6] << 16;
	case 6: k2 ^= tail[5] << 8;
	case 5: k2 ^= tail[4] << 0;
	case 4: k1 ^= tail[3] << 24;
	case 3: k1 ^= tail[2] << 16;
	case 2: k1 ^= tail[1] << 8;
	case 1: k1 ^= tail[0] << 0;
	        bmix32(h1, h2, k1, k2, c1, c2);
	};

	//----------
	// finalization

	h2 ^= len;

	h1 += h2;
	h2 += h1;

	h1 = fmix32(h1);
	h2 = fmix32(h2);

	h1 += h2;
	h2 += h1;

	// Pack the two 32-bit halves into the 64-bit result.
	out[0] = h1;
	out[1] = h2;

	return *((u64 *)&out);
}

/*
 * FIXME: The old 32-bit version of this hash produced different hashes than
 * the 64-bit version. Until someone can write a new 32-bit version that
 * produces identical hashes, this is just a copy/paste of the 64-bit one.
 */
u64 GetHashHiresTexture(const u8 *src, int len, u32 samples)
{
	const u64 m = 0xc6a4a7935bd1e995ULL;
	u64 h = len * m;
	const int r = 47;
	u32 Step = (len / 8);
	const u64 *data = (const u64 *)src;
	const u64 *end = data + Step;
	if (samples == 0) samples = max(Step, 1u);
	Step = Step / samples;
	if (Step < 1) Step = 1;
	while (data < end)
	{
		u64 k = data[0];
		data += Step;
		k *= m;
		k ^= k >> r;
		k *= m;
		h ^= k;
		h *= m;
	}

	const u8 * data2 = (const u8*)end;

	switch (len & 7)
	{
	case 7: h ^= u64(data2[6]) << 48;
	case 6: h ^= u64(data2[5]) << 40;
	case 5: h ^= u64(data2[4]) << 32;
	case 4: h ^= u64(data2[3]) << 24;
	case 3: h ^= u64(data2[2]) << 16;
	case 2: h ^= u64(data2[1]) << 8;
	case 1: h ^= u64(data2[0]);
	        h *= m;
	};

	h ^= h >> r;
	h *= m;
	h ^= h >> r;

	return h;
}
#endif

u64 GetHash64(const u8 *src, int len, u32 samples)
{
	return ptrHashFunction(src, len, samples);
}

// Sets the hash function used for the texture cache.
void SetHash64Function(bool useHiresTextures)
{
	if (useHiresTextures)
	{
		ptrHashFunction = &GetHashHiresTexture;
	}
#if _M_SSE >= 0x402
	else if (cpu_info.bSSE4_2) // SSE4.2 CRC32 version
	{
		ptrHashFunction = &GetCRC32;
	}
#endif
	else
	{
		ptrHashFunction = &GetMurmurHash3;
	}
}
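
// Typical use (a minimal sketch - `texture`, `tex_size`, and the sample
// count are hypothetical caller-side values, not defined in this file):
//
//   SetHash64Function(false);                    // CRC32 or MurmurHash3
//   u64 id = GetHash64(texture, tex_size, 512);  // sampled 64-bit hash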