1 /**
2 Copyright: Copyright (c) 2020, Joakim Brännström. All rights reserved.
3 License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0)
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 A convenient library for calculating the hash of bits of data.
7 */
8 module my.hash;
9 
10 import std.digest.crc : CRC64ISO;
11 import std.digest.murmurhash : MurmurHash3;
12 
13 import std.format : FormatSpec;
14 import std.format : formatValue, formattedWrite;
15 import std.range.primitives : put;
16 
// Generic names for the 64-bit checksum: the hasher, the result type and the
// helper functions all map to the CRC-64/ISO based implementation below.
alias BuildChecksum64 = CRC64ISO;
alias Checksum64 = Crc64Iso;
alias makeChecksum64 = makeCrc64Iso;
alias toChecksum64 = toCrc64Iso;

// Generic names for the 128-bit checksum: the hasher, the result type and the
// helper functions all map to the MurmurHash3 based implementation below.
alias BuildChecksum128 = MurmurHash3!(128, 64);
alias Checksum128 = Murmur3;
alias makeChecksum128 = makeMurmur3;
alias toChecksum128 = toMurmur3;
26 
/** Copy a value into a fixed-size byte buffer.
 *
 * Params:
 *  v = the value to convert
 *
 * Returns: a `ubyte[T.sizeof]` holding a copy of `v` constructed in place.
 */
auto toBytes(T)(T v) @trusted pure nothrow @nogc {
    import std.conv : emplace;

    ubyte[T.sizeof] raw;
    // Construct the copy of `v` directly on top of the byte buffer; the
    // pointer returned by emplace is not needed.
    cast(void) emplace!T(cast(T*) raw.ptr, v);
    return raw;
}
36 
/// Reinterpret eight bytes as a `long`, using the byte order of the host.
long toLong(ubyte[8] v) @trusted pure nothrow @nogc {
    // Both members share the same storage: write the bytes, read the integer.
    union Overlay {
        ubyte[8] bytes;
        long value;
    }

    Overlay view;
    view.bytes = v;
    return view.value;
}
40 
/// Reinterpret eight bytes as a `ulong`, using the byte order of the host.
ulong toUlong(ubyte[8] v) @trusted pure nothrow @nogc {
    // Both members share the same storage: write the bytes, read the integer.
    union Overlay {
        ubyte[8] bytes;
        ulong value;
    }

    Overlay view;
    view.bytes = v;
    return view.value;
}
44 
/** Convert to `size_t` for use in e.g. the operator overload `toHash`.
 *
 * When `size_t` is four bytes and `T` is eight bytes, the upper and lower
 * 32-bit halves of `v` are added together (wrapping on overflow). In every
 * other case `v` is returned as is.
 */
size_t toSizeT(T)(T v) if (is(T : uint) || is(T : ulong)) {
    enum foldHalves = size_t.sizeof == 4 && T.sizeof == 8;

    static if (foldHalves) {
        const lower = cast(uint) v;
        const upper = cast(uint)(v >> 32);
        return lower + upper;
    } else {
        return v;
    }
}
52 
/// ditto.
size_t toSizeT(const(ubyte)[4] v) @trusted pure nothrow @nogc {
    // View the four bytes as one host-order 32-bit integer and let the
    // integer overload do the conversion.
    union Overlay {
        ubyte[4] bytes;
        uint word;
    }

    Overlay view;
    view.bytes = v;
    return toSizeT(view.word);
}
57 
/// ditto.
size_t toSizeT(const(ubyte)[8] v) @trusted pure nothrow @nogc {
    // View the eight bytes as one host-order 64-bit integer and let the
    // integer overload do the conversion.
    union Overlay {
        ubyte[8] bytes;
        ulong word;
    }

    Overlay view;
    view.bytes = v;
    return toSizeT(view.word);
}
62 
/// Create a 128bit hash of the bytes in `p`.
Murmur3 makeMurmur3(const(ubyte)[] p) @safe nothrow {
    BuildChecksum128 state;
    state.put(p);
    // Finish the hasher and convert the resulting 16 bytes.
    return toMurmur3(state.finish());
}
68 
/** Convenient function to convert raw bytes to a checksum type.
 *
 * Bytes 0 .. 8 become the first word and bytes 8 .. 16 the second word, both
 * read in the byte order of the host.
 */
Murmur3 toMurmur3(const(ubyte)[16] p) @trusted pure nothrow @nogc {
    // Both members share the same storage: write the bytes, read the words.
    union Overlay {
        ubyte[16] bytes;
        ulong[2] words;
    }

    Overlay view;
    view.bytes = p;
    return Murmur3(view.words[0], view.words[1]);
}
75 
/// Finish the hasher `h` and convert the produced bytes to a checksum type.
Murmur3 toMurmur3(ref BuildChecksum128 h) @safe pure nothrow @nogc {
    const ubyte[16] digest = h.finish();
    return toMurmur3(digest);
}
79 
/** 128bit hash stored as two 64-bit words.
 *
 * Values compare equal when both words match and are ordered by `c0` first
 * with `c1` as the tie-breaker.
 */
struct Murmur3 {
    /// First 64-bit word.
    ulong c0;
    /// Second 64-bit word.
    ulong c1;

    /// Returns: the (wrapping) sum of both words converted with `toSizeT`.
    size_t toHash() @safe nothrow const pure @nogc {
        return (c0 + c1).toSizeT;
    }

    /// Returns: true when both words are equal to those of `o`.
    bool opEquals(const typeof(this) o) const nothrow @safe pure @nogc {
        return c0 == o.c0 && c1 == o.c1;
    }

    /** Three-way comparison.
     *
     * `rhs` is taken by value, like in `opEquals`, so temporaries such as
     * the result of a function call can be compared too; a `ref` parameter
     * would only accept lvalues.
     *
     * Returns: -1 if `this` is less than `rhs`, 1 if it is bigger and zero
     * if they are equal.
     */
    int opCmp(const typeof(this) rhs) @safe pure nothrow const @nogc {
        if (c0 != rhs.c0)
            return c0 < rhs.c0 ? -1 : 1;
        if (c1 != rhs.c1)
            return c1 < rhs.c1 ? -1 : 1;
        return 0;
    }

    /** Write the hash to `w` as the two words separated by an underscore.
     *
     * The words are written in hexadecimal when the format specifier is
     * `x`, otherwise with `%s`.
     */
    void toString(Writer, Char)(scope Writer w, FormatSpec!Char fmt) const {
        if (fmt.spec == 'x')
            formattedWrite(w, "%x_%x", c0, c1);
        else
            formattedWrite(w, "%s_%s", c0, c1);
    }
}
113 
/// Create a 64bit hash of the bytes in `p`.
Crc64Iso makeCrc64Iso(const(ubyte)[] p) @trusted pure nothrow @nogc {
    BuildChecksum64 crc;
    crc.put(p);
    // Read the current checksum bytes and convert them.
    const ubyte[8] digest = crc.peek;
    return toCrc64Iso(digest);
}
120 
/** Convenient function to convert raw bytes to a checksum type.
 *
 * The eight bytes are read as one 64-bit word in the byte order of the host.
 */
Crc64Iso toCrc64Iso(const(ubyte)[8] p) @trusted pure nothrow @nogc {
    // Both members share the same storage: write the bytes, read the word.
    union Overlay {
        ubyte[8] bytes;
        ulong word;
    }

    Overlay view;
    view.bytes = p;
    return Crc64Iso(view.word);
}
125 
/// Convert the bytes returned by `h.peek` to a checksum type.
Crc64Iso toCrc64Iso(ref BuildChecksum64 h) @trusted pure nothrow @nogc {
    // The byte-array overload performs the actual conversion.
    return toCrc64Iso(h.peek);
}
130 
/** 64-bit checksum.
 *
 * It is intended to be used generically in Dextool whenever such a checksum
 * is needed.
 *
 * CRC64 ISO is used because implementations exist in other languages, which
 * makes it possible to calculate the checksum in e.g. Python and compare it
 * with the one from Dextool.
 *
 * TODO: check if Python has a 64ISO or 64ECMA implementation.
 */
struct Crc64Iso {
    /// The checksum value.
    ulong c0;

    /** Hash of the checksum.
     *
     * Returns: `c0` unchanged when `size_t` can hold it. When `size_t` is
     * four bytes the upper and lower halves are added together (wrapping),
     * the same folding as `toSizeT` performs, because a `ulong` does not
     * implicitly convert to a 32-bit `size_t`.
     */
    size_t toHash() @safe pure nothrow const @nogc scope {
        static if (size_t.sizeof == 4)
            return cast(uint) c0 + cast(uint)(c0 >> 32);
        else
            return c0;
    }

    /// Returns: true when the checksum values are equal.
    bool opEquals(const typeof(this) s) @safe pure nothrow const @nogc scope {
        return c0 == s.c0;
    }

    /** Write the checksum to `w`.
     *
     * The value is written in hexadecimal when the format specifier is `x`,
     * otherwise with `%s`.
     */
    void toString(Writer, Char)(scope Writer w, FormatSpec!Char fmt) const {
        if (fmt.spec == 'x')
            formattedWrite(w, "%x", c0);
        else
            formattedWrite(w, "%s", c0);
    }
}