-
Notifications
You must be signed in to change notification settings - Fork 0
/
hash_lookup.h
166 lines (155 loc) · 5.07 KB
/
hash_lookup.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
/*
hash_lookup.h
A dictionary for storing key-value pairs.
Utilises the C Minimum Perfect Hashing library (http://cmph.sourceforge.net/)
*/
#ifndef HASH_LOOKUP
#define HASH_LOOKUP
#include <cmph.h>
#include <gmp.h>
#include <assert.h>
#include <stdlib.h>
#include "karp_rabin.h"
/*
typedef struct hash_lookup
Structure for holding the pairs.
Components:
cmph_t *hash - The hash function
int *values - The values
char *keys - The keys
int num - The number of items
*/
typedef struct {
cmph_t *hash;
int num;
char *last_key;
int key_size;
fingerprint *keys;
int *end_pattern;
} hash_lookup;
/*
hashlookup_build
Constructs a hash_lookup object.
Components:
char **keys - A list of the keys as strings 1 character long
int *values - A list of the values for each key
int num - The number of key-value pairs
Returns hash_lookup:
The constructed dictionary
*/
hash_lookup hashlookup_build(fingerprint *prints, int *end_pattern, int num, fingerprinter printer) {
hash_lookup lookup;
lookup.num = num;
if (num > 1) {
int size = (printer->p->_mp_size * (sizeof(mp_limb_t) << 1) + 1) * 3;
char **keys = malloc(sizeof(char*) * num);
int i;
int *sizes = malloc(sizeof(int) * num);
for (i = 0; i < num; i++) {
keys[i] = malloc(sizeof(char) * size);
sizes[i] = gmp_snprintf(keys[i], size, "%Zx,%Zx,%Zx", prints[i]->finger, prints[i]->r_k, prints[i]->r_mk);
}
cmph_io_adapter_t *source = cmph_io_vector_adapter(keys, num);
cmph_config_t *config = cmph_config_new(source);
cmph_config_set_algo(config, CMPH_CHD);
lookup.hash = cmph_new(config);
cmph_config_destroy(config);
cmph_io_vector_adapter_destroy(source);
lookup.keys = malloc(sizeof(fingerprint) * num);
if (end_pattern) lookup.end_pattern = malloc(sizeof(int) * num);
else lookup.end_pattern = NULL;
int location;
int *visited = malloc(sizeof(int) * num);
for (i = 0; i < num; i++) {
visited[i] = -1;
}
for (i = 0; i < num; i++) {
location = cmph_search(lookup.hash, keys[i], sizes[i]);
lookup.keys[location] = init_fingerprint();
assert(visited[location] == -1); //Check for collisions
visited[location] = i;
fingerprint_assign(prints[i], lookup.keys[location]);
free(keys[i]);
if (end_pattern) lookup.end_pattern[location] = end_pattern[i];
}
free(keys);
free(visited);
lookup.key_size = size;
lookup.last_key = malloc(sizeof(char) * size);
} else if (num == 1) {
lookup.keys = malloc(sizeof(fingerprint));
lookup.keys[0] = init_fingerprint();
fingerprint_assign(prints[0], lookup.keys[0]);
if (end_pattern) {
lookup.end_pattern = malloc(sizeof(int));
lookup.end_pattern[0] = end_pattern[0];
} else {
lookup.end_pattern = NULL;
}
}
return lookup;
}
/*
hashlookup_search
Searches the dictionary for the corresponding value to a key.
Parameters:
hash_lookup lookup - The dictionary to search
char key - The key to search for
Returns int:
values[lookup.hash(key)] if key \in keys
0 otherwise
*/
int hashlookup_search(hash_lookup lookup, fingerprint key, int *match) {
if (lookup.num == 0) return -1;
if (lookup.num == 1) {
if (fingerprint_equals(key, lookup.keys[0])) {
if (lookup.end_pattern && match) *match |= lookup.end_pattern[0];
return 0;
}
return -1;
}
int size = gmp_snprintf(lookup.last_key, lookup.key_size, "%Zx,%Zx,%Zx", key->finger, key->r_k, key->r_mk);
int location = cmph_search(lookup.hash, lookup.last_key, size);
if ((location < lookup.num) && fingerprint_equals(lookup.keys[location], key)) {
if (lookup.end_pattern && match) *match |= lookup.end_pattern[location];
return location;
}
return -1;
}
/*
hashlookup_free
Frees the dictionary.
Parameters:
hash_lookup *lookup - The dictionary to free
*/
void hashlookup_free(hash_lookup *lookup) {
if (lookup->num > 1) {
cmph_destroy(lookup->hash);
free(lookup->last_key);
int i;
for (i = 0; i < lookup->num; i++) {
fingerprint_free(lookup->keys[i]);
}
} else if (lookup->num == 1) {
fingerprint_free(lookup->keys[0]);
}
if (lookup->num) {
free(lookup->keys);
if (lookup->end_pattern) free(lookup->end_pattern);
}
}
int hashlookup_size(hash_lookup lookup) {
int size = sizeof(fingerprint) * lookup.num;
if (lookup.end_pattern) {
size += sizeof(int) * lookup.num;
}
if (lookup.num > 1) {
size += cmph_packed_size(lookup.hash) + sizeof(char) * lookup.key_size;
}
int i;
for (i = 0; i < lookup.num; i++) {
size += fingerprint_size(lookup.keys[i]);
}
return size;
}
#endif