libBf 0.1
|
00001 #ifndef HASH_H 00002 #define HASH_H 00003 00004 #include <algorithm> 00005 #include <stdexcept> 00006 #include <vector> 00007 #include <boost/functional/hash.hpp> 00008 #include <boost/nondet_random.hpp> 00009 00010 namespace bf { 00011 00013 struct random_seed 00014 { 00015 static unsigned get() 00016 { 00017 return boost::random_device()(); 00018 } 00019 }; 00020 00023 template <unsigned Seed = 42> 00024 struct fixed_seed 00025 { 00026 static unsigned get() 00027 { 00028 return Seed; 00029 } 00030 }; 00031 00033 class basic_hasher 00034 { 00035 public: 00036 typedef std::size_t value_type; 00037 00038 basic_hasher(std::size_t seed) 00039 : seed_(seed) 00040 { 00041 } 00042 00043 template <typename T> 00044 std::size_t operator()(const T& x) const 00045 { 00046 std::size_t s = seed_; 00047 boost::hash_combine(s, x); 00048 return s; 00049 } 00050 00051 private: 00052 std::size_t seed_; 00053 }; 00054 00057 template <typename Derived> 00058 class hash_policy 00059 { 00060 public: 00066 template <typename T, typename F> 00067 void each(const T& x, F f) const 00068 { 00069 for (auto h : derived().hash(x)) 00070 f(h); 00071 } 00072 00081 template <typename T, typename F> 00082 bool any(const T& x, F f) const 00083 { 00084 for (auto h : derived().hash(x)) 00085 if (f(h)) 00086 return true; 00087 00088 return false; 00089 } 00090 00099 template <typename T, typename F> 00100 bool all(const T& x, F f) const 00101 { 00102 for (auto h : derived().hash(x)) 00103 if (! f(h)) 00104 return false; 00105 00106 return true; 00107 } 00108 00115 template <typename T, typename F> 00116 void each_with_index(const T& x, F f) const 00117 { 00118 auto h = derived().hash(x); 00119 for (unsigned i = 0; i < h.size(); ++i) 00120 f(h[i], i); 00121 } 00122 00133 template <typename T, typename F> 00134 bool any_with_index(const T& x, F f) const 00135 { 00136 auto h = derived().hash(x); 00137 for (unsigned i = 0; i < h.size(); ++i) 00138 if (f(h[i], i)) 00139 return true; 00140 00141 return false; 00142 } 00143 00154 template <typename T, typename F> 00155 bool all_with_index(const T& x, F f) const 00156 { 00157 auto h = derived().hash(x); 00158 for (unsigned i = 0; i < h.size(); ++i) 00159 if (! f(h[i], i)) 00160 return false; 00161 00162 return true; 00163 } 00164 00165 private: 00166 // 00167 // CRTP interface 00168 // 00169 Derived& derived() 00170 { 00171 return *static_cast<Derived*>(this); 00172 } 00173 00174 const Derived& derived() const 00175 { 00176 return *static_cast<const Derived*>(this); 00177 } 00178 }; 00179 00183 template <typename Hasher = basic_hasher, typename Seed = fixed_seed<42>> 00184 class default_hashing : public hash_policy<default_hashing<Hasher, Seed>> 00185 { 00186 typedef Hasher hasher; 00187 typedef Seed seed; 00188 typedef std::vector<hasher> hasher_vector; 00189 public: 00190 typedef typename hasher::value_type value_type; 00191 typedef std::vector<value_type> hash_vector; 00192 00193 public: 00194 default_hashing(unsigned k) 00195 { 00196 if (k == 0) 00197 throw std::invalid_argument("zero hash functions"); 00198 00199 for (unsigned i = 0; i < k; ++i) 00200 hashers_.push_back(hasher(seed::get() + i)); 00201 } 00202 00203 unsigned k() const 00204 { 00205 return hashers_.size(); 00206 } 00207 00208 template <typename T> 00209 hash_vector hash(const T&x) const 00210 { 00211 hash_vector h(k(), 0); 00212 for (typename hasher_vector::size_type i = 0; i < k(); ++i) 00213 h[i] = hash(i, x); 00214 00215 return h; 00216 } 00217 00218 private: 00219 template <typename T> 00220 value_type hash(unsigned i, const T& x) const 00221 { 00222 return hashers_[i](x); 00223 } 00224 00225 hasher_vector hashers_; 00226 }; 00227 00232 template < 00233 typename Hasher = basic_hasher, 00234 typename Seed1 = fixed_seed<42>, 00235 typename Seed2 = fixed_seed<4711> 00236 > 00237 class double_hashing : public hash_policy<double_hashing<Hasher, Seed1, Seed2>> 00238 { 00239 public: 00240 typedef Hasher hasher; 00241 typedef Seed1 seed1; 00242 typedef Seed2 seed2; 00243 typedef typename hasher::value_type value_type; 00244 typedef std::vector<value_type> hash_vector; 00245 00246 public: 00247 double_hashing(unsigned k) 00248 : k_(k) 00249 , h1_(seed1::get()) 00250 , h2_(seed2::get()) 00251 { 00252 if (k == 0) 00253 throw std::invalid_argument("zero hash functions"); 00254 } 00255 00256 unsigned k() const 00257 { 00258 return k_; 00259 } 00260 00261 template <typename T> 00262 hash_vector hash(const T&x) const 00263 { 00264 auto h1 = h1_(x); 00265 auto h2 = h2_(x); 00266 00267 hash_vector h(k(), 0); 00268 for (unsigned i = 0; i < k_; ++i) 00269 h[i] = hash(i, h1, h2); 00270 00271 return h; 00272 } 00273 00274 protected: 00275 value_type hash(unsigned i, value_type h1, value_type h2) const 00276 { 00277 return h1 + i * h2; 00278 } 00279 00280 private: 00281 unsigned k_; 00282 hasher h1_; 00283 hasher h2_; 00284 }; 00285 00290 template < 00291 typename Hasher = basic_hasher, 00292 typename Seed1 = fixed_seed<42>, 00293 typename Seed2 = fixed_seed<4711> 00294 > 00295 class extended_double_hashing : public double_hashing<Hasher, Seed1, Seed2> 00296 { 00297 typedef double_hashing<Hasher, Seed1, Seed2> base; 00298 typedef typename base::value_type value_type; 00299 public: 00300 typedef std::function<value_type(unsigned)> functor_type; 00301 00302 public: 00303 extended_double_hashing(unsigned k, functor_type f) 00304 : double_hashing<Hasher, Seed1, Seed2>(k) 00305 , f_(f) 00306 { 00307 } 00308 00309 protected: 00310 value_type hash(unsigned i, value_type h1, value_type h2) const 00311 { 00312 return h1 + i * h1 + f_(i); 00313 } 00314 00315 private: 00316 functor_type f_; 00317 }; 00318 00319 } // namespace bf 00320 00321 #endif