libBf 0.1

hash.h

00001 #ifndef HASH_H
00002 #define HASH_H
00003 
00004 #include <algorithm>
00005 #include <stdexcept>
00006 #include <vector>
00007 #include <boost/functional/hash.hpp>
00008 #include <boost/nondet_random.hpp>
00009 
00010 namespace bf {
00011 
00013 struct random_seed
00014 {
00015     static unsigned get()
00016     {
00017         return boost::random_device()();
00018     }
00019 };
00020 
00023 template <unsigned Seed = 42>
00024 struct fixed_seed
00025 {
00026     static unsigned get()
00027     {
00028         return Seed;
00029     }
00030 };
00031 
00033 class basic_hasher
00034 {
00035 public:
00036     typedef std::size_t value_type;
00037 
00038     basic_hasher(std::size_t seed)
00039       : seed_(seed)
00040     {
00041     }
00042 
00043     template <typename T>
00044     std::size_t operator()(const T& x) const
00045     {
00046         std::size_t s = seed_;
00047         boost::hash_combine(s, x);
00048         return s;
00049     }
00050 
00051 private:
00052     std::size_t seed_;
00053 };
00054 
00057 template <typename Derived>
00058 class hash_policy
00059 {
00060 public:
00066     template <typename T, typename F>
00067     void each(const T& x, F f) const
00068     {
00069         for (auto h : derived().hash(x))
00070             f(h);
00071     }
00072 
00081     template <typename T, typename F>
00082     bool any(const T& x, F f) const
00083     {
00084         for (auto h : derived().hash(x))
00085             if (f(h))
00086                 return true;
00087 
00088         return false;
00089     }
00090 
00099     template <typename T, typename F>
00100     bool all(const T& x, F f) const
00101     {
00102         for (auto h : derived().hash(x))
00103             if (! f(h))
00104                 return false;
00105 
00106         return true;
00107     }
00108 
00115     template <typename T, typename F>
00116     void each_with_index(const T& x, F f) const
00117     {
00118         auto h = derived().hash(x);
00119         for (unsigned i = 0; i < h.size(); ++i)
00120             f(h[i], i);
00121     }
00122 
00133     template <typename T, typename F>
00134     bool any_with_index(const T& x, F f) const
00135     {
00136         auto h = derived().hash(x);
00137         for (unsigned i = 0; i < h.size(); ++i)
00138             if (f(h[i], i))
00139                 return true;
00140 
00141         return false;
00142     }
00143 
00154     template <typename T, typename F>
00155     bool all_with_index(const T& x, F f) const
00156     {
00157         auto h = derived().hash(x);
00158         for (unsigned i = 0; i < h.size(); ++i)
00159             if (! f(h[i], i))
00160                 return false;
00161 
00162         return true;
00163     }
00164 
00165 private:
00166     //
00167     // CRTP interface
00168     //
00169     Derived& derived()
00170     {
00171         return *static_cast<Derived*>(this);
00172     }
00173 
00174     const Derived& derived() const
00175     {
00176         return *static_cast<const Derived*>(this);
00177     }
00178 };
00179 
00183 template <typename Hasher = basic_hasher, typename Seed = fixed_seed<42>>
00184 class default_hashing : public hash_policy<default_hashing<Hasher, Seed>>
00185 {
00186     typedef Hasher hasher;
00187     typedef Seed seed;
00188     typedef std::vector<hasher> hasher_vector;
00189 public:
00190     typedef typename hasher::value_type value_type;
00191     typedef std::vector<value_type> hash_vector;
00192 
00193 public:
00194     default_hashing(unsigned k)
00195     {
00196         if (k == 0)
00197             throw std::invalid_argument("zero hash functions");
00198 
00199         for (unsigned i = 0; i < k; ++i)
00200             hashers_.push_back(hasher(seed::get() + i));
00201     }
00202 
00203     unsigned k() const
00204     {
00205         return hashers_.size();
00206     }
00207 
00208     template <typename T>
00209     hash_vector hash(const T&x) const
00210     {
00211         hash_vector h(k(), 0);
00212         for (typename hasher_vector::size_type i = 0; i < k(); ++i)
00213             h[i] = hash(i, x);
00214 
00215         return h;
00216     }
00217 
00218 private:
00219     template <typename T>
00220     value_type hash(unsigned i, const T& x) const
00221     {
00222         return hashers_[i](x);
00223     }
00224 
00225     hasher_vector hashers_;
00226 };
00227 
00232 template <
00233     typename Hasher = basic_hasher,
00234     typename Seed1 = fixed_seed<42>,
00235     typename Seed2 = fixed_seed<4711>
00236 >
00237 class double_hashing : public hash_policy<double_hashing<Hasher, Seed1, Seed2>>
00238 {
00239 public:
00240     typedef Hasher hasher;
00241     typedef Seed1 seed1;
00242     typedef Seed2 seed2;
00243     typedef typename hasher::value_type value_type;
00244     typedef std::vector<value_type> hash_vector;
00245 
00246 public:
00247     double_hashing(unsigned k)
00248       : k_(k)
00249       , h1_(seed1::get())
00250       , h2_(seed2::get())
00251     {
00252         if (k == 0)
00253             throw std::invalid_argument("zero hash functions");
00254     }
00255 
00256     unsigned k() const
00257     {
00258         return k_;
00259     }
00260 
00261     template <typename T>
00262     hash_vector hash(const T&x) const
00263     {
00264         auto h1 = h1_(x);
00265         auto h2 = h2_(x);
00266 
00267         hash_vector h(k(), 0);
00268         for (unsigned i = 0; i < k_; ++i)
00269             h[i] = hash(i, h1, h2);
00270 
00271         return h;
00272     }
00273 
00274 protected:
00275     value_type hash(unsigned i, value_type h1, value_type h2) const
00276     {
00277         return h1 + i * h2;
00278     }
00279 
00280 private:
00281     unsigned k_;
00282     hasher h1_;
00283     hasher h2_;
00284 };
00285 
00290 template <
00291     typename Hasher = basic_hasher,
00292     typename Seed1 = fixed_seed<42>,
00293     typename Seed2 = fixed_seed<4711>
00294 >
00295 class extended_double_hashing : public double_hashing<Hasher, Seed1, Seed2>
00296 {
00297     typedef double_hashing<Hasher, Seed1, Seed2> base;
00298     typedef typename base::value_type value_type;
00299 public:
00300     typedef std::function<value_type(unsigned)> functor_type;
00301 
00302 public:
00303     extended_double_hashing(unsigned k, functor_type f)
00304       : double_hashing<Hasher, Seed1, Seed2>(k)
00305       , f_(f)
00306     {
00307     }
00308 
00309 protected:
00310     value_type hash(unsigned i, value_type h1, value_type h2) const
00311     {
00312         return h1 + i * h1 + f_(i);
00313     }
00314 
00315 private:
00316     functor_type f_;
00317 };
00318 
00319 } // namespace bf
00320 
00321 #endif
 All Classes Functions Variables