ccn_bloom.c

Go to the documentation of this file.
00001 /**
00002  * @file ccn_bloom.c
00003  * @brief Support for Bloom filters.
00004  * 
00005  * Part of the CCNx C Library.
00006  *
00007  * Copyright (C) 2008, 2009 Palo Alto Research Center, Inc.
00008  *
00009  * This library is free software; you can redistribute it and/or modify it
00010  * under the terms of the GNU Lesser General Public License version 2.1
00011  * as published by the Free Software Foundation.
00012  * This library is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00015  * Lesser General Public License for more details. You should have received
00016  * a copy of the GNU Lesser General Public License along with this library;
00017  * if not, write to the Free Software Foundation, Inc., 51 Franklin Street,
00018  * Fifth Floor, Boston, MA 02110-1301 USA.
00019  */
00020 #include <stddef.h>
00021 #include <stdlib.h>
00022 #include <string.h>
00023 #include <ccn/bloom.h>
00024 
/**
 * Bloom filter under construction: a counter paired with the wire-format
 * filter it describes.
 */
struct ccn_bloom {
    /*
     * Zero after ccn_bloom_create(), incremented by every
     * ccn_bloom_insert(); ccn_bloom_from_wire() sets it to
     * 1 << lg_bits as an estimate.
     */
    int n;
    /* Heap-allocated wire representation; released by ccn_bloom_destroy(). */
    struct ccn_bloom_wire *wire;
};
00029 
00030 /**
00031  * Create an empty Bloom filter constructor
00032  * @param estimated_members is an estimate of the number of elements that
00033  *        will be inserted into the filter
00034  * @param seed is used to seed the hash functions
00035  * @returns a new, empty Bloom filter constructor
00036  */
00037 struct ccn_bloom *
00038 ccn_bloom_create(int estimated_members, const unsigned char seed[4])
00039 {
00040     struct ccn_bloom *ans = NULL;
00041     struct ccn_bloom_wire *f;
00042     int n = estimated_members;
00043     int i;
00044     ans = calloc(1, sizeof(*ans));
00045     if (ans == NULL) return(ans);
00046     f = calloc(1, sizeof(*f));
00047     if (f != NULL) {
00048         f->method = 'A';
00049         f->lg_bits = 13;
00050         /* try for about m = 12*n (m = bits in Bloom filter) */
00051         while (f->lg_bits > 3 && (1 << f->lg_bits) > n * 12)
00052             f->lg_bits--;
00053         /* optimum number of hash functions is ln(2)*(m/n); use ln(2) ~= 9/13 */
00054         f->n_hash = (9 << f->lg_bits) / (13 * n + 1);
00055         if (f->n_hash < 2)
00056             f->n_hash = 2;
00057         if (f->n_hash > 32)
00058             f->n_hash = 32;
00059         for (i = 0; i < sizeof(f->seed); i++)
00060             f->seed[i] = seed[i];
00061         ans->wire = f;
00062     }
00063     else
00064         ccn_bloom_destroy(&ans);
00065     return(ans);
00066 }
00067 
00068 const struct ccn_bloom_wire *
00069 ccn_bloom_validate_wire(const void *buf, size_t size)
00070 {
00071     const struct ccn_bloom_wire *f = (const struct ccn_bloom_wire *)buf;
00072     if (size < 9)
00073         return (NULL);
00074     if (f->lg_bits > 13 || f->lg_bits < 3)
00075         return (NULL);
00076     if (f->n_hash < 1 || f->n_hash > 32)
00077         return (NULL);
00078     if (size != (sizeof(*f) - sizeof(f->bloom)) + (1 << (f->lg_bits - 3)))
00079         return (NULL);
00080     if (!(f->reserved == 0 && f->method == 'A'))
00081         return (NULL);
00082     return(f);
00083 }
00084 
00085 struct ccn_bloom *
00086 ccn_bloom_from_wire(const void *data, size_t size)
00087 {
00088     struct ccn_bloom *ans = NULL;
00089     const struct ccn_bloom_wire *f = ccn_bloom_validate_wire(data, size);
00090     if (f != NULL) {
00091         ans = calloc(1, sizeof(*ans));
00092         if (ans == NULL) return(ans);
00093         ans->n = 1 << f->lg_bits; /* estimate */
00094         ans->wire = calloc(1, size);
00095         if (ans->wire == NULL)
00096             ccn_bloom_destroy(&ans);
00097         else
00098             memcpy(ans->wire, data, size);
00099     }
00100     return(ans);
00101 }
00102 
00103 void
00104 ccn_bloom_destroy(struct ccn_bloom **bp)
00105 {
00106     if (*bp != NULL) {
00107         if ((*bp)->wire != NULL)
00108             free((*bp)->wire);
00109         free(*bp);
00110         *bp = NULL;
00111     }
00112 }
00113 
00114 static int
00115 bloom_seed(const struct ccn_bloom_wire *f)
00116 {
00117     unsigned u;
00118     const unsigned char *s = f->seed;
00119     u = ((s[0]) << 24) |
00120         ((s[1]) << 16) |
00121         ((s[2]) << 8) |
00122         (s[3]);
00123     return(u & 0x7FFFFFFF);
00124 }
00125 
/*
 * bloom_nexthash:
 * Advance the 31-bit hash state s by k = 13 feedback-shift steps and mix in
 * the input value u.  The result is always in the range 0..0x7FFFFFFF.
 * Callers in this file pass a non-negative s (a previous result or the
 * value from bloom_seed).
 */
static int
bloom_nexthash(int s, int u)
{
    const int k = 13; /* use this many bits of feedback shift output */
    const int b = s & ((1 << k) - 1);
    unsigned next;
    /* fsr primitive polynomial (modulo 2) x**31 + x**13 + 1 */
    next = (unsigned)(s >> k) ^
           ((unsigned)b << (31 - k)) ^
           ((unsigned)b << (13 - k));
    /*
     * The feedback value can be as large as 0x7FFFFFFF, so adding u in
     * signed arithmetic could overflow; unsigned addition wraps and the
     * mask below discards the carry.
     */
    next += (unsigned)u;
    return((int)(next & 0x7FFFFFFFu));
}
00135 
00136 /*
00137  * ccn_bloom_insert:
00138  * Returns the number of bits changed in the filter, so a zero return
00139  * means a collison has happened.
00140  */
00141 int
00142 ccn_bloom_insert(struct ccn_bloom *b, const void *key, size_t size)
00143 {
00144     
00145     int d = 0;
00146     struct ccn_bloom_wire *f = b->wire;
00147     int h, i, k, m, n, s;
00148     const unsigned char *hb = (const unsigned char *)key;
00149     n = f->n_hash;
00150     m = (8*sizeof(f->bloom) - 1) & ((1 << f->lg_bits) - 1);
00151     s = bloom_seed(f);
00152     for (k = 0; k < size; k++)
00153         s = bloom_nexthash(s, hb[k] + 1);
00154     for (i = 0; i < n; i++) {
00155         s = bloom_nexthash(s, 0);
00156         h = s & m;
00157         if (0 == (f->bloom[h >> 3] & (1 << (h & 7)))) {
00158             f->bloom[h >> 3] |= (1 << (h & 7));
00159             d++;
00160         }
00161         f->bloom[h >> 3] |= (1 << (h & 7));
00162     }
00163     b->n += 1;
00164     return(d);
00165 }
00166 
00167 int
00168 ccn_bloom_match_wire(const struct ccn_bloom_wire *f, const void *key, size_t size)
00169 {
00170     int h, i, k, m, n, s;
00171     const unsigned char *hb = (const unsigned char *)key;
00172     n = f->n_hash;
00173     m = (8*sizeof(f->bloom) - 1) & ((1 << f->lg_bits) - 1);
00174     s = bloom_seed(f);
00175     for (k = 0; k < size; k++)
00176         s = bloom_nexthash(s, hb[k] + 1);
00177     for (i = 0; i < n; i++) {
00178         s = bloom_nexthash(s, 0);
00179         if (k >= size)
00180             k = 0;
00181         h = s & m;
00182         if (0 == (f->bloom[h >> 3] & (1 << (h & 7))))
00183             return(0);
00184     }
00185     return(1);
00186 }
00187 
00188 int
00189 ccn_bloom_match(struct ccn_bloom *b, const void *key, size_t size)
00190 {
00191     return(ccn_bloom_match_wire(b->wire, key, size));
00192 }
00193 
00194 int
00195 ccn_bloom_n(struct ccn_bloom *b)
00196 {
00197     return(b->n);
00198 }
00199 
00200 int
00201 ccn_bloom_wiresize(struct ccn_bloom *b)
00202 {
00203     // XXX - in principle, this could fold the filter if it is excessively large
00204     const struct ccn_bloom_wire *f = (b->wire);
00205     if (f == NULL)
00206         return(-1);
00207     return((sizeof(*f) - sizeof(f->bloom)) + (1 << (f->lg_bits - 3)));
00208 }
00209 
00210 int
00211 ccn_bloom_store_wire(struct ccn_bloom *b, unsigned char *dest, size_t destsize)
00212 {
00213     // XXX - in principle, this could fold the filter if it is excessively large
00214     int wiresize = ccn_bloom_wiresize(b);
00215     if (wiresize < 0 || destsize != wiresize)
00216         return(-1);
00217     memcpy(dest, b->wire, destsize);
00218     return(0);
00219 }
00220 
Generated on Fri May 13 16:27:02 2011 for Content-Centric Networking in C by  doxygen 1.6.3