36 #include "FaissAssert.h" 
   38 static const size_t BLOCKSIZE_QUERY = 8192;
 
   43 static const uint8_t hamdis_tab_ham_bytes[256] = {
 
   44     0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
 
   45     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
 
   46     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
 
   47     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 
   48     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
 
   49     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 
   50     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 
   51     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 
   52     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
 
   53     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 
   54     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 
   55     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 
   56     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 
   57     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 
   58     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 
   59     4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
 
   64 template <
size_t nbits, 
typename T>
 
   65 T hamming (
const uint8_t *bs1,
 
   68     const size_t nbytes = nbits / 8;
 
   71     for (i = 0; i < nbytes; i++)
 
   72         h += (T) hamdis_tab_ham_bytes[bs1[i]^bs2[i]];
 
   78 template <
size_t nbits>
 
   79 hamdis_t hamming (
const uint64_t * bs1, 
const uint64_t * bs2)
 
   81     const size_t nwords = nbits / 64;
 
   84     for (i = 0; i < nwords; i++)
 
   85         h += popcount64 (bs1[i] ^ bs2[i]);
 
   93 hamdis_t hamming<64> (
const uint64_t * pa, 
const uint64_t * pb)
 
   95     return popcount64 (pa[0] ^ pb[0]);
 
  100 hamdis_t hamming<128> (
const uint64_t *pa, 
const uint64_t *pb)
 
  102     return popcount64 (pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]);
 
  107 hamdis_t hamming<256> (
const uint64_t * pa, 
const uint64_t * pb)
 
  109     return  popcount64 (pa[0] ^ pb[0])
 
  110           + popcount64 (pa[1] ^ pb[1])
 
  111           + popcount64 (pa[2] ^ pb[2])
 
  112           + popcount64 (pa[3] ^ pb[3]);
 
  118         const uint64_t * bs1,
 
  119         const uint64_t * bs2,
 
  124     for (i = 0; i < nwords; i++)
 
  125         h += popcount64 (bs1[i] ^ bs2[i]);
 
  131 template <
size_t nbits>
 
  133         const uint64_t * bs1,
 
  134         const uint64_t * bs2,
 
  135         size_t n1, 
size_t n2,
 
  140     const size_t nwords = nbits / 64;
 
  141     for (i = 0; i < n1; i++) {
 
  142       const uint64_t * __restrict bs1_ = bs1 + i * nwords;
 
  143       hamdis_t * __restrict dis_ = dis + i * n2;
 
  144       for (j = 0; j < n2; j++)
 
  145         dis_[j] = hamming<nbits>(bs1_, bs2 + j * nwords);
 
  152         const uint64_t * bs1,
 
  153         const uint64_t * bs2,
 
  157         hamdis_t * __restrict dis)
 
  162     for (i = 0; i < n1; i+=nwords) {
 
  163         const uint64_t * bs1_ = bs1+i;
 
  164         for (j = 0; j < n2; j+=nwords)
 
  165             dis[j] = hamming (bs1_, bs2+j, nwords);
 
  173 template <
size_t nbits>
 
  174 void hamming_count_thres (
 
  175         const uint64_t * bs1,
 
  176         const uint64_t * bs2,
 
  182     const size_t nwords = nbits / 64;
 
  183     size_t i, j, posm = 0;
 
  184     const uint64_t * bs2_ = bs2;
 
  186     for (i = 0; i < n1; i++) {
 
  188         for (j = 0; j < n2; j++) {
 
  190             if (hamming <nbits> (bs1, bs2) <= ht)
 
  200 template <
size_t nbits>
 
  201 void crosshamming_count_thres (
 
  202         const uint64_t * dbs,
 
  207     const size_t nwords = nbits / 64;
 
  208     size_t i, j, posm = 0;
 
  209     const uint64_t * bs1 = dbs;
 
  210     for (i = 0; i < n; i++) {
 
  211         const uint64_t * bs2 = bs1 + 2;
 
  212         for (j = i + 1; j < n; j++) {
 
  214             if (hamming <nbits> (bs1, bs2) <= ht)
 
  224 template <
size_t nbits>
 
  225 size_t match_hamming_thres (
 
  226         const uint64_t * bs1,
 
  227         const uint64_t * bs2,
 
  234     const size_t nwords = nbits / 64;
 
  235     size_t i, j, posm = 0;
 
  237     const uint64_t * bs2_ = bs2;
 
  238     for (i = 0; i < n1; i++) {
 
  240         for (j = 0; j < n2; j++) {
 
  242             h = hamming <nbits> (bs1, bs2);
 
  262 template <
class HammingComputer>
 
  264 void hammings_knn_hc (
 
  266         int_maxheap_array_t * ha,
 
  271         bool init_heap = 
true)
 
  276     if (init_heap) ha->heapify ();
 
  280 #pragma omp parallel for 
  281     for (
size_t i = 0; i < ha->nh; i++) {
 
  282         HammingComputer hc (bs1 + i * bytes_per_code, bytes_per_code);
 
  284         const uint8_t * bs2_ = bs2;
 
  286         hamdis_t * __restrict bh_val_ = ha->val + i * k;
 
  287         long * __restrict bh_ids_ = ha->ids + i * k;
 
  289         for (j = 0; j < n2; j++, bs2_+= bytes_per_code) {
 
  290             dis = hc.hamming (bs2_);
 
  291             if (dis < bh_val_[0]) {
 
  292                 faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
 
  293                 faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
 
  297     if (order) ha->reorder ();
 
  304 void hammings_knn_1 (
 
  305         int_maxheap_array_t * ha,
 
  306         const uint64_t * bs1,
 
  307         const uint64_t * bs2,
 
  310         bool init_heap = 
true)
 
  312     const size_t nwords = 1;
 
  320 #pragma omp parallel for 
  321     for (
size_t i = 0; i < ha->nh; i++) {
 
  322         const uint64_t bs1_ = bs1 [i];
 
  323         const uint64_t * bs2_ = bs2;
 
  325         hamdis_t * bh_val_ = ha->val + i * k;
 
  326         hamdis_t bh_val_0 = bh_val_[0];
 
  327         long * bh_ids_ = ha->ids + i * k;
 
  329         for (j = 0; j < n2; j++, bs2_+= nwords) {
 
  330             dis = popcount64 (bs1_ ^ *bs2_);
 
  331             if (dis < bh_val_0) {
 
  332                 faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
 
  333                 faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
 
  334                 bh_val_0 = bh_val_[0];
 
  353 void fvec2bitvec (
const float * x, uint8_t * b, 
size_t d)
 
  355     for (
int i = 0; i < d; i += 8) {
 
  358         int nj = i + 8 <= d ? 8 : d - i;
 
  359         for (
int j = 0; j < nj; j++) {
 
  373 void fvecs2bitvecs (
const float * x, uint8_t * b, 
size_t d, 
size_t n)
 
  375     const long ncodes = ((d + 7) / 8);
 
  376 #pragma omp parallel for 
  377     for (
size_t i = 0; i < n; i++)
 
  378         fvec2bitvec (x + i * d, b + i * ncodes, d);
 
  383 static uint64_t uint64_reverse_bits (uint64_t b)
 
  387     for (i = 0; i < 64; i++) {
 
  397 void bitvec_print (
const uint8_t * b, 
size_t d)
 
  400     for (i = 0; i < d; ) {
 
  401         uint64_t brev = uint64_reverse_bits (* (uint64_t *) b);
 
  402         for (j = 0; j < 64 && i < d; j++, i++) {
 
  403             printf (
"%d", (
int) (brev & 1));
 
  419 #define C64(x) ((uint64_t *)x) 
  426         size_t na, 
size_t nb,
 
  428         hamdis_t * __restrict dis)
 
  430     FAISS_THROW_IF_NOT (ncodes % 8 == 0);
 
  433             faiss::hammings <64>  (C64(a), C64(b), na, nb, dis); 
return;
 
  435             faiss::hammings <128> (C64(a), C64(b), na, nb, dis); 
return;
 
  437             faiss::hammings <256> (C64(a), C64(b), na, nb, dis); 
return;
 
  439             faiss::hammings <512> (C64(a), C64(b), na, nb, dis); 
return;
 
  446 void hammings_knn_core (
 
  447         int_maxheap_array_t * ha,
 
  453     FAISS_THROW_IF_NOT (ncodes % 8 == 0);
 
  456         hammings_knn_1 (ha, C64(a), C64(b), nb, 
false, 
true);
 
  461         hammings_knn_hc<faiss::HammingComputer16>
 
  462             (16, ha, a, b, nb, 
false, 
true);
 
  465         hammings_knn_hc<faiss::HammingComputer32>
 
  466             (32, ha, a, b, nb, 
false, 
true);
 
  469         hammings_knn_hc<faiss::HammingComputerM8>
 
  470             (ncodes, ha, a, b, nb, 
false, 
true);
 
  484         hammings_knn_hc<faiss::HammingComputer4>
 
  485             (4, ha, a, b, nb, order, 
true);
 
  488         hammings_knn_1 (ha, C64(a), C64(b), nb, order, 
true);
 
  493         hammings_knn_hc<faiss::HammingComputer16>
 
  494             (16, ha, a, b, nb, order, 
true);
 
  497         hammings_knn_hc<faiss::HammingComputer32>
 
  498             (32, ha, a, b, nb, order, 
true);
 
  501         if(ncodes % 8 == 0) {
 
  502             hammings_knn_hc<faiss::HammingComputerM8>
 
  503                 (ncodes, ha, a, b, nb, order, 
true);
 
  505             hammings_knn_hc<faiss::HammingComputerDefault>
 
  506                 (ncodes, ha, a, b, nb, order, 
true);
 
  516 void hamming_count_thres (
 
  527             faiss::hamming_count_thres <64> (C64(bs1), C64(bs2),
 
  531             faiss::hamming_count_thres <128> (C64(bs1), C64(bs2),
 
  535             faiss::hamming_count_thres <256> (C64(bs1), C64(bs2),
 
  539             faiss::hamming_count_thres <512> (C64(bs1), C64(bs2),
 
  543           FAISS_THROW_FMT (
"not implemented for %zu bits", ncodes);
 
  549 void crosshamming_count_thres (
 
  558             faiss::crosshamming_count_thres <64>  (C64(dbs), n, ht, nptr);
 
  561             faiss::crosshamming_count_thres <128> (C64(dbs), n, ht, nptr);
 
  564             faiss::crosshamming_count_thres <256> (C64(dbs), n, ht, nptr);
 
  567             faiss::crosshamming_count_thres <512> (C64(dbs), n, ht, nptr);
 
  570             FAISS_THROW_FMT (
"not implemented for %zu bits", ncodes);
 
  576 size_t match_hamming_thres (
 
  588           return faiss::match_hamming_thres <64> (C64(bs1), C64(bs2),
 
  589                                                   n1, n2, ht, idx, dis);
 
  591           return faiss::match_hamming_thres <128> (C64(bs1), C64(bs2),
 
  592                                                    n1, n2, ht, idx, dis);
 
  594           return faiss::match_hamming_thres <256> (C64(bs1), C64(bs2),
 
  595                                                    n1, n2, ht, idx, dis);
 
  597           return faiss::match_hamming_thres <512> (C64(bs1), C64(bs2),
 
  598                                                    n1, n2, ht, idx, dis);
 
  600             FAISS_THROW_FMT (
"not implemented for %zu bits", ncodes);
 
  616 template <
class HammingComputer>
 
  617 static void hamming_dis_inner_loop (
 
  627     HammingComputer hc (ca, code_size);
 
  629     for (
size_t j = 0; j < nb; j++) {
 
  630         int ndiff = hc.hamming (cb);
 
  632         if (ndiff < bh_val_[0]) {
 
  633             maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
 
  634             maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, ndiff, j);
 
  653 #pragma omp parallel for 
  654     for (
int i = 0; i < na; i++) {
 
  655         const uint8_t *ca = a + i * code_size;
 
  656         const uint8_t *cb = b;
 
  658         hamdis_t * bh_val_ = ha->
val + i * k;
 
  659         long *     bh_ids_ = ha->
ids + i * k;
 
  663             hamming_dis_inner_loop<GenHammingComputer8>
 
  664                 (ca, cb, nb, 8, k, bh_val_, bh_ids_);
 
  667             hamming_dis_inner_loop<GenHammingComputer16>
 
  668                 (ca, cb, nb, 16, k, bh_val_, bh_ids_);
 
  671             hamming_dis_inner_loop<GenHammingComputer32>
 
  672                 (ca, cb, nb, 32, k, bh_val_, bh_ids_);
 
  675             hamming_dis_inner_loop<GenHammingComputerM8>
 
  676                 (ca, cb, nb, code_size, k, bh_val_, bh_ids_);
 
size_t k
allocated size per heap 
void generalized_hammings_knn(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t code_size, int ordered)
void reorder()
reorder all the heaps 
void hammings_knn(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t ncodes, int order)
TI * ids
identifiers (size nh * k) 
void heapify()
prepare all the heaps before adding 
void hammings(const uint8_t *a, const uint8_t *b, size_t na, size_t nb, size_t nbytespercode, hamdis_t *dis)
T * val
values (distances or similarities), size nh * k