11 #include "IVFFlat.cuh" 
   12 #include "../GpuResources.h" 
   13 #include "FlatIndex.cuh" 
   14 #include "InvertedListAppend.cuh" 
   15 #include "IVFFlatScan.cuh" 
   16 #include "RemapIndices.h" 
   17 #include "../utils/CopyUtils.cuh" 
   18 #include "../utils/DeviceDefs.cuh" 
   19 #include "../utils/DeviceUtils.h" 
   20 #include "../utils/Float16.cuh" 
   21 #include "../utils/HostTensor.cuh" 
   22 #include "../utils/Transpose.cuh" 
   24 #include <thrust/host_vector.h> 
   25 #include <unordered_map> 
   27 namespace faiss { 
namespace gpu {
 
   33                  IndicesOptions indicesOptions,
 
   37 #ifdef FAISS_USE_FLOAT16
 
   39             sizeof(half) * quantizer->getDim()
 
   40             : sizeof(float) * quantizer->getDim(),
 
   42             sizeof(float) * quantizer->getDim(),
 
   46     l2Distance_(l2Distance),
 
   47     useFloat16_(useFloat16) {
 
   70   auto prevData = listData->data();
 
   74   FAISS_ASSERT(listData->size() + lengthInBytes <=
 
   75          (size_t) std::numeric_limits<int>::max());
 
   78 #ifdef FAISS_USE_FLOAT16 
   87                          {(int) numVecs * 
dim_});
 
   88     auto halfData = toHalf<1>(
resources_, stream, floatData);
 
   90     listData->append((
unsigned char*) halfData.data(),
 
   99     listData->append((
unsigned char*) vecs,
 
  110   if (prevData != listData->data()) {
 
  124     streamWait({stream}, {0});
 
  144   auto listIds = listIds2d.
view<1>({vecs.
getSize(0)});
 
  146   quantizer_->query(vecs, 1, listDistance, listIds2d, 
false);
 
  159   std::unordered_map<int, int> assignCounts;
 
  165   for (
int i = 0; i < listIds.
getSize(0); ++i) {
 
  166     int listId = listIdsHost[i];
 
  170       listOffsetHost[i] = -1;
 
  179     auto it = assignCounts.find(listId);
 
  180     if (it != assignCounts.end()) {
 
  181       offset += it->second;
 
  184       assignCounts[listId] = 1;
 
  187     listOffsetHost[i] = offset;
 
  201     for (
auto& counts : assignCounts) {
 
  207       auto& indices = deviceListIndices_[counts.first];
 
  213         indices->resize(indices->size() + counts.second * indexSize, stream);
 
  219         userIndices.resize(newNumVecs);
 
  233       std::vector<int> listIds(assignCounts.size());
 
  235       for (
auto& counts : assignCounts) {
 
  236         listIds[i++] = counts.first;
 
  249     for (
int i = 0; i < hostIndices.
getSize(0); ++i) {
 
  250       int listId = listIdsHost[i];
 
  257       int offset = listOffsetHost[i];
 
  262       FAISS_ASSERT(offset < userIndices.size());
 
  263       userIndices[offset] = hostIndices[i];
 
  272     runIVFFlatInvertedListAppend(listIds,
 
  296   FAISS_ASSERT(nprobe <= 1024);
 
  297   FAISS_ASSERT(k <= 1024);
 
  307     coarseDistances(mem, {queries.
getSize(0), nprobe}, stream);
 
  309     coarseIndices(mem, {queries.
getSize(0), nprobe}, stream);
 
  319   runIVFFlatScan(queries,
 
  340     ivfOffsetToUserIndex(hostOutIndices.
data(),
 
  348     outIndices.
copyFrom(hostOutIndices, stream);
 
  360 #ifdef FAISS_USE_FLOAT16 
  361     size_t num = encVecs.size() / 
sizeof(half);
 
  364     auto devFloat = fromHalf(
resources_, stream, devHalf);
 
  366     std::vector<float> out(num);
 
  368     hostFloat.
copyFrom(devFloat, stream);
 
  374   size_t num = encVecs.size() / 
sizeof(float);
 
  378   std::vector<float> out(num);
 
  380   hostFloat.
copyFrom(devFloat, stream);
 
const int numLists_
Number of inverted lists we maintain. 
int maxListLength_
Maximum list length seen. 
cudaStream_t getDefaultStreamCurrentDevice()
Calls getDefaultStream with the current device. 
int getSize() const 
Returns the number of vectors we contain. 
std::vector< std::vector< long > > listOffsetToUserIndex_
Holder of GPU resources for a particular flat index. 
__host__ __device__ Tensor< T, SubDim, InnerContig, IndexT, PtrTraits > view(DataPtrType at)
Base inverted list functionality for IVFFlat and IVFPQ. 
IVFFlat(GpuResources *resources, FlatIndex *quantizer, bool l2Distance, bool useFloat16, IndicesOptions indicesOptions, MemorySpace space)
Construct from a quantizer that has elements. 
thrust::device_vector< int > deviceListLengths_
thrust::device_vector< void * > deviceListIndexPointers_
int classifyAndAddVectors(Tensor< float, 2, true > &vecs, Tensor< long, 1, true > &indices)
DeviceMemory & getMemoryManagerCurrentDevice()
Calls getMemoryManager for the current device. 
__host__ void copyFrom(Tensor< T, Dim, InnerContig, IndexT, PtrTraits > &t, cudaStream_t stream)
Copies a tensor into ourselves; sizes must match. 
FlatIndex * quantizer_
Quantizer object. 
__host__ __device__ IndexT getSize(int i) const 
thrust::device_vector< void * > deviceListDataPointers_
__host__ __device__ DataPtrType data()
Returns a raw pointer to the start of our data. 
GpuResources * resources_
Collection of GPU resources that we use. 
void addCodeVectorsFromCpu(int listId, const float *vecs, const long *indices, size_t numVecs)
const int bytesPerVector_
Number of bytes per vector in the list. 
void query(Tensor< float, 2, true > &queries, int nprobe, int k, Tensor< float, 2, true > &outDistances, Tensor< long, 2, true > &outIndices)
std::vector< float > getListVectors(int listId) const 
Return the vectors of a particular list back to the CPU. 
void updateDeviceListInfo_(cudaStream_t stream)
Update all device-side list pointer and size information. 
const IndicesOptions indicesOptions_
How are user indices stored on the GPU? 
std::vector< std::unique_ptr< DeviceVector< unsigned char > > > deviceListData_
const int dim_
Expected dimensionality of the vectors. 
void addIndicesFromCpu_(int listId, const long *indices, size_t numVecs)
Shared function to copy indices from CPU to GPU.