// Constructor fragment: keeps a copy of the embeddings, creates a
// faiss::IndexFlatL2 for `dimension`, packs the embeddings into one contiguous
// float buffer (weights_) and adds that buffer to the index.
// NOTE(review): several lines of this constructor are not visible in this
// excerpt (remaining parameters, the initial value of dataIndex, closing
// braces); the comments below describe only the statements that are shown.
52 RAG(std::vector<std::string> document,
53 std::vector<std::vector<float>> embedding,
// Member copy of the embeddings; the packing loop below iterates this copy.
57 embedding_ = embedding;
58 dims.push_back(dimension);
// Member index; this is the object that receives the vectors via add() below.
61 index_ = faiss::IndexFlatL2(dimension);
// NOTE(review): this local `index` is not referenced by any visible line --
// looks like leftover code; confirm and remove.
62 faiss::IndexFlatL2 index(dimension);
// One embedding is expected per document.
// NOTE(review): int is compared against size_t here, and the check
// disappears when assert is compiled out (NDEBUG).
63 int numDocument = document.size();
64 assert(numDocument == embedding.size());
// Buffer of numDocument * dimension floats holding all embeddings back-to-back.
// NOTE(review): allocated with raw new[]; no matching delete[] is visible in
// this excerpt -- confirm where ownership is released.
66 weights_ =
new float[numDocument * dimension];
// Copy each embedding into weights_ at the running offset dataIndex.
// NOTE(review): nothing visible checks vec.size() == dimension; a longer
// vector would write past the end of weights_.
68 for (
const auto& vec : embedding_) {
69 std::copy(vec.begin(), vec.end(), weights_ + dataIndex);
70 dataIndex += vec.size();
// NOTE(review): flattened1DEmbedding is not used by any visible line; the
// add() call below passes weights_ instead. Possibly redundant work.
73 std::vector<float> flattened1DEmbedding =
flatten(embedding_);
// Register numDocument vectors, read from weights_, with the member index.
74 index_.add(numDocument, weights_);
// Parameter list (continued) and body of a `const override` member function.
// NOTE(review): the line that opens this signature, and several body lines
// (the else branch, the index search call, closing braces), are not visible in
// this excerpt; the comments below describe only the statements that are shown.
86 const SelectivityVector& rows,
87 std::vector<VectorPtr>& args,
88 const TypePtr& outputType,
89 exec::EvalCtx& context,
90 VectorPtr& output)
const override {
// Make `output` writable for the selected rows, then view it as a flat vector
// of StringView so one string can be set per row.
91 BaseVector::ensureWritable(rows, outputType, context.pool(), output);
92 auto arrayOutput = output->asFlatVector<StringView>();
// Decode the arguments and treat the base of argument 0 as an ArrayVector
// whose elements are read as floats.
94 exec::DecodedArgs decodedArgs(rows, args, context);
95 auto decodedInput = decodedArgs.at(0);
96 auto inputArray = decodedInput->base()->as<ArrayVector>();
97 auto inputElements = inputArray->elements();
// NOTE(review): a mutable pointer into the input buffer is taken here; the
// visible code only reads through it -- confirm a const view would not do.
98 float* inputValues = inputElements->values()->asMutable<
float>();
// Per-array start offset and length within the elements buffer.
99 auto inputOffsets = inputArray->rawOffsets();
100 auto inputSizes = inputArray->rawSizes();
// rowMap: selected row -> slot in the de-duplicated result list.
104 std::map<vector_size_t, vector_size_t> rowMap;
// Decoded base indices already seen, so each distinct input array is handled
// once.
106 std::unordered_set<vector_size_t> uniqueRawIndexeSet;
// The same base indices in first-seen order; position i pairs with
// uniqueResults[i] below.
108 std::vector<vector_size_t> uniqueRawIndexeVector;
109 vector_size_t numUniqueRows = 0;
// Pass 1: walk the selected rows and collect the distinct base indices.
110 rows.applyToSelected([&](vector_size_t row) {
111 auto mappedIndexInRowData = decodedInput->index(row);
112 if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
113 uniqueRawIndexeSet.end()) {
// First time this base index is seen: give it the next unique slot.
115 rowMap[row] = numUniqueRows;
116 uniqueRawIndexeSet.insert(mappedIndexInRowData);
117 uniqueRawIndexeVector.push_back(mappedIndexInRowData);
// NOTE(review): rowMap is keyed by `row` above but is looked up here with the
// decoded base index. These are different index spaces unless the encoding is
// the identity, and operator[] silently inserts 0 for a missing key -- looks
// like a base-index -> slot map was intended; confirm.
121 rowMap[row] = rowMap[mappedIndexInRowData];
// Pass 2: compute one result string per distinct input array.
125 std::vector<std::string> uniqueResults(numUniqueRows);
126 for (
int i = 0; i < numUniqueRows; i++) {
127 int index = uniqueRawIndexeVector[i];
// Output buffers of length k for neighbor labels and their distances.
129 std::vector<faiss::idx_t> labels(k);
130 std::vector<float> distances(k);
// Query pointer: start of this array's elements in the flat float buffer
// (an argument of a call whose remaining lines are not visible here).
133 inputValues + inputOffsets[index],
// Use the document at the first returned label as the result.
// NOTE(review): labels[0] is used unchecked; presumably it can be invalid
// (e.g. -1) when the index holds no vectors -- verify against the FAISS docs.
137 uniqueResults[i] = document_[labels[0]];
// NOTE(review): `results` is not referenced by any visible line.
140 std::vector<std::string> results(rows.size());
// Pass 3: write each selected row's result string into the output vector.
141 rows.applyToSelected([&](vector_size_t row) {
142 arrayOutput->set(row, StringView(uniqueResults[rowMap[row]]));