ML functions
 
Loading...
Searching...
No Matches
RAG.h
Go to the documentation of this file.
1
15
16#pragma once
17
#include <cmath>
#include <cstddef>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <faiss/Index.h>
#include <faiss/IndexFlat.h>

#include "BaseFunction.h"
23
/**
 * @brief Flattens a 2D vector into a 1D vector.
 *
 * The inner vectors are concatenated in order; empty inner vectors contribute
 * nothing. Declared `inline` because this definition lives in a header and
 * would otherwise be emitted once per including translation unit.
 *
 * @param vec2D Rows to concatenate.
 * @return A single vector holding every element of every row, row by row.
 */
inline std::vector<float> flatten(
    const std::vector<std::vector<float>>& vec2D) {
  // Size the output up front so the concatenation allocates only once.
  std::size_t totalSize = 0;
  for (const auto& innerVec : vec2D) {
    totalSize += innerVec.size();
  }

  std::vector<float> flatVec;
  flatVec.reserve(totalSize);
  for (const auto& innerVec : vec2D) {
    flatVec.insert(flatVec.end(), innerVec.begin(), innerVec.end());
  }

  return flatVec;
}
39
44class RAG : public MLFunction {
45 public:
52 RAG(std::vector<std::string> document,
53 std::vector<std::vector<float>> embedding,
54 int dimension) {
55 // Create a deep copy of the weights
56 document_ = document;
57 embedding_ = embedding;
58 dims.push_back(dimension);
59
60 // Create the IndexFlatL2 index
61 index_ = faiss::IndexFlatL2(dimension);
62 faiss::IndexFlatL2 index(dimension); // call constructor
63 int numDocument = document.size();
64 assert(numDocument == embedding.size());
65
66 weights_ = new float[numDocument * dimension];
67 int dataIndex = 0;
68 for (const auto& vec : embedding_) {
69 std::copy(vec.begin(), vec.end(), weights_ + dataIndex);
70 dataIndex += vec.size();
71 }
72
73 std::vector<float> flattened1DEmbedding = flatten(embedding_);
74 index_.add(numDocument, weights_);
75 }
76
85 void apply(
86 const SelectivityVector& rows,
87 std::vector<VectorPtr>& args,
88 const TypePtr& outputType,
89 exec::EvalCtx& context,
90 VectorPtr& output) const override {
91 BaseVector::ensureWritable(rows, outputType, context.pool(), output);
92 auto arrayOutput = output->asFlatVector<StringView>();
93
94 exec::DecodedArgs decodedArgs(rows, args, context);
95 auto decodedInput = decodedArgs.at(0);
96 auto inputArray = decodedInput->base()->as<ArrayVector>();
97 auto inputElements = inputArray->elements();
98 float* inputValues = inputElements->values()->asMutable<float>();
99 auto inputOffsets = inputArray->rawOffsets();
100 auto inputSizes = inputArray->rawSizes();
101
102 // The map between the row index in the input data and the row index in
103 // the output data.
104 std::map<vector_size_t, vector_size_t> rowMap;
105 // for efficient check
106 std::unordered_set<vector_size_t> uniqueRawIndexeSet;
107 // for iterating over the insert ordering
108 std::vector<vector_size_t> uniqueRawIndexeVector;
109 vector_size_t numUniqueRows = 0;
110 rows.applyToSelected([&](vector_size_t row) {
111 auto mappedIndexInRowData = decodedInput->index(row);
112 if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
113 uniqueRawIndexeSet.end()) {
114 // add it
115 rowMap[row] = numUniqueRows;
116 uniqueRawIndexeSet.insert(mappedIndexInRowData);
117 uniqueRawIndexeVector.push_back(mappedIndexInRowData);
118 ++numUniqueRows;
119 } else {
120 // already added
121 rowMap[row] = rowMap[mappedIndexInRowData];
122 }
123 });
124
125 std::vector<std::string> uniqueResults(numUniqueRows);
126 for (int i = 0; i < numUniqueRows; i++) {
127 int index = uniqueRawIndexeVector[i];
128 int k = 3;
129 std::vector<faiss::idx_t> labels(k);
130 std::vector<float> distances(k);
131 index_.search(
132 1,
133 inputValues + inputOffsets[index],
134 k,
135 distances.data(),
136 labels.data());
137 uniqueResults[i] = document_[labels[0]];
138 }
139
140 std::vector<std::string> results(rows.size());
141 rows.applyToSelected([&](vector_size_t row) {
142 arrayOutput->set(row, StringView(uniqueResults[rowMap[row]]));
143 });
144 }
145
150 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
151 return {exec::FunctionSignatureBuilder()
152 .returnType("VARCHAR")
153 .argumentType("ARRAY(REAL)")
154 .build()};
155 }
156
161 float* getTensor() const override {
162 return weights_;
163 }
164
169 std::string getFuncName() {
170 return getName();
171 };
172
177 static std::string getName() {
178 return "svd";
179 };
180
185 std::string getWeightsFile() {
186 return weightsFile_;
187 }
188
193 void setWeights(float* weights) {
194 weights_ = weights;
195 }
196
202 CostEstimate getCost(std::vector<int> inputDims) {
203 std::vector<double> coefficientVector = getCoefficientVector(getName());
204 float cost = coefficientVector[0] * inputDims[0] * inputDims[1];
205
206 return CostEstimate(cost, inputDims[0], inputDims[1]);
207 }
208
209 private:
210 float* weights_;
211 std::vector<std::string> document_;
212 std::vector<std::vector<float>> embedding_;
213 std::string weightsFile_;
214 faiss::IndexFlatL2 index_;
215};
std::vector< float > flatten(const std::vector< std::vector< float > > &vec2D)
Flattens a 2D vector into a 1D vector.
Definition RAG.h:29
A base class for machine learning functions, inheriting from Velox's VectorFunction.
Definition BaseFunction.h:9
std::vector< double > getCoefficientVector(std::string name)
Retrieves the cost coefficients for the function.
Definition BaseFunction.h:83
std::vector< int > dims
Dimensions of the function.
Definition BaseFunction.h:61
void setWeights(float *weights)
Sets the weights for the function.
Definition RAG.h:193
RAG(std::vector< std::string > document, std::vector< std::vector< float > > embedding, int dimension)
Constructor for RAG.
Definition RAG.h:52
CostEstimate getCost(std::vector< int > inputDims)
Estimates the cost of the function.
Definition RAG.h:202
std::string getWeightsFile()
Returns the path to the weights file.
Definition RAG.h:185
static std::string getName()
Returns the name of the function.
Definition RAG.h:177
std::string getFuncName()
Returns the name of the function.
Definition RAG.h:169
float * getTensor() const override
Returns the tensor associated with the function.
Definition RAG.h:161
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures.
Definition RAG.h:150
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &outputType, exec::EvalCtx &context, VectorPtr &output) const override
Applies the RAG function to the input data.
Definition RAG.h:85