ML functions
 
Loading...
Searching...
No Matches
UtilFunction.h
Go to the documentation of this file.
1
15
16#pragma once
17
#include <Eigen/Dense>
#include <algorithm>
#include <cctype>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <iterator>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include "BaseFunction.h"
#include "velox/exec/tests/utils/AssertQueryBuilder.h"
#include "velox/exec/tests/utils/PlanBuilder.h"
#include "velox/exec/tests/utils/TempDirectoryPath.h"
#include "velox/vector/tests/utils/VectorTestBase.h"
27
28using namespace facebook::velox;
29using namespace facebook::velox::test;
30using namespace facebook::velox::exec::test;
31using namespace facebook::velox::memory;
32
37class ChangeRating : public MLFunction {
38 public:
43
52 void apply(
53 const SelectivityVector& rows,
54 std::vector<VectorPtr>& args,
55 const TypePtr& type,
56 exec::EvalCtx& context,
57 VectorPtr& output) const override {
58 BaseVector::ensureWritable(rows, type, context.pool(), output);
59
60 auto input = args[0];
61 int* inputValues = input->as<FlatVector<int>>()->values()->asMutable<int>();
62
63 int inputSize = rows.size();
64
65 std::vector<int> result(rows.size());
66
67 for (int i = 0; i < inputSize; i++) {
68 result[i] = (inputValues[i] >= 3) ? 1 : 0;
69 }
70
71 VectorMaker maker{context.pool()};
72 output = maker.flatVector<int>(result, INTEGER());
73 }
74
79 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
80 return {exec::FunctionSignatureBuilder()
81 .argumentType("INTEGER")
82 .returnType("INTEGER")
83 .build()};
84 }
85
90 static std::string getName() {
91 return "change_rating";
92 };
93
98 float* getTensor() const override {
99 // FIXME
100 return nullptr;
101 }
102
106 void setWeight() {
107 //
108 }
109};
110
116 public:
121
130 void apply(
131 const SelectivityVector& rows,
132 std::vector<VectorPtr>& args,
133 const TypePtr& type,
134 exec::EvalCtx& context,
135 VectorPtr& output) const override {
136 BaseVector::ensureWritable(rows, type, context.pool(), output);
137
138 auto input = args[0];
139 int* inputValues = input->as<FlatVector<int>>()->values()->asMutable<int>();
140
141 int inputSize = rows.size();
142
143 std::vector<std::vector<int>> result(rows.size(), std::vector<int>(1));
144
145 for (int i = 0; i < inputSize; i++) {
146 result[i][0] = inputValues[i];
147 }
148
149 VectorMaker maker{context.pool()};
150 output = maker.arrayVector<int>(result, INTEGER());
151 }
152
157 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
158 return {exec::FunctionSignatureBuilder()
159 .argumentType("INTEGER")
160 .returnType("array(INTEGER)")
161 .build()};
162 }
163
/**
 * @brief Returns the name of the function.
 * @return "convert_int_array".
 */
static std::string getName() {
  return std::string("convert_int_array");
}
171
176 float* getTensor() const override {
177 // FIXME
178 return nullptr;
179 }
180
/// Sets the weight for the function. Currently a no-op.
void setWeight() {}
187};
188
194 public:
199
208 void apply(
209 const SelectivityVector& rows,
210 std::vector<VectorPtr>& args,
211 const TypePtr& type,
212 exec::EvalCtx& context,
213 VectorPtr& output) const override {
214 BaseVector::ensureWritable(rows, type, context.pool(), output);
215
216 auto input = args[0];
217 float* inputValues =
218 input->as<FlatVector<float>>()->values()->asMutable<float>();
219
220 int inputSize = rows.size();
221
222 std::vector<std::vector<float>> result(rows.size(), std::vector<float>(1));
223
224 for (int i = 0; i < inputSize; i++) {
225 result[i][0] = inputValues[i];
226 }
227
228 VectorMaker maker{context.pool()};
229 output = maker.arrayVector<float>(result, REAL());
230 }
231
236 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
237 return {exec::FunctionSignatureBuilder()
238 .argumentType("REAL")
239 .returnType("array(REAL)")
240 .build()};
241 }
242
/**
 * @brief Returns the name of the function.
 * @return "convert_float_array".
 */
static std::string getName() {
  return std::string("convert_float_array");
}
250
255 float* getTensor() const override {
256 // FIXME
257 return nullptr;
258 }
259
/// Sets the weight for the function. Currently a no-op.
void setWeight() {}
266};
267
273 public:
278
287 void apply(
288 const SelectivityVector& rows,
289 std::vector<VectorPtr>& args,
290 const TypePtr& type,
291 exec::EvalCtx& context,
292 VectorPtr& output) const override {
293 BaseVector::ensureWritable(rows, type, context.pool(), output);
294
295 auto input = args[0];
296 double* inputValues =
297 input->as<FlatVector<double>>()->values()->asMutable<double>();
298
299 int inputSize = rows.size();
300
301 std::vector<std::vector<float>> result(rows.size(), std::vector<float>(1));
302
303 for (int i = 0; i < inputSize; i++) {
304 result[i][0] = inputValues[i];
305 }
306
307 VectorMaker maker{context.pool()};
308 output = maker.arrayVector<float>(result, REAL());
309 }
310
315 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
316 return {exec::FunctionSignatureBuilder()
317 .argumentType("DOUBLE")
318 .returnType("array(REAL)")
319 .build()};
320 }
321
/**
 * @brief Returns the name of the function.
 * @return "convert_double_to_float_array".
 */
static std::string getName() {
  return std::string("convert_double_to_float_array");
}
329
334 float* getTensor() const override {
335 // FIXME
336 return nullptr;
337 }
338
/// Sets the weight for the function. Currently a no-op.
void setWeight() {}
345};
346
352 public:
357
366 void apply(
367 const SelectivityVector& rows,
368 std::vector<VectorPtr>& args,
369 const TypePtr& type,
370 exec::EvalCtx& context,
371 VectorPtr& output) const override {
372 BaseVector::ensureWritable(rows, type, context.pool(), output);
373
374 // Decoder is required to handle address error, reference code:
375 // ArrayIntersectExcept.cpp
376 BaseVector* input = args[0].get();
377
378 exec::LocalDecodedVector inputHolder(context, *input, rows);
379 auto decodedInputArray = inputHolder.get();
380 auto baseInputArray =
381 decodedInputArray->base()->as<ArrayVector>()->elements();
382
383 double* inputValues = baseInputArray->values()->asMutable<double>();
384
385 int numRawInput = rows.size();
386 int numInput = rows.countSelected();
387 int numElements = baseInputArray->size();
388 int sizeOfArray = numElements / numInput;
389
390 std::vector<std::vector<float>> result(
391 numRawInput, std::vector<float>(sizeOfArray));
392 int processedIndex = 0;
393 for (int i = 0; i < numRawInput; i++) {
394 if (!rows.isValid(i)) {
395 // Skip invalid rows
396 continue;
397 }
398 std::transform(
399 inputValues + processedIndex * sizeOfArray,
400 inputValues + (processedIndex + 1) * sizeOfArray,
401 result[i].begin(),
402 [](double val) { return static_cast<float>(val); });
403 processedIndex++;
404 }
405
406 VectorMaker maker{context.pool()};
407 auto localResult = maker.arrayVector<float>(result, REAL());
408 context.moveOrCopyResult(localResult, rows, output);
409 }
410
415 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
416 return {exec::FunctionSignatureBuilder()
417 .argumentType("array(DOUBLE)")
418 .returnType("array(REAL)")
419 .build()};
420 }
421
/**
 * @brief Returns the name of the function.
 * @return "convert_double_array_to_float_array".
 */
static std::string getName() {
  return std::string("convert_double_array_to_float_array");
}
429
434 float* getTensor() const override {
435 // FIXME
436 return nullptr;
437 }
438
/// Sets the weight for the function. Currently a no-op.
void setWeight() {}
445};
446
/**
 * @brief Loads bytes from a file into a string.
 *
 * Prints a message to stderr and terminates the process with exit code 1 when
 * the file cannot be opened or its size cannot be determined.
 *
 * @param path Path of the file to read (opened in binary mode).
 * @return The bytes that were read; embedded NUL bytes are preserved.
 */
inline std::string LoadBytesFromFile(const std::string& path) {
  std::ifstream fs(path, std::ios::in | std::ios::binary);
  if (fs.fail()) {
    std::cerr << "Cannot open " << path << std::endl;
    exit(1);
  }

  fs.seekg(0, std::ios::end);
  // tellg() returns -1 on failure; casting that to size_t would request an
  // enormous allocation, so treat it as an error instead.
  const std::streamoff length = fs.tellg();
  if (length < 0) {
    std::cerr << "Cannot determine size of " << path << std::endl;
    exit(1);
  }
  fs.seekg(0, std::ios::beg);

  std::string data(static_cast<size_t>(length), '\0');
  fs.read(data.data(), static_cast<std::streamsize>(data.size()));
  // Keep only what was actually read in case of a short read.
  data.resize(static_cast<size_t>(fs.gcount()));
  return data;
}
466
/**
 * @brief Converts a string to a boolean value.
 *
 * @param str Text to interpret.
 * @return true only when @p str equals "true" ignoring ASCII case; every
 *         other input, including the empty string, yields false.
 */
inline bool stringToBool(const std::string& str) {
  std::string lowered(str.size(), '\0');
  // std::tolower has undefined behaviour for negative char values, so each
  // character is passed through unsigned char first.
  std::transform(
      str.begin(), str.end(), lowered.begin(), [](unsigned char ch) {
        return static_cast<char>(std::tolower(ch));
      });
  return lowered == "true";
}
477
/**
 * @brief Retrieves the value of an environment variable.
 *
 * @param key Name of the environment variable.
 * @return The variable's value, or an empty string when it is not set.
 */
inline std::string getEnvVar(std::string const& key) {
  const char* value = std::getenv(key.c_str());
  return value == nullptr ? std::string() : std::string(value);
}
487
/**
 * @brief Reads the number of rows and columns from a data statistics file.
 *
 * The first line of the file holds the row count and the second line the
 * column count; anything after that is ignored. The outputs are assigned only
 * after both values were parsed successfully.
 *
 * Prints a message to stderr and terminates the process with exit code 1 when
 * the file cannot be opened.
 *
 * @param path    Path of the statistics file.
 * @param numRows Receives the row count.
 * @param numCols Receives the column count.
 * @throws std::invalid_argument when a line is missing or not a number.
 * @throws std::out_of_range when a value does not fit in an int.
 */
inline void readDataStats(const std::string& path, int& numRows, int& numCols) {
  std::ifstream file(path);
  if (!file.is_open()) {
    std::cerr << "Cannot open " << path << std::endl;
    exit(1);
  }

  // Reads the next line and parses it as an int; reports which line is
  // missing instead of surfacing a bare "stoi" error.
  const auto readCount = [&](const char* what) {
    std::string line;
    if (!std::getline(file, line)) {
      throw std::invalid_argument(
          "readDataStats: missing " + std::string(what) + " line in " + path);
    }
    return std::stoi(line);
  };

  const int parsedRows = readCount("row count");
  const int parsedCols = readCount("column count");
  numRows = parsedRows;
  numCols = parsedCols;
}
506
/**
 * @brief Flattens a 2D vector into a 1D array.
 *
 * Rows are copied back to back in order; rows may have different lengths.
 *
 * @tparam T Element type.
 * @param vec2D     Rows to flatten.
 * @param totalSize Receives the total number of elements copied.
 * @return Pointer to a buffer allocated with new[] holding @p totalSize
 *         elements. The caller owns it and must release it with delete[].
 */
template <typename T>
T* flattenVectorToPointer(
    const std::vector<std::vector<T>>& vec2D,
    size_t& totalSize) {
  // Calculate total size in one pass
  totalSize = 0;
  for (const auto& row : vec2D) {
    totalSize += row.size();
  }

  // Allocate memory for the flattened array
  T* flatArray = new T[totalSize];

  // Flatten the 2D vector into the 1D array
  T* ptr = flatArray;
  for (const auto& row : vec2D) {
    ptr = std::copy(row.begin(), row.end(), ptr);
  }

  return flatArray;
}
536
/**
 * @brief Flattens a 2D vector into a 1D array when the caller does not need
 *        the element count.
 *
 * Forwards to the two-argument overload and discards the reported size.
 *
 * @tparam T Element type.
 * @param vec2D Rows to flatten.
 * @return The pointer returned by the two-argument overload.
 */
template <typename T>
T* flattenVectorToPointer(const std::vector<std::vector<T>>& vec2D) {
  // Placeholder for the size the two-argument overload reports.
  size_t discardedSize{0};
  T* flattened = flattenVectorToPointer(vec2D, discardedSize);
  return flattened;
}
549
/**
 * @brief Counts the number of words in a string.
 *
 * A word is a maximal run of non-whitespace characters, as extracted by
 * operator>> on a std::istringstream.
 *
 * @param input Text to scan.
 * @return Number of whitespace-separated words; 0 for empty or all-blank
 *         input.
 */
inline int countWords(const std::string& input) {
  std::istringstream stream(input);
  // Each iterator increment extracts one whitespace-delimited token.
  return static_cast<int>(std::distance(
      std::istream_iterator<std::string>(stream),
      std::istream_iterator<std::string>()));
}
std::string getEnvVar(std::string const &key)
Retrieves the value of an environment variable.
Definition UtilFunction.h:483
void readDataStats(const std::string &path, int &numRows, int &numCols)
Reads the number of rows and columns from a data statistics file.
Definition UtilFunction.h:494
int countWords(const std::string &input)
Counts the number of words in a string.
Definition UtilFunction.h:555
bool stringToBool(const std::string &str)
Converts a string to a boolean value.
Definition UtilFunction.h:472
T * flattenVectorToPointer(const std::vector< std::vector< T > > &vec2D, size_t &totalSize)
Flattens a 2D vector into a 1D array.
Definition UtilFunction.h:515
std::string LoadBytesFromFile(const std::string &path)
Loads bytes from a file into a string.
Definition UtilFunction.h:452
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the function to change ratings to binary values.
Definition UtilFunction.h:52
void setWeight()
Sets the weight for the function.
Definition UtilFunction.h:106
float * getTensor() const override
Returns the tensor associated with the function.
Definition UtilFunction.h:98
ChangeRating()
Default constructor for ChangeRating.
Definition UtilFunction.h:42
static std::string getName()
Returns the name of the function.
Definition UtilFunction.h:90
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures.
Definition UtilFunction.h:79
void setWeight()
Sets the weight for the function.
Definition UtilFunction.h:442
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the function to convert a double array to a float array.
Definition UtilFunction.h:366
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures.
Definition UtilFunction.h:415
static std::string getName()
Returns the name of the function.
Definition UtilFunction.h:426
ConvertDoubleArrayToFloatArray()
Default constructor for ConvertDoubleArrayToFloatArray.
Definition UtilFunction.h:356
float * getTensor() const override
Returns the tensor associated with the function.
Definition UtilFunction.h:434
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures.
Definition UtilFunction.h:315
static std::string getName()
Returns the name of the function.
Definition UtilFunction.h:326
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the function to convert a double vector to a float array.
Definition UtilFunction.h:287
float * getTensor() const override
Returns the tensor associated with the function.
Definition UtilFunction.h:334
ConvertDoubleToFloatArray()
Default constructor for ConvertDoubleToFloatArray.
Definition UtilFunction.h:277
void setWeight()
Sets the weight for the function.
Definition UtilFunction.h:342
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the function to convert a float vector to a float array.
Definition UtilFunction.h:208
void setWeight()
Sets the weight for the function.
Definition UtilFunction.h:263
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures.
Definition UtilFunction.h:236
ConvertToFloatArray()
Default constructor for ConvertToFloatArray.
Definition UtilFunction.h:198
static std::string getName()
Returns the name of the function.
Definition UtilFunction.h:247
float * getTensor() const override
Returns the tensor associated with the function.
Definition UtilFunction.h:255
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures.
Definition UtilFunction.h:157
void setWeight()
Sets the weight for the function.
Definition UtilFunction.h:184
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the function to convert an integer vector to an integer array.
Definition UtilFunction.h:130
float * getTensor() const override
Returns the tensor associated with the function.
Definition UtilFunction.h:176
ConvertToIntArray()
Default constructor for ConvertToIntArray.
Definition UtilFunction.h:120
static std::string getName()
Returns the name of the function.
Definition UtilFunction.h:168
A base class for machine learning functions, inheriting from Velox's VectorFunction.
Definition BaseFunction.h:9