ML functions
 
Loading...
Searching...
No Matches
functions.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2025 ASU Cactus Lab.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
26
27#pragma once
28#include <torch/torch.h>
29#include <Eigen/Dense>
30#include <chrono>
31#include <filesystem>
32#include "BaseFunction.h"
33#include "BatchNorm.h"
34#include "ChatGPT.h"
35#include "ComplexLayer.h"
36#include "Concat.h"
37#include "CosineSimilarity.h"
38#include "DecisionForest.h"
39#include "DecisionTree.h"
40#include "DotProduct.h"
41#include "Dropout.h"
42#include "Embedding.h"
43#include "Encoder.h"
46#include "PositionEncoding.h"
47#include "RAG.h"
48#include "SequencePooling.h"
49#include "XGBoost.h"
50#include "velox/vector/tests/utils/VectorMaker.h"
51
52using namespace facebook::velox;
53using namespace facebook::velox::test;
54
55/*
56 TODO
57 1. conv2d - done
58 2. max pooling - done
59 3. flatten - not required
60 4. batch normalization
61 5. padding
62 6. concatenate
63 7. embedding
 64 8. transformer -> existing libraries, encoder, decoder, how to decompose it
 65 into atomic linear algebra
66 // focus on weight
67 9. GRU -> not interesting
68*/
69
70// TODO: Refactor
71// class MLFunction : public exec::VectorFunction {
72// public:
73// virtual ~MLFunction() = default;
74
75// virtual float* getTensor() const = 0;
76
77// virtual std::vector<int> getDims() {
78// return dims;
79// }
80
81// virtual std::string getFuncName() {
82// return "";
83// }
84
85// virtual int getNumDims() {
86// return dims.size();
87// }
88
89// virtual CostEstimate getCost(std::vector<int> inputDims) {
90// return CostEstimate(0, inputDims[0], inputDims[1]);
91// }
92
93// protected:
94// std::vector<int> dims;
95// double getWeightedCost(std::string name, float cost) {
96// std::vector<double> coefficient =
97// UdfCostCoefficient::getInstance().getCoefficient(name);
98// // FIXME
99// return 0;
100// // return coefficient[0] * cost;
101// }
102// std::vector<double> getCoefficientVector(std::string name) {
103// return UdfCostCoefficient::getInstance().getCoefficient(name);
104// }
105// };
106
115 public:
122 MatrixMultiply(float* weights, int num_rows, int num_cols) {
123 // Create a deep copy of the weights.
124 weights_ = new float[num_rows * num_cols];
125 std::memcpy(weights_, weights, num_rows * num_cols * sizeof(float));
126 dims.push_back(num_rows);
127 dims.push_back(num_cols);
128 }
129
136 MatrixMultiply(std::string weightsFile, int num_rows, int num_cols) {
137 weightsFile_ = weightsFile;
138 dims.push_back(num_rows);
139 dims.push_back(num_cols);
140 }
141
  /// Multiplies each selected input row vector (length dims[0]) by the
  /// stored weight matrix (dims[0] x dims[1]) and writes the resulting
  /// row vector (length dims[1]) into the output ArrayVector.
  ///
  /// @param rows Selection of rows to process.
  /// @param args args[0]: array(REAL) input rows; args[1] (optional):
  ///             constant BOOLEAN use_gpu flag.
  /// @param outputType Velox type of the result (array(REAL)).
  /// @param context Evaluation context (memory pool, decoding helpers).
  /// @param output Result vector; made writable and populated here.
  void apply(
      const SelectivityVector& rows,
      std::vector<VectorPtr>& args,
      const TypePtr& outputType,
      exec::EvalCtx& context,
      VectorPtr& output) const override {
    bool use_gpu = false;
    if (args.size() == 2) {
      // An optional parameter can be passed to enable the GPU for matrix multiplication.
      use_gpu = args[1]->as<ConstantVector<bool>>()->valueAt(0);
    }
    if (use_gpu) {
      // TODO: Implement GPU matrix multiplication.
      throw std::runtime_error(
          "GPU implementation of Matrix Multiplication is not implemented.");
    } else {
      // Ensure output vector is writable.
      context.ensureWritable(rows, outputType, output);
      output->clearNulls(rows);
      auto arrayOutput = output->as<ArrayVector>();
      auto sizes = arrayOutput->mutableSizes(rows.end());
      auto rawSizes = sizes->asMutable<int32_t>();
      auto offsets = arrayOutput->mutableOffsets(rows.end());
      auto rawOffsets = offsets->asMutable<int32_t>();

      // Initialize sizes and offsets to zero.
      std::fill(rawSizes, rawSizes + rows.end(), 0);
      std::fill(rawOffsets, rawOffsets + rows.end(), 0);

      auto elementsOutput = arrayOutput->elements();
      auto elementsPool = context.pool();

      // Perform matrix multiplication logic.
      exec::DecodedArgs decodedArgs(rows, args, context);
      auto decodedInput = decodedArgs.at(0);
      auto inputArray = decodedInput->base()->as<ArrayVector>();
      auto inputElements = inputArray->elements();
      float* inputValues = inputElements->values()->asMutable<float>();
      auto inputOffsets = inputArray->rawOffsets();
      auto inputSizes = inputArray->rawSizes();

      // The map between the row index in the input data and the row index in the output data.
      std::map<vector_size_t, vector_size_t> rowMap;
      // For efficient check.
      std::unordered_set<vector_size_t> uniqueRawIndexeSet;
      // For iterating over the insert ordering.
      std::vector<vector_size_t> uniqueRawIndexeVector;
      vector_size_t numUniqueRows = 0;
      // Deduplicate selected rows that decode to the same underlying array so
      // each distinct input row is multiplied only once.
      rows.applyToSelected([&](vector_size_t row) {
        auto mappedIndexInRowData = decodedInput->index(row);
        if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
            uniqueRawIndexeSet.end()) {
          // Add it.
          rowMap[row] = numUniqueRows;
          uniqueRawIndexeSet.insert(mappedIndexInRowData);
          uniqueRawIndexeVector.push_back(mappedIndexInRowData);
          ++numUniqueRows;
        } else {
          // Already added.
          // NOTE(review): this looks up rowMap with a raw-data index rather
          // than a previously-selected row index; it is correct only when
          // those two index spaces coincide (e.g. flat encoding) — verify.
          rowMap[row] = rowMap[mappedIndexInRowData];
        }
      });

      // Gather the distinct input rows into a dense Eigen matrix.
      int numInputMatrixRows = numUniqueRows;
      Eigen::MatrixXf inputMatrix(numInputMatrixRows, dims[0]);
      int rowIndex = 0;
      for (auto rawIndex : uniqueRawIndexeVector) {
        Eigen::Map<const Eigen::VectorXf> rowVector(
            inputValues + inputOffsets[rawIndex], dims[0]);
        inputMatrix.row(rowIndex++) = rowVector;
      }

      // View the flat weight buffer as a dims[0] x dims[1] row-major matrix
      // without copying.
      Eigen::Map<
          Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
          weightMatrix(weights_, dims[0], dims[1]);
      Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
          resultMatrix = inputMatrix * weightMatrix;

      // Append results to the output vector.
      auto baseOffset = elementsOutput->size();
      elementsOutput->resize(baseOffset + rows.end() * dims[1]);

      float* outputValues = elementsOutput->values()->asMutable<float>();
      vector_size_t outputOffset = 0;
      rows.applyToSelected([&](vector_size_t row) {
        if (rowMap.find(row) == rowMap.end()) {
          throw std::runtime_error(
              "Mapped index not found for the result matrix.");
        }
        auto mappedIndexInResultMatrix = rowMap[row];
        rawOffsets[row] = outputOffset;
        rawSizes[row] = dims[1];
        // Each selected row gets its own copy of the (possibly shared)
        // deduplicated result row.
        std::memcpy(
            outputValues + outputOffset,
            resultMatrix.row(mappedIndexInResultMatrix).data(),
            dims[1] * sizeof(float));

        outputOffset += dims[1];
      });
      arrayOutput->setElements(elementsOutput);
    }
  }
252
257 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
258 return {
259 exec::FunctionSignatureBuilder()
260 .returnType("array(REAL)")
261 .argumentType("array(REAL)")
262 .build(),
263 // Supports an additional flag: use_gpu.
264 exec::FunctionSignatureBuilder()
265 .returnType("array(REAL)")
266 .argumentType("array(REAL)")
267 .argumentType("BOOLEAN")
268 .build()};
269 }
270
  /// Returns the raw (row-major) weight buffer owned by this function.
  float* getTensor() const override {
    return weights_;
  }

  /// Instance-level name accessor; forwards to the static getName().
  std::string getFuncName() {
    return getName();
  };

  /// SQL name under which this function is registered.
  static std::string getName() {
    return "mat_mul";
  };

  /// Path of the on-disk weights (empty when weights were given in memory).
  std::string getWeightsFile() {
    return weightsFile_;
  }

  /// Installs a weight buffer.
  /// NOTE(review): unlike the pointer constructor this does not deep-copy,
  /// and a previously owned buffer is not freed — confirm intended
  /// ownership semantics.
  void setWeights(float* weights) {
    weights_ = weights;
  }

  /// Estimates the cost of multiplying an inputDims[0] x dims[0] input by
  /// the dims[0] x dims[1] weights using calibrated per-function
  /// coefficients.
  ///
  /// @param inputDims {input row count, input column count}.
  /// @return Cost estimate plus the output shape inputDims[0] x dims[1].
  CostEstimate getCost(std::vector<int> inputDims) {
    std::vector<double> coefficientVector = getCoefficientVector(getName());
    int factor1 = inputDims[0];
    int factor2 = dims[0];
    int factor3 = dims[1];
    float cost = coefficientVector[0] * factor1 * factor2 * factor3 +
        coefficientVector[1] * factor1 + coefficientVector[2] * factor2 +
        coefficientVector[3] * factor3;
    return CostEstimate(cost, inputDims[0], dims[1]);
  }
326
327 private:
328 float* weights_;
329 std::string weightsFile_;
330};
331
340 public:
  /// Stores the shape parameters used by the blocked multiplication.
  ///
  /// @param num_rows Inner dimension (rows of the right operand).
  /// @param num_cols Columns of the right operand.
  /// @param num_samples Rows of the left operand.
  /// @param blocks Number of blocks the computation is split into.
  /// NOTE(review): parameter meanings inferred from their use as Eigen map
  /// extents in apply() — confirm.
  MatrixMultiply_b(int num_rows, int num_cols, int num_samples, int blocks) {
    dims.push_back(num_rows);
    dims.push_back(num_cols);
    dims.push_back(num_samples);
    dims.push_back(blocks);
  }
354
  /// Multiplies two flat array(REAL) operands interpreted as matrices:
  /// args[0] as dims[2] x dims[0] and args[1] as dims[0] x dims[1], and
  /// returns the product as a single array(array(REAL)) row.
  ///
  /// @param rows Selection of rows to process.
  /// @param args args[0]: left operand values; args[1]: right operand values.
  /// @param type Velox type of the result.
  /// @param context Evaluation context.
  /// @param output Result vector (one array-of-arrays entry).
  void apply(
      const SelectivityVector& rows,
      std::vector<VectorPtr>& args,
      const TypePtr& type,
      exec::EvalCtx& context,
      VectorPtr& output) const override {
    BaseVector::ensureWritable(rows, type, context.pool(), output);
    VectorMaker maker{context.pool()};

    BaseVector* left = args[0].get();
    BaseVector* right = args[1].get();

    exec::LocalDecodedVector leftHolder(context, *left, rows);
    auto decodedLeftArray = leftHolder.get();
    auto baseLeftArray =
        decodedLeftArray->base()->as<ArrayVector>()->elements();

    exec::LocalDecodedVector rightHolder(context, *right, rows);
    auto decodedRightArray = rightHolder.get();
    auto baseRightArray = rightHolder->base()->as<ArrayVector>()->elements();

    float* input_values_v = baseLeftArray->values()->asMutable<float>();
    float* input_values_w = baseRightArray->values()->asMutable<float>();

    // auto varrayVector = std::make_shared<ArrayVector<float>>();
    // const int elements_v_per_row = 1500000; //6000*250
    // const int elements_w_per_row = 125000; // 250*500

    // std::vector<std::vector<float>> result(1,
    // std::vector<float>(dims[1]*dims[2])); //6000*500

    // Interpret the flat buffers as row-major matrices without copying.
    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        m1(input_values_v, dims[2], dims[0]); // 3*2
    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        m2(input_values_w, dims[0], dims[1]); // 2*5
    Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> m =
        m1 * m2; // 3*5

    // for (int i = 0; i < m.rows(); ++i) {
    //   for (int j = 0; j < m.cols(); ++j) {
    //     result[0][i * dims[1] + j] = m(i, j);
    //   }
    // }
    // m = m.reshaped(1, m.size());
    // std::cout << "shape: " << m.rows() << "," <<m.cols() << std::endl;

    // Copy each result row into a nested std::vector for the vector maker.
    std::vector<std::vector<float>> result;
    for (int i = 0; i < m.rows(); i++) {
      std::vector<float> row(m.row(i).data(), m.row(i).data() + m.cols());
      result.push_back(row);
    }
    auto baseVector = maker.arrayVector<float>(result, REAL());
    auto arrayOfArrays = maker.arrayVector({0}, baseVector);
    output = arrayOfArrays;
  }
418
423 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
424 return {exec::FunctionSignatureBuilder()
425 .returnType("array(array(REAL))")
426 .argumentType("array(REAL)")
427 .argumentType("array(REAL)")
428 .build()};
429 }
430
  /// Returns the weight buffer.
  /// NOTE(review): no constructor of this class ever assigns weights_ —
  /// verify before dereferencing the returned pointer.
  float* getTensor() const override {
    return weights_;
  }

  /// Instance-level name accessor; forwards to the static getName().
  std::string getFuncName() {
    return getName();
  };

  /// SQL name under which this function is registered.
  static std::string getName() {
    return "mat_mul_block";
  };
454
455 private:
456 float* weights_;
457};
458
459
469 public:
  /// Stores the shape parameters for the horizontally blocked multiply.
  ///
  /// @param num_rows Inner dimension shared by input and weights.
  /// @param num_cols Total number of weight columns.
  /// @param block_size Number of weight columns per block.
  MatrixMultiply_h(int num_rows, int num_cols, int block_size) {
    dims.push_back(num_rows);
    dims.push_back(num_cols);
    dims.push_back(block_size);
  }
481
  /// Multiplies each selected input row (length dims[0], args[0]) by one
  /// block of weight columns supplied at runtime (args[1]); the block may
  /// be narrower than dims[2] when it is the last block.
  ///
  /// @param rows Selection of rows to process.
  /// @param args args[0]: array(REAL) input rows; args[1]: array(REAL)
  ///             weight block (dims[0] x block width, row-major).
  /// @param outputType Velox type of the result (array(REAL)).
  /// @param context Evaluation context.
  /// @param output Result vector; made writable and populated here.
  void apply(
      const SelectivityVector& rows,
      std::vector<VectorPtr>& args,
      const TypePtr& outputType,
      exec::EvalCtx& context,
      VectorPtr& output) const override {
    BaseVector::ensureWritable(rows, outputType, context.pool(), output);
    output->clearNulls(rows);
    auto arrayOutput = output->as<ArrayVector>();
    auto sizes = arrayOutput->mutableSizes(rows.end());
    auto rawSizes = sizes->asMutable<int32_t>();
    auto offsets = arrayOutput->mutableOffsets(rows.end());
    auto rawOffsets = offsets->asMutable<int32_t>();

    // Initialize sizes and offsets to zero.
    std::fill(rawSizes, rawSizes + rows.end(), 0);
    std::fill(rawOffsets, rawOffsets + rows.end(), 0);

    auto elementsOutput = arrayOutput->elements();
    auto elementsPool = context.pool();
    VectorMaker maker{context.pool()};

    // Validate input arguments
    VELOX_CHECK_EQ(
        args.size(), 2, "Blocked-based matrix multiply requires 2 inputs");

    exec::DecodedArgs decodedArgs(rows, args, context);
    auto numInputs = rows.size();
    auto decodedInput1 = decodedArgs.at(0);
    auto decodedInput2 = decodedArgs.at(1);
    auto input1Array = decodedInput1->base()->as<ArrayVector>();
    auto input2Array = decodedInput2->base()->as<ArrayVector>();
    auto input1Elements = input1Array->elements();
    auto input1Offsets = input1Array->rawOffsets();
    auto input1Sizes = input1Array->rawSizes();
    auto input2Elements = input2Array->elements();

    float* input1Values = input1Elements->values()->asMutable<float>();
    float* input2Values = input2Elements->values()->asMutable<float>();

    // The final block can be narrower than dims[2]; derive the actual block
    // width from the number of weight values supplied.
    int currentBlockSize = (input2Elements->size() < (dims[0] * dims[2]))
        ? input2Elements->size() / dims[0]
        : dims[2];
    int input1MatrixNumRow = input1Elements->size() / dims[0];

    // Deduplicate selected rows that decode to the same underlying array.
    std::map<vector_size_t, vector_size_t> rowMap;
    std::unordered_set<vector_size_t> uniqueRawIndexeSet;
    std::vector<vector_size_t> uniqueRawIndexeVector;
    vector_size_t numUniqueRows = 0;
    rows.applyToSelected([&](vector_size_t row) {
      auto mappedIndexInRowData = decodedInput1->index(row);
      if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
          uniqueRawIndexeSet.end()) {
        // Add it.
        rowMap[row] = numUniqueRows;
        uniqueRawIndexeSet.insert(mappedIndexInRowData);
        uniqueRawIndexeVector.push_back(mappedIndexInRowData);
        ++numUniqueRows;
      } else {
        // Already added.
        // NOTE(review): indexes rowMap with a raw-data index rather than a
        // previously-selected row index — correct only when the two index
        // spaces coincide (e.g. flat encoding); verify.
        rowMap[row] = rowMap[mappedIndexInRowData];
      }
    });

    // Gather the distinct input rows into a dense Eigen matrix.
    int numInputMatrixRows = numUniqueRows;
    Eigen::MatrixXf inputMatrix(numInputMatrixRows, dims[0]);
    int rowIndex = 0;
    for (auto rawIndex : uniqueRawIndexeVector) {
      Eigen::Map<const Eigen::VectorXf> rowVector(
          input1Values + input1Offsets[rawIndex], dims[0]);
      inputMatrix.row(rowIndex++) = rowVector;
    }

    // View the runtime weight block as a row-major matrix without copying.
    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        weightMatrix(input2Values, dims[0], currentBlockSize);
    Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
        resultMatrix = inputMatrix * weightMatrix;

    // Append results to the output vector.
    auto baseOffset = elementsOutput->size();
    elementsOutput->resize(baseOffset + rows.end() * currentBlockSize);

    float* outputValues = elementsOutput->values()->asMutable<float>();

    vector_size_t outputOffset = 0;
    rows.applyToSelected([&](vector_size_t row) {
      if (rowMap.find(row) == rowMap.end()) {
        throw std::runtime_error(
            "Mapped index not found for the result matrix.");
      }
      auto mappedIndexInResultMatrix = rowMap[row];
      rawOffsets[row] = outputOffset;
      rawSizes[row] = currentBlockSize;
      std::memcpy(
          outputValues + outputOffset,
          resultMatrix.row(mappedIndexInResultMatrix).data(),
          currentBlockSize * sizeof(float));
      outputOffset += currentBlockSize;
    });
    arrayOutput->setElements(elementsOutput);
  }
591
596 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
597 return {exec::FunctionSignatureBuilder()
598 .returnType("array(REAL)")
599 .argumentType("array(REAL)")
600 .argumentType("array(REAL)")
601 .build()};
602 }
603
  /// Returns the weight buffer.
  /// NOTE(review): no constructor of this class ever assigns weights_ —
  /// verify before dereferencing the returned pointer.
  float* getTensor() const override {
    return weights_;
  }

  /// Instance-level name accessor; forwards to the static getName().
  std::string getFuncName() {
    return getName();
  };

  /// SQL name under which this function is registered.
  static std::string getName() {
    return "mat_mul_h";
  };
627
  /// Estimates the cost of one blocked multiply step.
  /// NOTE(review): deliberately reuses the calibrated "mat_mul" coefficient
  /// set rather than a mat_mul_h-specific one — confirm this is intended.
  ///
  /// @param inputDims {input row count, input column count}.
  /// @return Cost estimate plus the output shape inputDims[0] x dims[2].
  CostEstimate getCost(std::vector<int> inputDims) {
    std::vector<double> coefficientVector = getCoefficientVector("mat_mul");
    int factor1 = inputDims[0];
    int factor2 = inputDims[1];
    int factor3 = dims[2];
    float cost = coefficientVector[0] * factor1 * factor2 * factor3 +
        coefficientVector[1] * factor1 + coefficientVector[2] * factor2 +
        coefficientVector[3] * factor3;
    return CostEstimate(cost, inputDims[0], dims[2]);
  }
643
644 private:
645 float* weights_;
646};
647
656 public:
665 int num_rows,
666 int num_cols,
667 int num_samples,
668 int blocks) {
669 dims.push_back(num_rows);
670 dims.push_back(num_cols);
671 dims.push_back(num_samples);
672 dims.push_back(blocks);
673 }
674
  /// Multiplies two flat array(REAL) operands interpreted as matrices:
  /// args[0] as dims[2] x dims[0] and args[1] as dims[0] x dims[1], and
  /// returns the product as a single array(array(REAL)) row.
  ///
  /// @param rows Selection of rows to process.
  /// @param args args[0]: left operand values; args[1]: right operand values.
  /// @param type Velox type of the result (ignored; a nested array type is
  ///             constructed explicitly below).
  /// @param context Evaluation context.
  /// @param output Result vector (one array-of-arrays entry).
  void apply(
      const SelectivityVector& rows,
      std::vector<VectorPtr>& args,
      const TypePtr& type,
      exec::EvalCtx& context,
      VectorPtr& output) const override {
    auto elementType =
        ArrayType(std::make_shared<ArrayType>(ArrayType(REAL())));
    BaseVector::ensureWritable(
        rows, std::make_shared<ArrayType>(elementType), context.pool(), output);
    VectorMaker maker{context.pool()};

    BaseVector* left = args[0].get();
    BaseVector* right = args[1].get();

    exec::LocalDecodedVector leftHolder(context, *left, rows);
    auto decodedLeftArray = leftHolder.get();
    auto baseLeftArray =
        decodedLeftArray->base()->as<ArrayVector>()->elements();

    exec::LocalDecodedVector rightHolder(context, *right, rows);
    auto decodedRightArray = rightHolder.get();
    auto baseRightArray = rightHolder->base()->as<ArrayVector>()->elements();

    float* input_values_v = baseLeftArray->values()->asMutable<float>();
    float* input_values_w = baseRightArray->values()->asMutable<float>();

    // Interpret the flat buffers as row-major matrices without copying.
    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        m1(input_values_v, dims[2], dims[0]);
    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        m2(input_values_w, dims[0], dims[1]);
    Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> m =
        m1 * m2;

    // Copy each result row into a nested std::vector for the vector maker.
    std::vector<std::vector<float>> result;
    for (int i = 0; i < m.rows(); i++) {
      std::vector<float> row(m.row(i).data(), m.row(i).data() + m.cols());
      result.push_back(row);
    }
    auto baseVector = maker.arrayVector<float>(result, REAL());
    auto arrayOfArrays = maker.arrayVector({0}, baseVector);
    output = arrayOfArrays;
  }
728
733 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
734 return {exec::FunctionSignatureBuilder()
735 .returnType("array(array(REAL))")
736 .argumentType("array(REAL)")
737 .argumentType("array(REAL)")
738 .build()};
739 }
740
  /// Returns the weight buffer.
  /// NOTE(review): no constructor of this class ever assigns weights_ —
  /// verify before dereferencing the returned pointer.
  float* getTensor() const override {
    return weights_;
  }

  /// Instance-level name accessor; forwards to the static getName().
  std::string getFuncName() {
    return getName();
  };

  /// SQL name under which this function is registered.
  static std::string getName() {
    return "mat_mul_block";
  };
764
765 private:
766 float* weights_;
767};
768
776 public:
782 MatrixAddition(float* weights, int num_cols) {
783 weights_ = weights;
784 dims.push_back(num_cols);
785 }
786
792 MatrixAddition(std::string weightsFile, int num_cols) {
793 weightsFile_ = weightsFile;
794 dims.push_back(num_cols);
795 }
796
  /// Adds the stored weight values element-wise to the input arrays.
  ///
  /// @param rows Selection of rows to process.
  /// @param args args[0]: array(REAL) input rows of width dims[0].
  /// @param type Velox type of the result (array(REAL)).
  /// @param context Evaluation context.
  /// @param output Result vector.
  void apply(
      const SelectivityVector& rows,
      std::vector<VectorPtr>& args,
      const TypePtr& type,
      exec::EvalCtx& context,
      VectorPtr& output) const override {
    BaseVector::ensureWritable(rows, type, context.pool(), output);

    auto input_elements = args[0]->as<ArrayVector>()->elements();
    float* input_values = input_elements->values()->asMutable<float>();

    // Interpret input and weights as row-major matrices without copying.
    // NOTE(review): weights_ is mapped as rows.size() x dims[0], i.e. the
    // weight buffer must hold one full addend row per input row — confirm
    // this matches how callers size the buffer.
    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        m1(input_values, rows.size(), dims[0]);
    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        m2(weights_, rows.size(), dims[0]);

    Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> m =
        m1 + m2;

    // Copy each result row into a nested std::vector for the vector maker.
    std::vector<std::vector<float>> result;
    for (int i = 0; i < m.rows(); i++) {
      std::vector<float> row(m.row(i).data(), m.row(i).data() + m.cols());
      result.push_back(row);
    }
    VectorMaker maker{context.pool()};
    output = maker.arrayVector<float>(result, REAL());
  }
834
839 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
840 return {exec::FunctionSignatureBuilder()
841 .returnType("array(REAL)")
842 .argumentType("array(REAL)")
843 .build()};
844 }
845
  /// Returns the addend buffer (not owned; see constructor).
  float* getTensor() const override {
    return weights_;
  }

  /// Instance-level name accessor; forwards to the static getName().
  std::string getFuncName() {
    return getName();
  };

  /// SQL name under which this function is registered.
  static std::string getName() {
    return "mat_add";
  };

  /// Path of the on-disk weights (empty when weights were given in memory).
  std::string getWeightsFile() {
    return weightsFile_;
  }

  /// Installs an addend buffer (no deep copy; previous buffer not freed).
  void setWeights(float* weights) {
    weights_ = weights;
  }

  /// Estimates cost as linear in the number of input elements, using
  /// calibrated per-function coefficients.
  ///
  /// @param inputDims {input row count, input column count}.
  /// @return Cost estimate plus the (unchanged) output shape.
  CostEstimate getCost(std::vector<int> inputDims) {
    std::vector<double> coefficientVector = getCoefficientVector(getName());
    float cost = coefficientVector[0] * inputDims[0] * inputDims[1];
    return CostEstimate(cost, inputDims[0], inputDims[1]);
  }
896
897 private:
898 float* weights_;
899 std::string weightsFile_;
900};
901
910public:
917 MatrixVectorAddition(float* weights, int num_cols) {
918 // Create a deep copy of the weights
919 weights_ = new float[num_cols];
920 std::memcpy(weights_, weights, num_cols * sizeof(float));
921 dims.push_back(num_cols);
922 }
923
930 MatrixVectorAddition(std::string weightsFile, int num_cols) {
931 weightsFile_ = weightsFile;
932 dims.push_back(num_cols);
933 }
934
  /// Adds the stored bias vector (length dims[0]) to every selected input
  /// row and writes the sums into the output ArrayVector.
  ///
  /// @param rows Selection of rows to process.
  /// @param args args[0]: array(REAL) input rows of width dims[0].
  /// @param type Velox type of the result (array(REAL)).
  /// @param context Evaluation context.
  /// @param output Result vector; made writable and populated here.
  void apply(
      const SelectivityVector& rows,
      std::vector<VectorPtr>& args,
      const TypePtr& type,
      exec::EvalCtx& context,
      VectorPtr& output) const override {
    BaseVector::ensureWritable(rows, type, context.pool(), output);
    output->clearNulls(rows);
    auto arrayOutput = output->as<ArrayVector>();
    auto sizes = arrayOutput->mutableSizes(rows.end());
    auto rawSizes = sizes->asMutable<int32_t>();
    auto offsets = arrayOutput->mutableOffsets(rows.end());
    auto rawOffsets = offsets->asMutable<int32_t>();

    // Initialize sizes and offsets to zero.
    std::fill(rawSizes, rawSizes + rows.end(), 0);
    std::fill(rawOffsets, rawOffsets + rows.end(), 0);
    auto elementsOutput = arrayOutput->elements();
    auto elementsPool = context.pool();

    exec::DecodedArgs decodedArgs(rows, args, context);
    auto decodedInput = decodedArgs.at(0);
    auto numRows = rows.size();
    auto inputArray = decodedInput->base()->as<ArrayVector>();
    auto inputElements = inputArray->elements();
    float* inputValues = inputElements->values()->asMutable<float>();
    auto inputOffsets = inputArray->rawOffsets();
    auto inputSizes = inputArray->rawSizes();

    // Deduplicate selected rows that decode to the same underlying array.
    std::map<vector_size_t, vector_size_t> rowMap;
    std::unordered_set<vector_size_t> uniqueRawIndexeSet;
    std::vector<vector_size_t> uniqueRawIndexeVector;
    vector_size_t numUniqueRows = 0;
    rows.applyToSelected([&](vector_size_t row) {
      auto mappedIndexInRowData = decodedInput->index(row);
      if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
          uniqueRawIndexeSet.end()) {
        // add it
        rowMap[row] = numUniqueRows;
        uniqueRawIndexeSet.insert(mappedIndexInRowData);
        uniqueRawIndexeVector.push_back(mappedIndexInRowData);
        ++numUniqueRows;
      } else {
        // already added
        // NOTE(review): indexes rowMap with a raw-data index rather than a
        // previously-selected row index — correct only when the two index
        // spaces coincide (e.g. flat encoding); verify.
        rowMap[row] = rowMap[mappedIndexInRowData];
      }
    });

    // Gather the distinct input rows into a dense Eigen matrix.
    int numInputMatrixRows = numUniqueRows;
    Eigen::MatrixXf inputMatrix(numInputMatrixRows, dims[0]);
    int rowIndex = 0;
    for (auto rawIndex : uniqueRawIndexeVector) {
      Eigen::Map<const Eigen::VectorXf> rowVector(
          inputValues + inputOffsets[rawIndex], dims[0]);
      inputMatrix.row(rowIndex++) = rowVector;
    }

    // View the bias buffer as a 1 x dims[0] row and broadcast-add it.
    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        vectorMatrix(weights_, 1, dims[0]);

    inputMatrix.rowwise() += vectorMatrix.row(0);

    // Append results to the output vector.
    auto baseOffset = elementsOutput->size();
    elementsOutput->resize(baseOffset + rows.end() * dims[0]);
    float* outputValues = elementsOutput->values()->asMutable<float>();

    vector_size_t outputOffset = 0;

    rows.applyToSelected([&](vector_size_t row) {
      if (rowMap.find(row) == rowMap.end()) {
        throw std::runtime_error(
            "Mapped index not found for the result matrix.");
      }
      auto mappedIndexInResultMatrix = rowMap[row];
      rawOffsets[row] = outputOffset;
      rawSizes[row] = dims[0];

      std::memcpy(
          outputValues + outputOffset,
          inputMatrix.row(mappedIndexInResultMatrix).data(),
          dims[0] * sizeof(float));

      outputOffset += dims[0];
    });
    arrayOutput->setElements(elementsOutput);
  }
1033
1034 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
1035 return {exec::FunctionSignatureBuilder()
1036 .returnType("array(REAL)")
1037 .argumentType("array(REAL)")
1038 .build()};
1039 }
1040
1041 float* getTensor() const override {
1042 return weights_;
1043 }
1044
1050 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
1051 return {exec::FunctionSignatureBuilder()
1052 .returnType("array(REAL)")
1053 .argumentType("array(REAL)")
1054 .build()};
1055 }
1056
1062 float* getTensor() const override {
1063 return weights_;
1064 }
1065
  /// Instance-level name accessor; forwards to the static getName().
  std::string getFuncName() {
    return getName();
  };

  /// SQL name under which this function is registered.
  /// NOTE(review): "mat_add" is the same name MatrixAddition::getName()
  /// returns — confirm the collision is intentional.
  static std::string getName() {
    return "mat_add";
  };

  /// Path of the on-disk weights (empty when weights were given in memory).
  std::string getWeightsFile() {
    return weightsFile_;
  }

  /// Installs a bias buffer (no deep copy; previous buffer not freed).
  void setWeights(float* weights) {
    weights_ = weights;
  }
1101
private:
  float* weights_;
  std::string weightsFile_;
  // NOTE(review): the sibling classes in this file use an inherited dims
  // member (see the commented-out MLFunction sketch above); redeclaring it
  // here shadows that one — confirm this is intentional.
  std::vector<int> dims;
};
1107
1114class Sigmoid : public MLFunction {
1115public:
1120
1127 static float sigmoidFunction(float x) {
1128 return 1.0f / (1.0f + std::exp(-x));
1129 }
1130
1143 void apply(
1144 const SelectivityVector& rows,
1145 std::vector<VectorPtr>& args,
1146 const TypePtr& type,
1147 exec::EvalCtx& context,
1148 VectorPtr& output) const override {
1149 BaseVector::ensureWritable(rows, type, context.pool(), output);
1150 exec::DecodedArgs decodedArgs(rows, args, context);
1151 auto decodedInput = decodedArgs.at(0);
1152 auto numRows = rows.size();
1153
1154 auto inputArray = decodedInput->base()->as<ArrayVector>();
1155 auto inputElements = inputArray->elements();
1156 float* inputValues = inputElements->values()->asMutable<float>();
1157 auto inputOffsets = inputArray->rawOffsets();
1158 auto inputSizes = inputArray->rawSizes();
1159
1160 std::vector<std::vector<float>> result(numRows);
1161
1162 rows.applyToSelected([&](vector_size_t i) {
1163 size_t mappedIndexInRowData = decodedInput->index(i);
1164 size_t dataSize = inputSizes[mappedIndexInRowData];
1165 size_t dataOffset = inputOffsets[mappedIndexInRowData];
1166 std::vector<float> rowResult(dataSize);
1167 std::transform(
1168 inputValues + dataOffset,
1169 inputValues + dataOffset + dataSize,
1170 rowResult.data(),
1172 result[i] = rowResult;
1173 });
1174 VectorMaker maker{context.pool()};
1175 output = maker.arrayVector<float>(result, REAL());
1176 }
1177
1183 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
1184 return {exec::FunctionSignatureBuilder()
1185 .returnType("array(REAL)")
1186 .argumentType("array(REAL)")
1187 .build()};
1188 }
1189
1195 float* getTensor() const override {
1196 return new float[0];
1197 }
1198
1204 std::string getFuncName() {
1205 return getName();
1206 }
1207
1213 static std::string getName() {
1214 return "sigmoid";
1215 }
1216
1223 CostEstimate getCost(std::vector<int> inputDims) {
1224 std::vector<double> coefficientVector = getCoefficientVector(getName());
1225 float cost = coefficientVector[0] * inputDims[0] * inputDims[1];
1226 return CostEstimate(cost, inputDims[0], inputDims[1]);
1227 }
1228};
1229
1236class Relu : public MLFunction {
1237public:
1241 Relu() {}
1242
1249 static float reluFunction(float x) {
1250 return (x > 0.0f) ? x : 0.0f;
1251 }
1252
1265 void apply(
1266 const SelectivityVector& rows,
1267 std::vector<VectorPtr>& args,
1268 const TypePtr& type,
1269 exec::EvalCtx& context,
1270 VectorPtr& output) const override {
1271 BaseVector::ensureWritable(rows, type, context.pool(), output);
1272 exec::DecodedArgs decodedArgs(rows, args, context);
1273 auto decodedInput = decodedArgs.at(0);
1274 auto numRows = rows.size();
1275
1276 auto inputArray = decodedInput->base()->as<ArrayVector>();
1277 auto inputElements = inputArray->elements();
1278 float* inputValues = inputElements->values()->asMutable<float>();
1279 auto inputOffsets = inputArray->rawOffsets();
1280 auto inputSizes = inputArray->rawSizes();
1281
1282 std::vector<std::vector<float>> result(numRows);
1283
1284 rows.applyToSelected([&](vector_size_t i) {
1285 size_t mappedIndexInRowData = decodedInput->index(i);
1286 size_t dataSize = inputSizes[mappedIndexInRowData];
1287 size_t dataOffset = inputOffsets[mappedIndexInRowData];
1288 std::vector<float> rowResult(dataSize);
1289 std::transform(
1290 inputValues + dataOffset,
1291 inputValues + dataOffset + dataSize,
1292 rowResult.data(),
1293 reluFunction);
1294 result[i] = rowResult;
1295 });
1296 VectorMaker maker{context.pool()};
1297 output = maker.arrayVector<float>(result, REAL());
1298 }
1299
1305 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
1306 return {exec::FunctionSignatureBuilder()
1307 .returnType("array(REAL)")
1308 .argumentType("array(REAL)")
1309 .build()};
1310 }
1311
1317 float* getTensor() const override {
1318 return new float[0];
1319 }
1320
1326 std::string getFuncName() {
1327 return getName();
1328 }
1329
1335 static std::string getName() {
1336 return "relu";
1337 }
1338
1345 CostEstimate getCost(std::vector<int> inputDims) {
1346 std::vector<double> coefficientVector = getCoefficientVector(getName());
1347 float cost = coefficientVector[0] * inputDims[0] * inputDims[1];
1348 return CostEstimate(cost, inputDims[0], inputDims[1]);
1349 }
1350};
1351
1359class Softmax : public MLFunction {
1360public:
1365
1377 void apply(
1378 const SelectivityVector& rows,
1379 std::vector<VectorPtr>& args,
1380 const TypePtr& type,
1381 exec::EvalCtx& context,
1382 VectorPtr& output) const override {
1383 BaseVector::ensureWritable(rows, type, context.pool(), output);
1384 output->clearNulls(rows);
1385 auto arrayOutput = output->as<ArrayVector>();
1386 auto sizes = arrayOutput->mutableSizes(rows.end());
1387 auto rawSizes = sizes->asMutable<int32_t>();
1388 auto offsets = arrayOutput->mutableOffsets(rows.end());
1389 auto rawOffsets = offsets->asMutable<int32_t>();
1390
1391 // Initialize sizes and offsets to zero.
1392 std::fill(rawSizes, rawSizes + rows.end(), 0);
1393 std::fill(rawOffsets, rawOffsets + rows.end(), 0);
1394 auto elementsOutput = arrayOutput->elements();
1395 auto elementsPool = context.pool();
1396
1397 exec::DecodedArgs decodedArgs(rows, args, context);
1398 auto decodedInput = decodedArgs.at(0);
1399 auto numRows = rows.size();
1400 auto inputArray = decodedInput->base()->as<ArrayVector>();
1401 auto inputElements = inputArray->elements();
1402 float* inputValues = inputElements->values()->asMutable<float>();
1403 auto inputOffsets = inputArray->rawOffsets();
1404 auto inputSizes = inputArray->rawSizes();
1405
1406 std::map<vector_size_t, vector_size_t> rowMap;
1407 std::unordered_set<vector_size_t> uniqueRawIndexeSet;
1408 std::vector<vector_size_t> uniqueRawIndexeVector;
1409 vector_size_t numUniqueRows = 0;
1410 int numCols;
1411 rows.applyToSelected([&](vector_size_t row) {
1412 auto mappedIndexInRowData = decodedInput->index(row);
1413 if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
1414 uniqueRawIndexeSet.end()) {
1415 // add it
1416 rowMap[row] = numUniqueRows;
1417 uniqueRawIndexeSet.insert(mappedIndexInRowData);
1418 uniqueRawIndexeVector.push_back(mappedIndexInRowData);
1419 ++numUniqueRows;
1420 numCols = inputSizes[mappedIndexInRowData];
1421 } else {
1422 // already added
1423 rowMap[row] = rowMap[mappedIndexInRowData];
1424 }
1425 });
1426
1427 int numInputMatrixRows = numUniqueRows;
1428 Eigen::MatrixXf inputMatrix(numInputMatrixRows, numCols);
1429 int rowIndex = 0;
1430 for (auto rawIndex : uniqueRawIndexeVector) {
1431 Eigen::Map<const Eigen::VectorXf> rowVector(
1432 inputValues + inputOffsets[rawIndex], numCols);
1433 inputMatrix.row(rowIndex++) = rowVector;
1434 }
1435
1436 Eigen::ArrayXXf exp = inputMatrix.array().exp();
1437 Eigen::ArrayXXf sum = exp.rowwise().sum();
1438 for (int i = 0; i < exp.rows(); i++) {
1439 exp.row(i) /= sum(i);
1440 }
1441
1442 auto baseOffset = elementsOutput->size();
1443 elementsOutput->resize(baseOffset + rows.end() * numCols);
1444 float* outputValues = elementsOutput->values()->asMutable<float>();
1445 vector_size_t outputOffset = 0;
1446 rows.applyToSelected([&](vector_size_t row) {
1447 if (rowMap.find(row) == rowMap.end()) {
1448 throw std::runtime_error(
1449 "Mapped index not found for the result matrix.");
1450 }
1451 auto mappedIndexInResultMatrix = rowMap[row];
1452 rawOffsets[row] = outputOffset;
1453 rawSizes[row] = numCols;
1454
1455 std::memcpy(
1456 outputValues + outputOffset,
1457 exp.row(mappedIndexInResultMatrix).data(),
1458 numCols * sizeof(float));
1459
1460 outputOffset += numCols;
1461 });
1462
1463 arrayOutput->setElements(elementsOutput);
1464 }
1465
1471 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
1472 return {exec::FunctionSignatureBuilder()
1473 .returnType("array(REAL)")
1474 .argumentType("array(REAL)")
1475 .build()};
1476 }
1477
  /// Softmax holds no model weights; returns an empty heap allocation to
  /// satisfy the MLFunction interface.
  /// NOTE(review): the caller receives ownership of the zero-length array —
  /// if no caller frees it, each call leaks an allocation. Confirm whether
  /// returning nullptr would be acceptable to callers.
  float* getTensor() const override {
    return new float[0];
  }
1486
  /// Instance-level accessor for the registered function name; forwards to
  /// the static getName().
  std::string getFuncName() {
    return getName();
  }
1495
1501 static std::string getName() {
1502 return "softmax";
1503 }
1504
1511 CostEstimate getCost(std::vector<int> inputDims) {
1512 std::vector<double> coefficientVector = getCoefficientVector(getName());
1513 float cost = coefficientVector[0] * inputDims[0] * inputDims[1];
1514 return CostEstimate(cost, inputDims[0], inputDims[1]);
1515 }
1516};
1517
1524class Argmax : public MLFunction {
1525public:
1530
1542 void apply(
1543 const SelectivityVector& rows,
1544 std::vector<VectorPtr>& args,
1545 const TypePtr& type,
1546 exec::EvalCtx& context,
1547 VectorPtr& output) const override {
1548 BaseVector::ensureWritable(rows, type, context.pool(), output);
1549 auto arrayOutput = output->asFlatVector<int>();
1550
1551 exec::DecodedArgs decodedArgs(rows, args, context);
1552 auto decodedInput = decodedArgs.at(0);
1553 auto numRows = rows.size();
1554
1555 auto inputArray = decodedInput->base()->as<ArrayVector>();
1556 auto inputElements = inputArray->elements();
1557 float* inputValues = inputElements->values()->asMutable<float>();
1558 auto inputOffsets = inputArray->rawOffsets();
1559 auto inputSizes = inputArray->rawSizes();
1560
1561 std::map<vector_size_t, vector_size_t> rowMap;
1562 std::unordered_set<vector_size_t> uniqueRawIndexeSet;
1563 std::vector<vector_size_t> uniqueRawIndexeVector;
1564 vector_size_t numUniqueRows = 0;
1565 int numCols;
1566 rows.applyToSelected([&](vector_size_t row) {
1567 auto mappedIndexInRowData = decodedInput->index(row);
1568 if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
1569 uniqueRawIndexeSet.end()) {
1570 // add it
1571 rowMap[row] = numUniqueRows;
1572 uniqueRawIndexeSet.insert(mappedIndexInRowData);
1573 uniqueRawIndexeVector.push_back(mappedIndexInRowData);
1574 ++numUniqueRows;
1575 numCols = inputSizes[mappedIndexInRowData];
1576 } else {
1577 // already added
1578 rowMap[row] = rowMap[mappedIndexInRowData];
1579 }
1580 });
1581
1582 int numInputMatrixRows = numUniqueRows;
1583 Eigen::MatrixXf inputMatrix(numInputMatrixRows, numCols);
1584 int rowIndex = 0;
1585 for (auto rawIndex : uniqueRawIndexeVector) {
1586 Eigen::Map<const Eigen::VectorXf> rowVector(
1587 inputValues + inputOffsets[rawIndex], numCols);
1588 inputMatrix.row(rowIndex++) = rowVector;
1589 }
1590
1591 std::map<vector_size_t, vector_size_t> argmaxMap;
1592 for (int i = 0; i < inputMatrix.rows(); i++) {
1593 Eigen::Index maxRow, maxCol;
1594 inputMatrix.row(i).maxCoeff(&maxRow, &maxCol);
1595 argmaxMap[i] = maxCol;
1596 }
1597
1598 int* outputValues = arrayOutput->mutableRawValues<int>();
1599 vector_size_t outputOffset = 0;
1600 std::unordered_map<int, int> valueCounts;
1601 rows.applyToSelected([&](vector_size_t row) {
1602 if (rowMap.find(row) == rowMap.end()) {
1603 throw std::runtime_error(
1604 "Mapped index not found for the result matrix.");
1605 }
1606 auto mappedIndexInResultMatrix = rowMap[row];
1607 outputValues[row] = argmaxMap[mappedIndexInResultMatrix];
1608 valueCounts[outputValues[row]]++;
1609 });
1610
1611 for (const auto& pair : valueCounts) {
1612 LOG(INFO) << "[INFO] Label Distributions: Key: " << pair.first
1613 << ", Value: " << pair.second << std::endl;
1614 }
1615 }
1616
1622 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
1623 return {exec::FunctionSignatureBuilder()
1624 .returnType("INTEGER")
1625 .argumentType("array(REAL)")
1626 .build()};
1627 }
1628
1634 float* getTensor() const override {
1635 return new float[0];
1636 }
1637
1643 std::string getFuncName() {
1644 return getName();
1645 }
1646
1652 static std::string getName() {
1653 return "argmax";
1654 }
1655
1662 CostEstimate getCost(std::vector<int> inputDims) {
1663 std::vector<double> coefficientVector = getCoefficientVector(getName());
1664 float cost = coefficientVector[0] * inputDims[0] * inputDims[1];
1665 return CostEstimate(cost, inputDims[0], inputDims[1]);
1666 }
1667};
1668
1678class MinMaxScaler : public MLFunction {
1679public:
1687 MinMaxScaler(float* scalerMinValues, float* scalerMaxValues, int numCols) {
1688 scalerMinValues_ = new float[numCols];
1689 scalerMaxValues_ = new float[numCols];
1690 std::memcpy(scalerMinValues_, scalerMinValues, numCols * sizeof(float));
1691 std::memcpy(scalerMaxValues_, scalerMaxValues, numCols * sizeof(float));
1692 numCols_ = numCols;
1693 }
1694
1700 MinMaxScaler(std::string minMaxScalerDataPath) {
1701 std::vector<float> scalerMinVector;
1702 std::vector<float> scalerMaxVector;
1703
1704 if (!std::filesystem::exists(minMaxScalerDataPath)) {
1705 throw std::runtime_error("File not found: " + minMaxScalerDataPath);
1706 }
1707 std::ifstream file(minMaxScalerDataPath);
1708 std::string line;
1709 // Read each line from the file
1710 int lineCount = 0;
1711 while (std::getline(file, line)) {
1712 std::istringstream iss(line); // Create a string stream from the line
1713 float value;
1714
1715 // Read each value from the line
1716 // First line should be min values
1717 // Second line should be max values
1718 while (iss >> value) {
1719 if (lineCount == 0) {
1720 scalerMinVector.push_back(value); // Store the value in tempValues
1721 } else if (lineCount == 1) {
1722 scalerMaxVector.push_back(value); // Store the value in tempValues
1723 } else {
1724 throw std::runtime_error(
1725 "Invalid file format, parsed lineCount: " +
1726 std::to_string(lineCount));
1727 }
1728 }
1729 lineCount++;
1730 }
1731 file.close(); // Close the file
1732 // the size should be equal
1733 assert(scalerMinVector.size() == scalerMaxVector.size());
1734 numCols_ = scalerMinVector.size();
1735
1736 scalerMinValues_ = new float[numCols_];
1737 scalerMaxValues_ = new float[numCols_];
1738 std::memcpy(
1739 scalerMinValues_, scalerMinVector.data(), numCols_ * sizeof(float));
1740 std::memcpy(
1741 scalerMaxValues_, scalerMaxVector.data(), numCols_ * sizeof(float));
1742 }
1743
1755 void apply(
1756 const SelectivityVector& rows,
1757 std::vector<VectorPtr>& args,
1758 const TypePtr& type,
1759 exec::EvalCtx& context,
1760 VectorPtr& output) const override {
1761 BaseVector::ensureWritable(rows, type, context.pool(), output);
1762
1763 output->clearNulls(rows);
1764 auto arrayOutput = output->as<ArrayVector>();
1765 auto sizes = arrayOutput->mutableSizes(rows.end());
1766 auto rawSizes = sizes->asMutable<int32_t>();
1767 auto offsets = arrayOutput->mutableOffsets(rows.end());
1768 auto rawOffsets = offsets->asMutable<int32_t>();
1769
1770 // Initialize sizes and offsets to zero.
1771 std::fill(rawSizes, rawSizes + rows.end(), 0);
1772 std::fill(rawOffsets, rawOffsets + rows.end(), 0);
1773 auto elementsOutput = arrayOutput->elements();
1774 auto elementsPool = context.pool();
1775
1776 exec::DecodedArgs decodedArgs(rows, args, context);
1777 auto decodedInput = decodedArgs.at(0);
1778 auto numRows = rows.size();
1779 auto inputArray = decodedInput->base()->as<ArrayVector>();
1780 auto inputElements = inputArray->elements();
1781 float* inputValues = inputElements->values()->asMutable<float>();
1782 auto inputOffsets = inputArray->rawOffsets();
1783 auto inputSizes = inputArray->rawSizes();
1784
1785 std::map<vector_size_t, vector_size_t> rowMap;
1786 std::unordered_set<vector_size_t> uniqueRawIndexeSet;
1787 std::vector<vector_size_t> uniqueRawIndexeVector;
1788 vector_size_t numUniqueRows = 0;
1789 int numCols = numCols_;
1790 rows.applyToSelected([&](vector_size_t row) {
1791 auto mappedIndexInRowData = decodedInput->index(row);
1792 if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
1793 uniqueRawIndexeSet.end()) {
1794 // add it
1795 rowMap[row] = numUniqueRows;
1796 uniqueRawIndexeSet.insert(mappedIndexInRowData);
1797 uniqueRawIndexeVector.push_back(mappedIndexInRowData);
1798 ++numUniqueRows;
1799 } else {
1800 // already added
1801 rowMap[row] = rowMap[mappedIndexInRowData];
1802 }
1803 });
1804
1805 int numInputMatrixRows = numUniqueRows;
1806 Eigen::MatrixXf inputMatrix(numInputMatrixRows, numCols);
1807 int rowIndex = 0;
1808 for (auto rawIndex : uniqueRawIndexeVector) {
1809 Eigen::Map<const Eigen::VectorXf> rowVector(
1810 inputValues + inputOffsets[rawIndex], numCols);
1811 inputMatrix.row(rowIndex++) = rowVector;
1812 }
1813
1814 Eigen::Map<
1815 Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
1816 minVals(scalerMinValues_, 1, numCols);
1817 Eigen::Map<
1818 Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
1819 maxVals(scalerMaxValues_, 1, numCols);
1820 Eigen::MatrixXf resultMatrix =
1821 (inputMatrix.rowwise() - minVals.row(0)).array().rowwise() /
1822 (maxVals.row(0) - minVals.row(0)).array();
1823
1824 auto baseOffset = elementsOutput->size();
1825 elementsOutput->resize(baseOffset + rows.end() * numCols);
1826 float* outputValues = elementsOutput->values()->asMutable<float>();
1827 vector_size_t outputOffset = 0;
1828 rows.applyToSelected([&](vector_size_t row) {
1829 if (rowMap.find(row) == rowMap.end()) {
1830 throw std::runtime_error(
1831 "Mapped index not found for the result matrix.");
1832 }
1833 auto mappedIndexInResultMatrix = rowMap[row];
1834 rawOffsets[row] = outputOffset;
1835 rawSizes[row] = numCols;
1836
1837 std::memcpy(
1838 outputValues + outputOffset,
1839 resultMatrix.row(mappedIndexInResultMatrix).data(),
1840 numCols * sizeof(float));
1841
1842 outputOffset += numCols;
1843 });
1844 arrayOutput->setElements(elementsOutput);
1845 }
1846
1852 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
1853 return {exec::FunctionSignatureBuilder()
1854 .returnType("array(REAL)")
1855 .argumentType("array(REAL)")
1856 .build()};
1857 }
1858
1864 float* getTensor() const override {
1865 return new float[0];
1866 }
1867
1873 std::string getFuncName() {
1874 return getName();
1875 }
1876
1882 static std::string getName() {
1883 return "min_max_scaler";
1884 }
1885
1892 CostEstimate getCost(std::vector<int> inputDims) {
1893 std::vector<double> coefficientVector = getCoefficientVector(getName());
1894 float cost = coefficientVector[0] * inputDims[0] * inputDims[1];
1895 return CostEstimate(cost, inputDims[0], inputDims[1]);
1896 }
1897
1898private:
1899 float* scalerMinValues_;
1900 float* scalerMaxValues_;
1901 int numCols_;
1902};
1903
1910public:
1918 TorchDNN2Level(float** weights, float** bias, std::vector<int> dimensions) {
1919 this->weights = weights;
1920 this->bias = bias;
1921 dims = dimensions;
1922 }
1923
1935 void apply(
1936 const SelectivityVector& rows,
1937 std::vector<VectorPtr>& args,
1938 const TypePtr& type,
1939 exec::EvalCtx& context,
1940 VectorPtr& output) const override {
1941 std::chrono::steady_clock::time_point begin =
1942 std::chrono::steady_clock::now();
1943 torch::nn::Linear dense1(dims[0], dims[1]);
1944 torch::nn::Linear dense2(dims[1], dims[2]);
1945 torch::nn::ReLU relu;
1946
1947 torch::Tensor weightTensor1 =
1948 torch::from_blob(weights[0], {dims[0], dims[1]}).t();
1949 torch::Tensor weightTensor2 =
1950 torch::from_blob(weights[1], {dims[1], dims[2]}).t();
1951 torch::Tensor bias1 = torch::from_blob(bias[0], {dims[1]});
1952 torch::Tensor bias2 = torch::from_blob(bias[1], {dims[2]});
1953
1954 dense1->weight.set_data(weightTensor1);
1955 dense2->weight.set_data(weightTensor2);
1956 dense1->bias.set_data(bias1);
1957 dense2->bias.set_data(bias2);
1958
1959 auto input_elements = args[0]->as<ArrayVector>()->elements();
1960 float* input_values = input_elements->values()->asMutable<float>();
1961 int input_size = input_elements->size();
1962
1963 torch::Tensor input =
1964 torch::from_blob(input_values, {rows.size(), dims[0]});
1965
1966 torch::Tensor layer1_output = dense1->forward(input);
1967 torch::Tensor reluOutput = relu->forward(layer1_output);
1968 torch::Tensor layer2_output = dense2->forward(reluOutput);
1969 torch::Tensor softmax_output =
1970 torch::nn::functional::softmax(layer2_output, 1);
1971 float* data = softmax_output.data_ptr<float>();
1972
1973 std::vector<std::vector<float>> results;
1974 for (int i = 0; i < rows.size(); ++i) {
1975 // std::vector<float> result;
1976 std::vector<float> result(data + i * dims[2], data + (i + 1) * dims[2]);
1977 // for (int j = 0; j < dims[2]; ++j) {
1978 // result.push_back(data[i*dims[2] + j]);
1979 // }
1980 results.push_back(result);
1981 }
1982 VectorMaker maker{context.pool()};
1983 output = maker.arrayVector<float>(results, REAL());
1984 }
1985
1991 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
1992 return {exec::FunctionSignatureBuilder()
1993 .returnType("array(REAL)")
1994 .argumentType("array(REAL)")
1995 .build()};
1996 }
1997
  /// This wrapper exposes no flat tensor; returns an empty caller-owned
  /// allocation to satisfy the MLFunction interface.
  /// NOTE(review): leaks a zero-length allocation per call if the caller
  /// never frees it.
  float* getTensor() const override {
    return new float[0];
  }
2006
  /// Returns the externally owned per-layer weight buffers (not copied;
  /// the original owner keeps ownership).
  float** getWeights() const {
    return weights;
  }
2015
  /// Returns the externally owned per-layer bias buffers (not copied).
  float** getBias() const {
    return bias;
  }
2024
  /// Instance-level accessor for the registered function name; forwards to
  /// the static getName().
  std::string getFuncName() {
    return getName();
  }
2033
2039 static std::string getName() {
2040 return "torch_dnn";
2041 }
2042
  /// Cost model: weighted product of the input dimensions and the first
  /// layer's dimensions.
  /// NOTE(review): the product is evaluated in int arithmetic before being
  /// passed on; large dimensions could overflow — consider widening.
  CostEstimate getCost(std::vector<int> inputDims) {
    float cost = getWeightedCost(
        getName(), inputDims[0] * inputDims[1] * dims[0] * dims[1]);
    return CostEstimate(cost, inputDims[0], inputDims[1]);
  }
2054
2055private:
2056 float** weights;
2057 float** bias;
2058 std::vector<int> dims;
2059};
2060
2066class TorchDNN : public MLFunction {
2067public:
2076 std::vector<float*> weights,
2077 std::vector<float*> bias,
2078 std::vector<int> dimensions) {
2079 this->weights = weights;
2080 this->bias = bias;
2081 dims = dimensions;
2082 }
2083
  /// Runs the configured multi-layer MLP over all input rows in a single
  /// batched libtorch forward pass and wraps the softmax output as an
  /// array(REAL) vector.
  ///
  /// NOTE(review): a ReLU is applied after *every* Linear layer, including
  /// the final one before softmax; TorchDNN2Level does not ReLU its last
  /// layer — confirm which behavior is intended.
  /// NOTE(review): torch::from_blob does not copy, so `weights`, `bias`,
  /// and the input buffers must outlive this call; the tensors stored in
  /// weights_tensors/bias_tensors keep referencing those blobs.
  void apply(
      const SelectivityVector& rows,
      std::vector<VectorPtr>& args,
      const TypePtr& type,
      exec::EvalCtx& context,
      VectorPtr& output) const override {
    std::vector<torch::nn::Linear> dense_layers;
    std::vector<torch::Tensor> weights_tensors;
    std::vector<torch::Tensor> bias_tensors;
    std::vector<torch::nn::ReLU> relus;

    // Create layers. Stored weights are {in, out}; libtorch Linear
    // expects {out, in}, hence the transpose.
    for (int i = 0; i < dims.size() - 1; ++i) {
      dense_layers.push_back(torch::nn::Linear(dims[i], dims[i + 1]));
      weights_tensors.push_back(
          torch::from_blob(weights[i], {dims[i], dims[i + 1]}).t());
      bias_tensors.push_back(torch::from_blob(bias[i], {dims[i + 1]}));
      relus.push_back(torch::nn::ReLU());
    }

    // Set weights and biases
    for (int i = 0; i < dense_layers.size(); ++i) {
      dense_layers[i]->weight.set_data(weights_tensors[i]);
      dense_layers[i]->bias.set_data(bias_tensors[i]);
    }

    auto input_elements = args[0]->as<ArrayVector>()->elements();
    float* input_values = input_elements->values()->asMutable<float>();
    torch::Tensor input =
        torch::from_blob(input_values, {rows.size(), dims[0]});

    // Forward pass: alternate Linear and ReLU for every configured layer.
    torch::Tensor output_tensor = input;
    for (int i = 0; i < dense_layers.size(); ++i) {
      output_tensor = dense_layers[i]->forward(output_tensor);
      output_tensor = relus[i]->forward(output_tensor);
    }

    // Softmax output
    output_tensor = torch::nn::functional::softmax(output_tensor, 1);
    float* data = output_tensor.data_ptr<float>();

    // Prepare results: copy each row of the final tensor into a per-row
    // float vector for VectorMaker.
    std::vector<std::vector<float>> results;
    for (int i = 0; i < rows.size(); ++i) {
      std::vector<float> result(
          data + i * dims.back(), data + (i + 1) * dims.back());
      results.push_back(result);
    }

    VectorMaker maker{context.pool()};
    output = maker.arrayVector<float>(results, REAL());
  }
2147
2153 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
2154 return {exec::FunctionSignatureBuilder()
2155 .returnType("array(REAL)")
2156 .argumentType("array(REAL)")
2157 .build()};
2158 }
2159
  /// This wrapper exposes no flat tensor; returns an empty caller-owned
  /// allocation to satisfy the MLFunction interface.
  float* getTensor() const override {
    return new float[0];
  }
2168
  /// Returns the per-layer weight buffer pointers. The pointed-to buffers
  /// are externally owned and not copied.
  const std::vector<float*>& getWeights() const {
    return weights;
  }
2177
  /// Returns the per-layer bias buffer pointers. The pointed-to buffers
  /// are externally owned and not copied.
  const std::vector<float*>& getBias() const {
    return bias;
  }
2186
  /// Instance-level accessor for the registered function name; forwards to
  /// the static getName().
  std::string getFuncName() {
    return getName();
  }
2195
2201 static std::string getName() {
2202 return "torchnn";
2203 }
2204
2211 CostEstimate getCost(std::vector<int> inputDims) {
2212 std::vector<double> coefficientVector = getCoefficientVector(getName());
2213 uint64_t factor1 = inputDims[0] * dims[0] * dims[1];
2214 uint64_t factor2 = inputDims[0] * dims[1] * dims[2];
2215 uint64_t factor3 = dims[0] * dims[1];
2216 uint64_t factor4 = dims[1] * dims[2];
2217 float cost = coefficientVector[0] * factor1 +
2218 coefficientVector[1] * factor2 + coefficientVector[2] * factor3 +
2219 coefficientVector[3] * factor4 + coefficientVector[4] * inputDims[0] +
2220 coefficientVector[5] * dims[0] + coefficientVector[6] * dims[1] +
2221 coefficientVector[7] * dims[2];
2222 // LOG(INFO) << fmt::format("[DEBUG] 4 values: {}, {}, {}, {}",inputDims[0],
2223 // inputDims[1], dims[0], dims[1]); LOG(INFO) << fmt::format("[DEBUG] coeff:
2224 // {}",coefficientVector); LOG(INFO) << fmt::format("[DEBUG] Cost
2225 // Computation: {}, {}, {}, {}, {}, {}, {}, {}", coefficientVector[0] *
2226 // factor1, coefficientVector[1] * factor2, coefficientVector[2] * factor3
2227 // , coefficientVector[3] * factor4, coefficientVector[4] *
2228 // inputDims[0] , coefficientVector[5] * dims[0],
2229 // coefficientVector[6] * dims[1] , coefficientVector[7] *
2230 // dims[2]);
2231 // LOG(INFO) << fmt::format("[DEBUG] compute debug: {}, {}, {}, {}, {}",
2232 // inputDims[0], inputDims[0]*dims[1], inputDims[0]*dims[1]*dims[2],
2233 // factor2, coefficientVector[1] * factor2);
2234
2235 return CostEstimate(cost, inputDims[0], dims[2]);
2236 }
2237
2238private:
2239 std::vector<float*> weights;
2240 std::vector<float*> bias;
2241 std::vector<int> dims;
2242};
2243
2247namespace velox::dl {
2248
2262
2268std::string kernelTypeToString(KernelType kernelType) {
2269 switch (kernelType) {
2270 case KernelType::MatMul:
2271 return "MatMul";
2272 case KernelType::MatAdd:
2273 return "MatAdd";
2274 case KernelType::ReLU:
2275 return "ReLU";
2277 return "Softmax";
2279 return "BatchNorm";
2280 case KernelType::Argmax:
2281 return "Argmax";
2283 return "Sigmoid";
2284 default:
2285 return "Unknown";
2286 }
2287}
2288
2295std::ostream& operator<<(std::ostream& os, KernelType kernelType) {
2296 switch (kernelType) {
2297 case KernelType::MatMul:
2298 return os << "MatMul";
2299 case KernelType::MatAdd:
2300 return os << "MatAdd";
2301 case KernelType::ReLU:
2302 return os << "ReLU";
2304 return os << "Softmax";
2306 return os << "BatchNorm";
2307 case KernelType::Argmax:
2308 return os << "Argmax";
2309 default:
2310 return os << "Unknown";
2311 }
2312}
2313
2314} // namespace velox::dl
2321class TorchDNNV2 : public MLFunction {
2322public:
2331 std::vector<velox::dl::KernelType> kernelTypes,
2332 std::vector<float*> weights,
2333 std::vector<int> dimensions) {
2334 this->weights = weights;
2335 dims = dimensions;
2336 kernelTypes_ = kernelTypes;
2337 int numOps = kernelTypes.size();
2338 int weightIdx = 0;
2339 hasArgmax_ = false;
2340 model_ = torch::nn::Sequential();
2341 if (2 * numOps != dims.size()) {
2342 throw std::runtime_error(fmt::format(
2343 "Mismatched number of 2*kernel types and dimensions: {} vs {}",
2344 2 * numOps,
2345 dims.size()));
2346 }
2347 assert(2 * numOps == dims.size());
2348 for (int i = 0; i < numOps; ++i) {
2349 if (kernelTypes[i] == velox::dl::KernelType::MatMul &&
2350 kernelTypes[i + 1] == velox::dl::KernelType::MatAdd) {
2351 auto denseLayer = torch::nn::Linear(dims[2 * i], dims[2 * i + 1]);
2352 denseLayer->weight.set_data(
2353 torch::from_blob(
2354 weights[weightIdx++], {dims[2 * i], dims[2 * i + 1]})
2355 .t());
2356 denseLayer->bias.set_data(
2357 torch::from_blob(weights[weightIdx++], {dims[2 * i + 1]}));
2358 model_->push_back(denseLayer);
2359 } else if (kernelTypes[i] == velox::dl::KernelType::MatAdd) {
2360 // Do nothing, which is handled by creating a Dense Layer in the above
2361 // code
2362 } else if (kernelTypes[i] == velox::dl::KernelType::BatchNorm) {
2363 auto batchNormLayer = torch::nn::BatchNorm1d(dims[2 * i]);
2364 batchNormLayer->weight.set_data(
2365 torch::from_blob(weights[weightIdx++], {dims[2 * i + 1]}));
2366 batchNormLayer->bias.set_data(
2367 torch::from_blob(weights[weightIdx++], {dims[2 * i + 1]}));
2368 model_->push_back(batchNormLayer);
2369 } else if (kernelTypes[i] == velox::dl::KernelType::ReLU) {
2370 model_->push_back(torch::nn::ReLU());
2371 } else if (kernelTypes[i] == velox::dl::KernelType::Sigmoid) {
2372 model_->push_back(torch::nn::Sigmoid());
2373 } else if (kernelTypes[i] == velox::dl::KernelType::Softmax) {
2374 model_->push_back(torch::nn::Softmax(1));
2375 } else if (kernelTypes[i] == velox::dl::KernelType::Argmax) {
2376 model_->push_back(LibTorchArgmaxKernel(1));
2377 hasArgmax_ = true;
2378 } else {
2379 throw std::runtime_error(fmt::format(
2380 "Unsupported kernel type of TorchDNNV2: {}", kernelTypes[i]));
2381 }
2382 }
2383 // enable evaluation mode, this is required for inference, otherwise some
2384 // module could failed, like dropout, batchnorm, etc.
2385 model_->eval();
2386 }
2387
2399 void apply(
2400 const SelectivityVector& rows,
2401 std::vector<VectorPtr>& args,
2402 const TypePtr& type,
2403 exec::EvalCtx& context,
2404 VectorPtr& output) const override {
2405 context.ensureWritable(rows, type, output);
2406 output->clearNulls(rows);
2407
2408 // Perform matrix multiplication logic.
2409 exec::DecodedArgs decodedArgs(rows, args, context);
2410 auto decodedInput = decodedArgs.at(0);
2411 auto inputArray = decodedInput->base()->as<ArrayVector>();
2412 auto inputElements = inputArray->elements();
2413 float* inputValues = inputElements->values()->asMutable<float>();
2414 auto inputOffsets = inputArray->rawOffsets();
2415 auto inputSizes = inputArray->rawSizes();
2416
2417 // The map between the row index in the input data and the row index in
2418 // the output data.
2419 std::map<vector_size_t, vector_size_t> rowMap;
2420 // for efficient check
2421 std::unordered_set<vector_size_t> uniqueRawIndexeSet;
2422 // for iterating over the insert ordering
2423 std::vector<vector_size_t> uniqueRawIndexeVector;
2424 vector_size_t numUniqueRows = 0;
2425 rows.applyToSelected([&](vector_size_t row) {
2426 auto mappedIndexInRowData = decodedInput->index(row);
2427 if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
2428 uniqueRawIndexeSet.end()) {
2429 // add it
2430 rowMap[row] = numUniqueRows;
2431 uniqueRawIndexeSet.insert(mappedIndexInRowData);
2432 uniqueRawIndexeVector.push_back(mappedIndexInRowData);
2433 ++numUniqueRows;
2434 } else {
2435 // already added
2436 rowMap[row] = rowMap[mappedIndexInRowData];
2437 }
2438 });
2439
2440 int numInputMatrixRows = numUniqueRows;
2441 Eigen::MatrixXf inputMatrix(numInputMatrixRows, dims[0]);
2442 int rowIndex = 0;
2443 for (auto rawIndex : uniqueRawIndexeVector) {
2444 Eigen::Map<const Eigen::VectorXf> rowVector(
2445 inputValues + inputOffsets[rawIndex], dims[0]);
2446 inputMatrix.row(rowIndex++) = rowVector;
2447 }
2448
2449 float* inputValues1 = inputMatrix.data();
2450
2451 torch::Tensor input =
2452 torch::from_blob(inputValues1, {numUniqueRows, dims[0]});
2453 torch::Tensor output_tensor = input;
2454
2455 output_tensor =
2456 const_cast<torch::nn::Sequential&>(model_)->forward(output_tensor);
2457 // Append results to the output vector.
2458 if (hasArgmax_) {
2459 auto arrayOutput = output->asFlatVector<int>();
2460 int* outputValues = arrayOutput->mutableRawValues<int>();
2461 auto int_tensor = output_tensor.to(torch::kInt);
2462 int* dataInt = int_tensor.data_ptr<int>();
2463
2464 rows.applyToSelected([&](vector_size_t row) {
2465 if (rowMap.find(row) == rowMap.end()) {
2466 throw std::runtime_error(
2467 "Mapped index not found for the result matrix.");
2468 }
2469 auto mappedIndexInResultMatrix = rowMap[row];
2470 outputValues[row] = dataInt[mappedIndexInResultMatrix];
2471 });
2472 } else {
2473 auto arrayOutput = output->as<ArrayVector>();
2474 auto sizes = arrayOutput->mutableSizes(rows.end());
2475 auto rawSizes = sizes->asMutable<int32_t>();
2476 auto offsets = arrayOutput->mutableOffsets(rows.end());
2477 auto rawOffsets = offsets->asMutable<int32_t>();
2478
2479 // Initialize sizes and offsets to zero.
2480 std::fill(rawSizes, rawSizes + rows.end(), 0);
2481 std::fill(rawOffsets, rawOffsets + rows.end(), 0);
2482
2483 auto elementsOutput = arrayOutput->elements();
2484 auto elementsPool = context.pool();
2485 auto baseOffset = elementsOutput->size();
2486 elementsOutput->resize(baseOffset + rows.end() * dims.back());
2487
2488 float* outputValues = elementsOutput->values()->asMutable<float>();
2489 vector_size_t outputOffset = 0;
2490 float* dataFloat = output_tensor.data_ptr<float>();
2491
2492 rows.applyToSelected([&](vector_size_t row) {
2493 if (rowMap.find(row) == rowMap.end()) {
2494 throw std::runtime_error(
2495 "Mapped index not found for the result matrix.");
2496 }
2497 auto mappedIndexInResultMatrix = rowMap.at(row);
2498 rawOffsets[row] = outputOffset;
2499 rawSizes[row] = dims.back();
2500 std::memcpy(
2501 outputValues + outputOffset,
2502 dataFloat + mappedIndexInResultMatrix * dims.back(),
2503 dims.back() * sizeof(float));
2504 outputOffset += dims.back();
2505 });
2506 arrayOutput->setElements(elementsOutput);
2507 }
2508 }
2509
2515 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
2516 return {
2517 exec::FunctionSignatureBuilder()
2518 .returnType("array(REAL)")
2519 .argumentType("array(REAL)")
2520 .build(),
2521 exec::FunctionSignatureBuilder()
2522 .returnType("INTEGER")
2523 .argumentType("array(REAL)")
2524 .argumentType("INTEGER")
2525 .build(),
2526 exec::FunctionSignatureBuilder()
2527 .returnType("INTEGER")
2528 .argumentType("array(REAL)")
2529 .argumentType("BIGINT")
2530 .build()};
2531 }
2532
  /// This wrapper exposes no flat tensor; returns an empty caller-owned
  /// allocation to satisfy the MLFunction interface.
  float* getTensor() const override {
    return new float[0];
  }
2541
  /// Returns the raw parameter buffer pointers passed at construction
  /// (weights and biases interleaved, in layer order). Externally owned.
  const std::vector<float*>& getWeights() const {
    return weights;
  }
2550
  /// Returns the bias buffer pointers.
  /// NOTE(review): the constructor visible here never populates `bias`
  /// (all parameters are consumed from `weights`), so this likely returns
  /// an empty vector — confirm intended use.
  const std::vector<float*>& getBias() const {
    return bias;
  }
2559
  /// Instance-level accessor for the registered function name; forwards to
  /// the static getName().
  std::string getFuncName() {
    return getName();
  }
2568
2574 static std::string getName() {
2575 return "complexTorchNN";
2576 }
2577
  /// Returns a copy of the kernel-type sequence this model was built from.
  std::vector<velox::dl::KernelType> getKernelTypes() const {
    return kernelTypes_;
  }
2586
  /// Placeholder cost model: always reports zero cost with the input
  /// dimensions passed through unchanged.
  CostEstimate getCost(std::vector<int> inputDims) {
    return CostEstimate(0, inputDims[0], inputDims[1]);
  }
2596
2597private:
2598 std::vector<float*> weights;
2599 std::vector<float*> bias;
2600 std::vector<velox::dl::KernelType> kernelTypes_;
2601 bool hasArgmax_;
2602 torch::nn::Sequential model_;
2603};
2604
2611public:
2620 std::vector<velox::dl::KernelType> kernelTypes,
2621 std::vector<float*> weights,
2622 std::vector<int> dimensions) {
2623 device_ = "cuda:0"; // Initialize CUDA device.
2624 this->weights = weights;
2625 dims = dimensions;
2626 kernelTypes_ = kernelTypes;
2627 int numOps = kernelTypes.size();
2628 int weightIdx = 0;
2629 hasArgmax_ = false;
2630 model_ = torch::nn::Sequential();
2631 assert(2 * numOps == dims.size());
2632 for (int i = 0; i < numOps; ++i) {
2633 if (kernelTypes[i] == velox::dl::KernelType::MatMul &&
2634 kernelTypes[i + 1] == velox::dl::KernelType::MatAdd) {
2635 auto denseLayer = torch::nn::Linear(dims[2 * i], dims[2 * i + 1]);
2636 denseLayer->weight.set_data(
2637 torch::from_blob(
2638 weights[weightIdx++], {dims[2 * i], dims[2 * i + 1]})
2639 .t());
2640 denseLayer->bias.set_data(
2641 torch::from_blob(weights[weightIdx++], {dims[2 * i + 1]}));
2642 model_->push_back(denseLayer);
2643 } else if (kernelTypes[i] == velox::dl::KernelType::MatAdd) {
2644 // Do nothing, which is handled by creating a Dense Layer in the above
2645 // code
2646 } else if (kernelTypes[i] == velox::dl::KernelType::BatchNorm) {
2647 auto batchNormLayer = torch::nn::BatchNorm1d(dims[2 * i]);
2648 batchNormLayer->weight.set_data(
2649 torch::from_blob(weights[weightIdx++], {dims[2 * i + 1]}));
2650 batchNormLayer->bias.set_data(
2651 torch::from_blob(weights[weightIdx++], {dims[2 * i + 1]}));
2652 model_->push_back(batchNormLayer);
2653 } else if (kernelTypes[i] == velox::dl::KernelType::ReLU) {
2654 model_->push_back(torch::nn::ReLU());
2655 } else if (kernelTypes[i] == velox::dl::KernelType::Sigmoid) {
2656 model_->push_back(torch::nn::Sigmoid());
2657 } else if (kernelTypes[i] == velox::dl::KernelType::Softmax) {
2658 model_->push_back(torch::nn::Softmax(1));
2659 } else if (kernelTypes[i] == velox::dl::KernelType::Argmax) {
2660 model_->push_back(LibTorchArgmaxKernel(1));
2661 hasArgmax_ = true;
2662 } else {
2663 throw std::runtime_error(fmt::format(
2664 "Unsupported kernel type of TorchDNNV2: {}", kernelTypes[i]));
2665 }
2666 }
2667 // enable evaluation mode, this is required for inference, otherwise some
2668 // module could failed, like dropout, batchnorm, etc.
2669 model_->to(device_);
2670 model_->eval();
2671 }
2672
2684 void apply(
2685 const SelectivityVector& rows,
2686 std::vector<VectorPtr>& args,
2687 const TypePtr& type,
2688 exec::EvalCtx& context,
2689 VectorPtr& output) const override {
2690 context.ensureWritable(rows, type, output);
2691 output->clearNulls(rows);
2692
2693 // Perform matrix multiplication logic.
2694 exec::DecodedArgs decodedArgs(rows, args, context);
2695 auto decodedInput = decodedArgs.at(0);
2696 auto inputArray = decodedInput->base()->as<ArrayVector>();
2697 auto inputElements = inputArray->elements();
2698 float* inputValues = inputElements->values()->asMutable<float>();
2699 auto inputOffsets = inputArray->rawOffsets();
2700 auto inputSizes = inputArray->rawSizes();
2701
2702 // The map between the row index in the input data and the row index in
2703 // the output data.
2704 std::map<vector_size_t, vector_size_t> rowMap;
2705 // for efficient check
2706 std::unordered_set<vector_size_t> uniqueRawIndexeSet;
2707 // for iterating over the insert ordering
2708 std::vector<vector_size_t> uniqueRawIndexeVector;
2709 vector_size_t numUniqueRows = 0;
2710 rows.applyToSelected([&](vector_size_t row) {
2711 auto mappedIndexInRowData = decodedInput->index(row);
2712 if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
2713 uniqueRawIndexeSet.end()) {
2714 // add it
2715 rowMap[row] = numUniqueRows;
2716 uniqueRawIndexeSet.insert(mappedIndexInRowData);
2717 uniqueRawIndexeVector.push_back(mappedIndexInRowData);
2718 ++numUniqueRows;
2719 } else {
2720 // already added
2721 rowMap[row] = rowMap[mappedIndexInRowData];
2722 }
2723 });
2724
2725 int numInputMatrixRows = numUniqueRows;
2726 Eigen::MatrixXf inputMatrix(numInputMatrixRows, dims[0]);
2727 int rowIndex = 0;
2728 for (auto rawIndex : uniqueRawIndexeVector) {
2729 Eigen::Map<const Eigen::VectorXf> rowVector(
2730 inputValues + inputOffsets[rawIndex], dims[0]);
2731 inputMatrix.row(rowIndex++) = rowVector;
2732 }
2733
2734 float* inputValues1 = inputMatrix.data();
2735
2736 torch::Tensor input =
2737 torch::from_blob(inputValues1, {numUniqueRows, dims[0]});
2738 torch::Tensor output_tensor = input;
2739 output_tensor = output_tensor.to(device_);
2740
2741 output_tensor =
2742 const_cast<torch::nn::Sequential&>(model_)->forward(output_tensor);
2743 output_tensor = output_tensor.to(torch::kCPU);
2744 // Append results to the output vector.
2745
2746 if (hasArgmax_) {
2747 auto arrayOutput = output->asFlatVector<int>();
2748 int* outputValues = arrayOutput->mutableRawValues<int>();
2749 auto int_tensor = output_tensor.to(torch::kInt);
2750 int* dataInt = int_tensor.data_ptr<int>();
2751
2752 rows.applyToSelected([&](vector_size_t row) {
2753 if (rowMap.find(row) == rowMap.end()) {
2754 throw std::runtime_error(
2755 "Mapped index not found for the result matrix.");
2756 }
2757 auto mappedIndexInResultMatrix = rowMap[row];
2758 outputValues[row] = dataInt[mappedIndexInResultMatrix];
2759 });
2760 } else {
2761 auto arrayOutput = output->as<ArrayVector>();
2762 auto sizes = arrayOutput->mutableSizes(rows.end());
2763 auto rawSizes = sizes->asMutable<int32_t>();
2764 auto offsets = arrayOutput->mutableOffsets(rows.end());
2765 auto rawOffsets = offsets->asMutable<int32_t>();
2766
2767 // Initialize sizes and offsets to zero.
2768 std::fill(rawSizes, rawSizes + rows.end(), 0);
2769 std::fill(rawOffsets, rawOffsets + rows.end(), 0);
2770
2771 auto elementsOutput = arrayOutput->elements();
2772 auto elementsPool = context.pool();
2773 auto baseOffset = elementsOutput->size();
2774 elementsOutput->resize(baseOffset + rows.end() * dims.back());
2775 float* outputValues = elementsOutput->values()->asMutable<float>();
2776 vector_size_t outputOffset = 0;
2777
2778 float* dataFloat = output_tensor.data_ptr<float>();
2779
2780 rows.applyToSelected([&](vector_size_t row) {
2781 if (rowMap.find(row) == rowMap.end()) {
2782 throw std::runtime_error(
2783 "Mapped index not found for the result matrix.");
2784 }
2785 auto mappedIndexInResultMatrix = rowMap[row];
2786 rawOffsets[row] = outputOffset;
2787 rawSizes[row] = dims.back();
2788 std::memcpy(
2789 outputValues + outputOffset,
2790 dataFloat + mappedIndexInResultMatrix * dims.back(),
2791 dims.back() * sizeof(float));
2792 outputOffset += dims.back();
2793 });
2794 arrayOutput->setElements(elementsOutput);
2795 }
2796 }
2797
2803 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
2804 return {
2805 exec::FunctionSignatureBuilder()
2806 .returnType("array(REAL)")
2807 .argumentType("array(REAL)")
2808 .build(),
2809 exec::FunctionSignatureBuilder()
2810 .returnType("INTEGER")
2811 .argumentType("array(REAL)")
2812 .argumentType("INTEGER")
2813 .build(),
2814 exec::FunctionSignatureBuilder()
2815 .returnType("INTEGER")
2816 .argumentType("array(REAL)")
2817 .argumentType("BIGINT")
2818 .build()};
2819 }
2820
  /// Returns a tensor for this function. The model's weights live inside the
  /// torch module, so there is nothing meaningful to expose; a zero-length
  /// allocation satisfies the MLFunction interface.
  /// NOTE(review): the caller receives ownership of this allocation — it
  /// leaks unless released with delete[]; confirm callers' expectations.
  float* getTensor() const override {
    return new float[0];
  }
2829
  /// Returns the per-layer raw weight pointers this model was built from.
  /// The pointed-to buffers are owned by the caller that constructed the
  /// function, not by this class.
  const std::vector<float*>& getWeights() const {
    return weights;
  }
2838
  /// Returns the per-layer raw bias pointers this model was built from.
  /// The pointed-to buffers are owned by the caller that constructed the
  /// function, not by this class.
  const std::vector<float*>& getBias() const {
    return bias;
  }
2847
  /// Returns the registered function name (delegates to the static getName()).
  std::string getFuncName() {
    return getName();
  }
2856
2862 static std::string getName() {
2863 return "complexTorchNN_GPU";
2864 }
2865
2871 std::vector<velox::dl::KernelType> getKernelTypes() const {
2872 return kernelTypes_;
2873 }
2874
  /// Estimates the cost of evaluating this function.
  /// @param inputDims must contain at least two entries; inputDims[0] and
  ///        inputDims[1] are forwarded as the row/column cost terms.
  CostEstimate getCost(std::vector<int> inputDims) {
    return CostEstimate(0, inputDims[0], inputDims[1]);
  }
2884
2885private:
2886 std::vector<float*> weights;
2887 std::vector<float*> bias;
2888 std::vector<velox::dl::KernelType> kernelTypes_;
2889 bool hasArgmax_;
2890 std::string device_;
2891 torch::nn::Sequential model_;
2892};
2893
2900public:
2910 std::string kernel,
2911 float* weights,
2912 float* bias,
2913 std::vector<int> dimensions) {
2914 this->kernel = kernel;
2915 this->weights = weights;
2916 this->bias = bias;
2917 dims = dimensions;
2918 }
2919
2931 void apply(
2932 const SelectivityVector& rows,
2933 std::vector<VectorPtr>& args,
2934 const TypePtr& type,
2935 exec::EvalCtx& context,
2936 VectorPtr& output) const override {
2937 std::vector<torch::nn::Linear> dense_layers;
2938 std::vector<torch::Tensor> weights_tensors;
2939 std::vector<torch::Tensor> bias_tensors;
2940 std::vector<torch::nn::ReLU> relus;
2941
2942 auto input_elements = args[0]->as<ArrayVector>()->elements();
2943 float* input_values = input_elements->values()->asMutable<float>();
2944 torch::Tensor input =
2945 torch::from_blob(input_values, {rows.size(), dims[0]});
2946 torch::Tensor output_tensor = input;
2947
2948 if (kernel == "Dense") {
2949 torch::nn::Linear denseLayer = torch::nn::Linear(dims[0], dims[1]);
2950 torch::Tensor weightsTensor =
2951 torch::from_blob(weights, {dims[0], dims[1]}).t();
2952 torch::Tensor biasTensor = torch::from_blob(bias, {dims[1]});
2953 denseLayer->weight.set_data(weightsTensor);
2954 denseLayer->bias.set_data(biasTensor);
2955
2956 output_tensor = denseLayer->forward(output_tensor);
2957 } else if (kernel == "Relu") {
2958 torch::nn::ReLU reluLayer = torch::nn::ReLU();
2959
2960 output_tensor = reluLayer->forward(output_tensor);
2961 } else if (kernel == "Softmax") {
2962 output_tensor = torch::nn::functional::softmax(output_tensor, 1);
2963 }
2964
2965 // output_tensor = torch::nn::functional::softmax(output_tensor, 1);
2966 float* data = output_tensor.data_ptr<float>();
2967
2968 // Prepare results
2969 std::vector<std::vector<float>> results;
2970 for (int i = 0; i < rows.size(); ++i) {
2971 std::vector<float> result(
2972 data + i * dims.back(), data + (i + 1) * dims.back());
2973 results.push_back(result);
2974 }
2975
2976 VectorMaker maker{context.pool()};
2977 output = maker.arrayVector<float>(results, REAL());
2978 }
2979
2985 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
2986 return {exec::FunctionSignatureBuilder()
2987 .returnType("array(REAL)")
2988 .argumentType("array(REAL)")
2989 .build()};
2990 }
2991
  /// Returns a zero-length allocation to satisfy the MLFunction interface;
  /// this kernel's parameters are exposed via getWeights()/getBias() instead.
  /// NOTE(review): ownership passes to the caller — leaks unless delete[]'d.
  float* getTensor() const override {
    return new float[0];
  }
3000
  /// Returns the raw weight buffer (layout {dims[0], dims[1]}, caller-owned).
  const float* getWeights() const {
    return weights;
  }
3009
  /// Returns the raw bias buffer (dims[1] floats, caller-owned).
  const float* getBias() const {
    return bias;
  }
3018
  /// Returns the registered function name (delegates to the static getName()).
  std::string getFuncName() {
    return getName();
  }
3027
3033 static std::string getName() {
3034 return "torchnn_kernel";
3035 }
3036
3037private:
3038 float* weights;
3039 float* bias;
3040 std::string kernel;
3041 std::vector<int> dims;
3042};
3043
3051public:
3060 std::vector<float*> weights,
3061 std::vector<float*> bias,
3062 std::vector<int> dimensions) {
3063 this->weights = weights;
3064 this->bias = bias;
3065 dims = dimensions;
3066 }
3067
3079 void apply(
3080 const SelectivityVector& rows,
3081 std::vector<VectorPtr>& args,
3082 const TypePtr& type,
3083 exec::EvalCtx& context,
3084 VectorPtr& output) const override {
3085 std::vector<torch::nn::Linear> dense_layers;
3086 std::vector<torch::Tensor> weights_tensors;
3087 std::vector<torch::Tensor> bias_tensors;
3088 std::vector<torch::nn::ReLU> relus;
3089
3090 // Create layers
3091 for (int i = 0; i < dims.size() - 1; ++i) {
3092 dense_layers.push_back(torch::nn::Linear(dims[i], dims[i + 1]));
3093 weights_tensors.push_back(
3094 torch::from_blob(weights[i], {dims[i], dims[i + 1]}).t());
3095 bias_tensors.push_back(torch::from_blob(bias[i], {dims[i + 1]}));
3096 relus.push_back(torch::nn::ReLU());
3097 }
3098
3099 // Set weights and biases
3100 for (int i = 0; i < dense_layers.size(); ++i) {
3101 dense_layers[i]->weight.set_data(weights_tensors[i]);
3102 dense_layers[i]->bias.set_data(bias_tensors[i]);
3103 }
3104
3105 auto input_elements = args[0]->as<ArrayVector>()->elements();
3106 float* input_values = input_elements->values()->asMutable<float>();
3107 torch::Tensor input =
3108 torch::from_blob(input_values, {rows.size(), dims[0]});
3109
3110 torch::Tensor output_tensor = input;
3111 for (int i = 0; i < dense_layers.size(); ++i) {
3112 output_tensor = dense_layers[i]->forward(output_tensor);
3113 output_tensor = relus[i]->forward(output_tensor);
3114 }
3115
3116 // Softmax output
3117 output_tensor = torch::nn::functional::softmax(output_tensor, 1);
3118 float* data = output_tensor.data_ptr<float>();
3119
3120 // Prepare results
3121 std::vector<std::vector<float>> results;
3122 for (int i = 0; i < rows.size(); ++i) {
3123 std::vector<float> result(
3124 data + i * dims.back(), data + (i + 1) * dims.back());
3125 results.push_back(result);
3126 }
3127
3128 VectorMaker maker{context.pool()};
3129 output = maker.arrayVector<float>(results, REAL());
3130 }
3131
3137 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
3138 return {exec::FunctionSignatureBuilder()
3139 .returnType("array(REAL)")
3140 .argumentType("array(REAL)")
3141 .build()};
3142 }
3143
  /// Returns a zero-length allocation to satisfy the MLFunction interface;
  /// this model's parameters are exposed via getWeights()/getBias() instead.
  /// NOTE(review): ownership passes to the caller — leaks unless delete[]'d.
  float* getTensor() const override {
    return new float[0];
  }
3152
  /// Returns the per-layer raw weight pointers (buffers are caller-owned).
  const std::vector<float*>& getWeights() const {
    return weights;
  }
3161
  /// Returns the per-layer raw bias pointers (buffers are caller-owned).
  const std::vector<float*>& getBias() const {
    return bias;
  }
3170
3171private:
3172 std::vector<float*> weights;
3173 std::vector<float*> bias;
3174 std::vector<int> dims;
3175};
3176
3183class Convolute : public MLFunction {
3184 public:
3196 Convolute(float* weights, int* dims_) {
3197 weights_ = weights;
3198 for (int i = 0; i < 6; i++)
3199 dims.push_back(dims_[i]);
3200 }
3201
3210 void apply(
3211 const SelectivityVector& rows,
3212 std::vector<VectorPtr>& args,
3213 const TypePtr& type,
3214 exec::EvalCtx& context,
3215 VectorPtr& output) const override {
3216 BaseVector::ensureWritable(rows, type, context.pool(), output);
3217
3218 auto input_elements = args[0]->as<ArrayVector>()->elements();
3219 float* input_values = input_elements->values()->asMutable<float>();
3220
3221 int input_height = dims[4];
3222 int input_width = dims[5];
3223 int input_channel_size = input_height * input_width;
3224 int input_size = input_channel_size * dims[3];
3225
3226 int filter_channel_size = dims[1] * dims[2];
3227 int filter_size = filter_channel_size * dims[3];
3228
3229 int output_height = input_height - dims[1] + 1;
3230 int output_width = input_width - dims[2] + 1;
3231
3232 std::vector<std::vector<float>> results(
3233 rows.size(),
3234 std::vector<float>(output_height * output_width * dims[0]));
3235
3236 std::chrono::steady_clock::time_point begin =
3237 std::chrono::steady_clock::now();
3238
3239 for (int s = 0; s < rows.size(); s++) {
3240 // for each channel
3241 for (int c = 0; c < dims[3]; c++) {
3242 Eigen::Map<
3243 Eigen::
3244 Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
3245 input(
3246 input_values + s * input_size + c * input_channel_size,
3247 input_height,
3248 input_width);
3249 // for every filter
3250 for (int f = 0; f < dims[0]; f++) {
3251 int filter_offset = f * output_height * output_width;
3252 Eigen::Map<Eigen::Matrix<
3253 float,
3254 Eigen::Dynamic,
3255 Eigen::Dynamic,
3256 Eigen::RowMajor>>
3257 kernel(
3258 weights_ + f * filter_size + c * filter_channel_size,
3259 dims[1],
3260 dims[2]);
3261 for (int i = 0; i < output_height; ++i) {
3262 int offset = filter_offset + i * output_width;
3263 for (int j = 0; j < output_width; ++j) {
3264 results[s][offset + j] +=
3265 (input.block(i, j, dims[1], dims[2]).cwiseProduct(kernel))
3266 .sum();
3267 }
3268 }
3269 }
3270 }
3271 }
3272
3273 std::chrono::steady_clock::time_point end =
3274 std::chrono::steady_clock::now();
3275 std::cout << "Time for conv2d (sec) = "
3276 << (std::chrono::duration_cast<std::chrono::microseconds>(
3277 end - begin)
3278 .count()) /
3279 1000000.0
3280 << std::endl;
3281
3282 VectorMaker maker{context.pool()};
3283 output = maker.arrayVector<float>(results, REAL());
3284 }
3285
3290 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
3291 return {exec::FunctionSignatureBuilder()
3292 .returnType("array(REAL)")
3293 .argumentType("array(REAL)")
3294 .build()};
3295 }
3296
3301 float* getTensor() const override {
3302 return weights_;
3303 }
3304
3309 std::string getFuncName() {
3310 return "conv2d";
3311 };
3312
3317 static std::string getName() {
3318 return "conv2d";
3319 };
3320
3321 private:
3322 float* weights_;
3323 std::vector<int> dims;
3324};
3325
3332public:
3339 TorchConvolute(float* weights, int* dims_) {
3340 weights_ = weights;
3341 for (int i = 0; i < 6; i++)
3342 dims.push_back(dims_[i]);
3343 }
3344
3356 void apply(
3357 const SelectivityVector& rows,
3358 std::vector<VectorPtr>& args,
3359 const TypePtr& type,
3360 exec::EvalCtx& context,
3361 VectorPtr& output) const override {
3362 std::chrono::steady_clock::time_point begin =
3363 std::chrono::steady_clock::now();
3364 BaseVector::ensureWritable(rows, type, context.pool(), output);
3365
3366 auto input_elements = args[0]->as<ArrayVector>()->elements();
3367 float* input_values = input_elements->values()->asMutable<float>();
3368
3369 int input_height = dims[4];
3370 int input_width = dims[5];
3371
3372 int output_height = input_height - dims[1] + 1;
3373 int output_width = input_width - dims[2] + 1;
3374
3375 std::vector<std::vector<float>> results(
3376 rows.size(),
3377 std::vector<float>(output_height * output_width * dims[0]));
3378
3379 torch::nn::Conv2d conv_layer(
3380 torch::nn::Conv2dOptions(dims[3], dims[0], {dims[1], dims[2]}));
3381 // torch::Tensor conv_weights = torch::tensor(weights_).view({dims[3],
3382 // dims[0], dims[1], dims[2]});
3383
3384 // conv_layer->weight = torch::nn::parameter::Parameter (conv_weights);
3385 torch::Tensor input_data = torch::from_blob(
3386 input_values, {rows.size(), dims[3], input_height, input_width});
3387
3388 torch::Tensor output_data = conv_layer(input_data);
3389
3390 float* data = output_data.data_ptr<float>();
3391
3392 int row_size = output_height * output_width * dims[0];
3393
3394 for (int i = 0; i < rows.size(); ++i) {
3395 std::vector<float> result;
3396 for (int j = 0; j < row_size; ++j) {
3397 result.push_back(data[i * row_size + j]);
3398 }
3399 results.push_back(result);
3400 }
3401
3402 VectorMaker maker{context.pool()};
3403 output = maker.arrayVector<float>(results, REAL());
3404 std::chrono::steady_clock::time_point end =
3405 std::chrono::steady_clock::now();
3406 }
3407
3413 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
3414 return {exec::FunctionSignatureBuilder()
3415 .returnType("array(REAL)")
3416 .argumentType("array(REAL)")
3417 .build()};
3418 }
3419
  /// Returns the raw filter weight buffer (caller-owned).
  float* getTensor() const override {
    return weights_;
  }
3428
  /// Returns the registered function name (delegates to the static getName()).
  std::string getFuncName() {
    return getName();
  }
3437
3443 static std::string getName() {
3444 return "torchconv2d";
3445 }
3446
3447private:
3448 float* weights_;
3449 std::vector<int> dims;
3450};
3451
3458class TorchCNN : public MLFunction {
3459public:
3467 TorchCNN(float* weights, float* bias, int* dims_) {
3468 weights_ = weights;
3469 bias_ = bias;
3470 for (int i = 0; i < 7; i++)
3471 dims.push_back(dims_[i]);
3472 }
3473
3485 void apply(
3486 const SelectivityVector& rows,
3487 std::vector<VectorPtr>& args,
3488 const TypePtr& type,
3489 exec::EvalCtx& context,
3490 VectorPtr& output) const override {
3491 std::chrono::steady_clock::time_point begin =
3492 std::chrono::steady_clock::now();
3493 BaseVector::ensureWritable(rows, type, context.pool(), output);
3494
3495 auto input_elements = args[0]->as<ArrayVector>()->elements();
3496 float* input_values = input_elements->values()->asMutable<float>();
3497
3498 int input_height = dims[4];
3499 int input_width = dims[5];
3500
3501 int output_height = input_height - dims[1] + 1;
3502 int output_width = input_width - dims[2] + 1;
3503
3504 int input_size = input_elements->size();
3505 // std::cout << "input_size:" << "," << input_size << std::endl;
3506 // std::cout << "input_values:" << "," << input_values[0] << "," <<
3507 // input_values[1] << "," << input_values[2080] << std::endl; std::cout <<
3508 // "row size" << "," << rows.size() << std::endl;
3509
3510 std::vector<std::vector<float>> results(
3511 rows.size(),
3512 std::vector<float>(output_height * output_width * dims[0]));
3513
3514 torch::nn::Conv2d conv_layer(
3515 torch::nn::Conv2dOptions(dims[0], dims[3], {dims[1], dims[2]})
3516 .bias(false));
3517 // torch::nn::Conv2d conv_layer(torch::nn::Conv2dOptions(dims[3], dims[0],
3518 // {dims[1], dims[2]}));
3519 torch::Tensor conv_weights =
3520 torch::from_blob(weights_, {dims[3], dims[0], dims[1], dims[2]})
3521 .to(torch::kFloat);
3522
3523 auto parameters = conv_layer->named_parameters();
3524
3525 // Find and set the weight parameter
3526 for (auto& named_param : parameters) {
3527 if (named_param.key() == "weight") {
3528 named_param.value().data() = conv_weights;
3529 break;
3530 }
3531 }
3532 torch::Tensor input_data =
3533 torch::from_blob(
3534 input_values, {rows.size(), dims[3], input_height, input_width})
3535 .to(torch::kFloat);
3536
3537 torch::Tensor output_data = conv_layer->forward(input_data);
3538
3539 // Convert bias values to a tensor
3540 torch::Tensor bias_tensor = torch::from_blob(bias_, {dims[0]});
3541 if (conv_layer->bias.defined()) {
3542 output_data += bias_tensor;
3543 }
3544
3545 // output_data = torch::relu(output_data);
3546
3547 // output_data = torch::max_pool2d(output_data, {dims[6], dims[6]});
3548
3549 float* data = output_data.data_ptr<float>();
3550
3551 int row_size = output_height * output_width * dims[0];
3552
3553 for (int i = 0; i < rows.size(); ++i) {
3554 std::vector<float> result;
3555 for (int j = 0; j < row_size; ++j) {
3556 result.push_back(data[i * row_size + j]);
3557 }
3558 results.push_back(result);
3559 }
3560
3561 // for (auto entry: results) {
3562 // for (int i =0; i < 1000; i++){
3563 // std::cout << entry[i] << std::endl;
3564 // }
3565 // }
3566
3567 VectorMaker maker{context.pool()};
3568 output = maker.arrayVector<float>(results, REAL());
3569 std::chrono::steady_clock::time_point end =
3570 std::chrono::steady_clock::now();
3571 }
3572
3578 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
3579 return {exec::FunctionSignatureBuilder()
3580 .returnType("array(REAL)")
3581 .argumentType("array(REAL)")
3582 .build()};
3583 }
3584
3590 float* getTensor() const override {
3591 return weights_;
3592 }
3593
3599 float* getWeights() const {
3600 return weights_;
3601 }
3602
3608 float* getBias() const {
3609 return bias_;
3610 }
3611
3617 std::string getFuncName() {
3618 return getName();
3619 }
3620
3626 static std::string getName() {
3627 return "torchcnn";
3628 }
3629
3630private:
3631 float* weights_;
3632 float* bias_;
3633 std::vector<int> dims;
3634};
3635
3642public:
3649 VectorScalarAddition(float* weights, int size) {
3650 weights_ = weights;
3651 dims.push_back(size);
3652 }
3653
3665 void apply(
3666 const SelectivityVector& rows,
3667 std::vector<VectorPtr>& args,
3668 const TypePtr& type,
3669 exec::EvalCtx& context,
3670 VectorPtr& output) const override {
3671 BaseVector::ensureWritable(rows, type, context.pool(), output);
3672
3673 auto input_elements = args[0]->as<ArrayVector>()->elements();
3674 float* input_values = input_elements->values()->asMutable<float>();
3675 int num_cols = input_elements->size() / rows.size();
3676
3677 Eigen::Map<
3678 Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
3679 input(input_values, rows.size(), num_cols);
3680 // for each filter add bias
3681 for (int i = 0, step = num_cols / dims[0]; i < dims[0]; i++) {
3682 input.block(0, i * step, rows.size(), step).array() += weights_[i];
3683 }
3684
3685 std::vector<std::vector<float>> results(
3686 input.rows(), std::vector<float>(input.cols()));
3687 for (int i = 0; i < input.rows(); ++i) {
3688 for (int j = 0; j < input.cols(); ++j) {
3689 results[i][j] = input(i, j);
3690 }
3691 }
3692 VectorMaker maker{context.pool()};
3693 output = maker.arrayVector<float>(results, REAL());
3694 }
3695
3701 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
3702 return {exec::FunctionSignatureBuilder()
3703 .returnType("array(REAL)")
3704 .argumentType("array(REAL)")
3705 .build()};
3706 }
3707
  /// Returns the raw per-group scalar buffer (caller-owned).
  float* getTensor() const override {
    return weights_;
  }
3716
  /// Returns the registered function name (delegates to the static getName()).
  std::string getFuncName() {
    return getName();
  }
3725
3731 static std::string getName() {
3732 return "vec_scal_add";
3733 }
3734
3735private:
3736 float* weights_;
3737};
3738
3744class MaxPool : public MLFunction {
3745public:
3753 MaxPool(int side, int rows, int cols) {
3754 dims.push_back(side);
3755 dims.push_back(rows);
3756 dims.push_back(cols);
3757 }
3758
3770 void apply(
3771 const SelectivityVector& rows,
3772 std::vector<VectorPtr>& args,
3773 const TypePtr& type,
3774 exec::EvalCtx& context,
3775 VectorPtr& output) const override {
3776 BaseVector::ensureWritable(rows, type, context.pool(), output);
3777
3778 auto input_elements = args[0]->as<ArrayVector>()->elements();
3779 float* input_values = input_elements->values()->asMutable<float>();
3780 int num_cols = input_elements->size() / rows.size();
3781 int num_channels = num_cols / (dims[1] * dims[2]);
3782 int side = dims[0];
3783 int output_size = (dims[1] * dims[2]) / (side * side);
3784 int output_rows = dims[1] / side;
3785 int output_cols = dims[2] / side;
3786 // this can be done by using one big matrix but padding will not be possible
3787 // then this doesn't support padding yet but this makes it possible to add
3788 // it later
3789 std::vector<std::vector<float>> results(
3790 rows.size(), std::vector<float>(num_cols / (side * side)));
3791 // for each sample
3792 for (int s = 0; s < rows.size(); s++) {
3793 for (int c = 0; c < num_channels; c++) {
3794 Eigen::Map<
3795 Eigen::
3796 Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
3797 input(
3798 input_values + s * num_cols + c * dims[1] * dims[2],
3799 dims[1],
3800 dims[2]);
3801 for (int i = 0; i < output_rows; i++) {
3802 for (int j = 0; j < output_cols; j++) {
3803 results[s][c * output_size + i * output_cols + j] =
3804 input.block(i * side, j * side, side, side).maxCoeff();
3805 }
3806 }
3807 }
3808 }
3809
3810 // for (const auto& inner_vector : results) {
3811 // // Iterate over each element in the inner vector
3812 // for (const auto& element : inner_vector) {
3813 // std::cout << element << std::endl;
3814 // }
3815 // }
3816
3817 // for(int i=0; i < 64; i++){
3818
3819 // for(int j=0; j < 144; j++){
3820 // if(j % 12 == 0)
3821 // std::cout << std::endl;
3822 // std::cout << results[0][i*144 + j];
3823 // }
3824
3825 // }
3826
3827 VectorMaker maker{context.pool()};
3828 output = maker.arrayVector<float>(results, REAL());
3829 }
3830
3836 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
3837 return {exec::FunctionSignatureBuilder()
3838 .returnType("array(REAL)")
3839 .argumentType("array(REAL)")
3840 .build()};
3841 }
3842
3848 float* getTensor() const override {
3849 return new float[0];
3850 }
3851
3857 std::string getFuncName() {
3858 return getName();
3859 }
3860
3866 static std::string getName() {
3867 return "max_pool";
3868 }
3869
3870private:
3871 std::vector<int> dims;
3872};
Implementation of a decision tree for machine learning predictions.
Implementation of a dot product function for machine learning.
Implementation of a dropout layer for machine learning.
Implementation of an embedding layer for machine learning.
Implementation of various encoder classes for machine learning.
Implementation of a Hugging Face serverless API integration for machine learning tasks.
Implementation of a Hugging Face tokenizer for machine learning tasks.
Implementation of a position encoding function for machine learning.
Implementation of a Retrieval-Augmented Generation (RAG) function for machine learning.
Implementation of a sequence pooling function for machine learning.
This file contains the implementation of XGBoost-based machine learning functions in Velox.
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:1622
std::string getFuncName()
Returns the name of the function.
Definition functions.h:1643
Argmax()
Default constructor.
Definition functions.h:1529
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:1634
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the Argmax function to the input array.
Definition functions.h:1542
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying the Argmax function.
Definition functions.h:1662
static std::string getName()
Static method to return the name of the function.
Definition functions.h:1652
std::string getFuncName()
Returns the name of the function.
Definition functions.h:3309
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the 2D convolution operation to the input data.
Definition functions.h:3210
float * getTensor() const override
Returns the filter weights tensor.
Definition functions.h:3301
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures for the convolution operation.
Definition functions.h:3290
static std::string getName()
Returns the name of the function.
Definition functions.h:3317
Convolute(float *weights, int *dims_)
Constructs a new Convolute object.
Definition functions.h:3196
A base class for machine learning functions, inheriting from Velox's VectorFunction.
Definition BaseFunction.h:9
double getWeightedCost(std::string name, float cost)
Calculates the weighted cost of the function.
Definition BaseFunction.h:70
std::vector< double > getCoefficientVector(std::string name)
Retrieves the cost coefficients for the function.
Definition BaseFunction.h:83
std::vector< int > dims
Dimensions of the function.
Definition BaseFunction.h:61
static std::string getName()
Get the name of the function.
Definition functions.h:866
std::string getFuncName()
Get the name of the function.
Definition functions.h:858
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Get the function signatures for matrix addition.
Definition functions.h:839
std::string getWeightsFile()
Get the weights file associated with this function.
Definition functions.h:874
void setWeights(float *weights)
Set the weights for this function.
Definition functions.h:882
float * getTensor() const override
Get the tensor data associated with this function.
Definition functions.h:850
MatrixAddition(std::string weightsFile, int num_cols)
Constructor for MatrixAddition.
Definition functions.h:792
MatrixAddition(float *weights, int num_cols)
Constructor for MatrixAddition.
Definition functions.h:782
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Apply the matrix addition operation.
Definition functions.h:805
CostEstimate getCost(std::vector< int > inputDims)
Estimate the computational cost of the function.
Definition functions.h:891
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Get the function signatures for blocked matrix multiplication.
Definition functions.h:733
float * getTensor() const override
Get the tensor data associated with this function.
Definition functions.h:745
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Apply the blocked matrix multiplication operation.
Definition functions.h:683
static std::string getName()
Get the name of the function.
Definition functions.h:761
MatrixMultiply_Block(int num_rows, int num_cols, int num_samples, int blocks)
Constructor for MatrixMultiply_Block.
Definition functions.h:664
std::string getFuncName()
Get the name of the function.
Definition functions.h:753
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Apply the blocked matrix multiplication operation.
Definition functions.h:363
static std::string getName()
Get the name of the function.
Definition functions.h:451
float * getTensor() const override
Get the tensor data associated with this function.
Definition functions.h:435
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Get the function signatures for blocked matrix multiplication.
Definition functions.h:423
std::string getFuncName()
Get the name of the function.
Definition functions.h:443
MatrixMultiply_b(int num_rows, int num_cols, int num_samples, int blocks)
Constructor for MatrixMultiply_b.
Definition functions.h:348
std::string getFuncName()
Get the name of the function.
Definition functions.h:616
CostEstimate getCost(std::vector< int > inputDims)
Estimate the computational cost of the function.
Definition functions.h:633
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Get the function signatures for hierarchical matrix multiplication.
Definition functions.h:596
float * getTensor() const override
Get the tensor data associated with this function.
Definition functions.h:608
static std::string getName()
Get the name of the function.
Definition functions.h:624
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &outputType, exec::EvalCtx &context, VectorPtr &output) const override
Apply the hierarchical matrix multiplication operation.
Definition functions.h:490
MatrixMultiply_h(int num_rows, int num_cols, int block_size)
Constructor for MatrixMultiply_h.
Definition functions.h:476
std::string getWeightsFile()
Get the weights file associated with this function.
Definition functions.h:299
void setWeights(float *weights)
Set the weights for this function.
Definition functions.h:307
std::string getFuncName()
Get the name of the function.
Definition functions.h:283
CostEstimate getCost(std::vector< int > inputDims)
Estimate the computational cost of the function.
Definition functions.h:316
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &outputType, exec::EvalCtx &context, VectorPtr &output) const override
Apply the matrix multiplication operation.
Definition functions.h:150
static std::string getName()
Get the name of the function.
Definition functions.h:291
MatrixMultiply(std::string weightsFile, int num_rows, int num_cols)
Constructor for MatrixMultiply.
Definition functions.h:136
float * getTensor() const override
Get the tensor data associated with this function.
Definition functions.h:275
MatrixMultiply(float *weights, int num_rows, int num_cols)
Constructor for MatrixMultiply.
Definition functions.h:122
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Get the function signatures for matrix multiplication.
Definition functions.h:257
std::string getWeightsFile()
Returns the path to the weights file.
Definition functions.h:1089
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:1041
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the matrix-vector addition operation.
Definition functions.h:946
MatrixVectorAddition(std::string weightsFile, int num_cols)
Constructor that initializes the class with a file containing weights.
Definition functions.h:930
std::string getFuncName()
Returns the name of the function.
Definition functions.h:1071
void setWeights(float *weights)
Sets the weights for the function.
Definition functions.h:1098
MatrixVectorAddition(float *weights, int num_cols)
Constructor that initializes the class with a raw array of weights.
Definition functions.h:917
static std::string getName()
Static method to return the name of the function.
Definition functions.h:1080
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:1050
static std::string getName()
Static method to return the name of the function.
Definition functions.h:3866
std::string getFuncName()
Returns the name of the function.
Definition functions.h:3857
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies max pooling to the input array.
Definition functions.h:3770
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:3848
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:3836
MaxPool(int side, int rows, int cols)
Constructor that initializes the max pooling operation with dimensions.
Definition functions.h:3753
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:1864
std::string getFuncName()
Returns the name of the function.
Definition functions.h:1873
MinMaxScaler(std::string minMaxScalerDataPath)
Constructor that initializes the scaler with a file containing min and max values.
Definition functions.h:1700
MinMaxScaler(float *scalerMinValues, float *scalerMaxValues, int numCols)
Constructor that initializes the scaler with raw arrays of min and max values.
Definition functions.h:1687
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying Min-Max scaling.
Definition functions.h:1892
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies Min-Max scaling to the input array.
Definition functions.h:1755
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:1852
static std::string getName()
Static method to return the name of the function.
Definition functions.h:1882
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:1305
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying the ReLU function.
Definition functions.h:1345
std::string getFuncName()
Returns the name of the function.
Definition functions.h:1326
static float reluFunction(float x)
Computes the ReLU function for a single input value.
Definition functions.h:1249
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the ReLU function to the input array.
Definition functions.h:1265
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:1317
Relu()
Default constructor.
Definition functions.h:1241
static std::string getName()
Static method to return the name of the function.
Definition functions.h:1335
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying the Sigmoid function.
Definition functions.h:1223
static float sigmoidFunction(float x)
Computes the Sigmoid function for a single input value.
Definition functions.h:1127
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:1183
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:1195
static std::string getName()
Static method to return the name of the function.
Definition functions.h:1213
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the Sigmoid function to the input array.
Definition functions.h:1143
std::string getFuncName()
Returns the name of the function.
Definition functions.h:1204
Sigmoid()
Default constructor.
Definition functions.h:1119
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying the Softmax function.
Definition functions.h:1511
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:1483
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:1471
Softmax()
Default constructor.
Definition functions.h:1364
static std::string getName()
Static method to return the name of the function.
Definition functions.h:1501
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the Softmax function to the input array.
Definition functions.h:1377
std::string getFuncName()
Returns the name of the function.
Definition functions.h:1492
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:3578
static std::string getName()
Static method to return the name of the function.
Definition functions.h:3626
std::string getFuncName()
Returns the name of the function.
Definition functions.h:3617
float * getBias() const
Returns the biases of the CNN.
Definition functions.h:3608
float * getWeights() const
Returns the weights of the CNN.
Definition functions.h:3599
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the CNN to the input array using PyTorch.
Definition functions.h:3485
TorchCNN(float *weights, float *bias, int *dims_)
Constructor that initializes the CNN with weights, biases, and dimensions.
Definition functions.h:3467
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:3590
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the 2D convolution operation to the input array using PyTorch.
Definition functions.h:3356
static std::string getName()
Static method to return the name of the function.
Definition functions.h:3443
std::string getFuncName()
Returns the name of the function.
Definition functions.h:3434
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:3413
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:3425
TorchConvolute(float *weights, int *dims_)
Constructor that initializes the convolution operation with weights and dimensions.
Definition functions.h:3339
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the 2-level neural network to the input array.
Definition functions.h:1935
TorchDNN2Level(float **weights, float **bias, std::vector< int > dimensions)
Constructor that initializes the neural network with weights and biases.
Definition functions.h:1918
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:2003
float ** getBias() const
Returns the biases of the neural network.
Definition functions.h:2021
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying the neural network.
Definition functions.h:2049
static std::string getName()
Static method to return the name of the function.
Definition functions.h:2039
float ** getWeights() const
Returns the weights of the neural network.
Definition functions.h:2012
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:1991
std::string getFuncName()
Returns the name of the function.
Definition functions.h:2030
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:2985
TorchDNNKernel(std::string kernel, float *weights, float *bias, std::vector< int > dimensions)
Constructor that initializes the neural network kernel with weights, biases, and dimensions.
Definition functions.h:2909
const float * getWeights() const
Returns the weights of the neural network kernel.
Definition functions.h:3006
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the neural network kernel to the input array.
Definition functions.h:2931
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:2997
const float * getBias() const
Returns the biases of the neural network kernel.
Definition functions.h:3015
static std::string getName()
Static method to return the name of the function.
Definition functions.h:3033
std::string getFuncName()
Returns the name of the function.
Definition functions.h:3024
std::string getFuncName()
Returns the name of the function.
Definition functions.h:2853
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying the neural network.
Definition functions.h:2881
const std::vector< float * > & getWeights() const
Returns the weights of the neural network.
Definition functions.h:2835
const std::vector< float * > & getBias() const
Returns the biases of the neural network.
Definition functions.h:2844
static std::string getName()
Static method to return the name of the function.
Definition functions.h:2862
std::vector< velox::dl::KernelType > getKernelTypes() const
Returns the kernel types used in the neural network.
Definition functions.h:2871
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:2803
TorchDNNV2CUDA(std::vector< velox::dl::KernelType > kernelTypes, std::vector< float * > weights, std::vector< int > dimensions)
Constructor that initializes the neural network with kernel types, weights, and dimensions.
Definition functions.h:2619
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the neural network to the input array using CUDA.
Definition functions.h:2684
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:2826
TorchDNNV2(std::vector< velox::dl::KernelType > kernelTypes, std::vector< float * > weights, std::vector< int > dimensions)
Constructor that initializes the neural network with kernel types, weights, and dimensions.
Definition functions.h:2330
std::string getFuncName()
Returns the name of the function.
Definition functions.h:2565
static std::string getName()
Static method to return the name of the function.
Definition functions.h:2574
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:2515
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the neural network to the input array.
Definition functions.h:2399
std::vector< velox::dl::KernelType > getKernelTypes() const
Returns the kernel types used in the neural network.
Definition functions.h:2583
const std::vector< float * > & getBias() const
Returns the biases of the neural network.
Definition functions.h:2556
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying the neural network.
Definition functions.h:2593
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:2538
const std::vector< float * > & getWeights() const
Returns the weights of the neural network.
Definition functions.h:2547
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:3149
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:3137
const std::vector< float * > & getWeights() const
Returns the weights of the neural network.
Definition functions.h:3158
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the multi-layer neural network to the input array.
Definition functions.h:3079
TorchDNN_Multi(std::vector< float * > weights, std::vector< float * > bias, std::vector< int > dimensions)
Constructor that initializes the neural network with weights, biases, and layer dimensions.
Definition functions.h:3059
const std::vector< float * > & getBias() const
Returns the biases of the neural network.
Definition functions.h:3167
std::string getFuncName()
Returns the name of the function.
Definition functions.h:2192
const std::vector< float * > & getWeights() const
Returns the weights of the neural network.
Definition functions.h:2174
const std::vector< float * > & getBias() const
Returns the biases of the neural network.
Definition functions.h:2183
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:2153
static std::string getName()
Static method to return the name of the function.
Definition functions.h:2201
TorchDNN(std::vector< float * > weights, std::vector< float * > bias, std::vector< int > dimensions)
Constructor that initializes the neural network with weights, biases, and layer dimensions.
Definition functions.h:2075
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the neural network to the input array.
Definition functions.h:2095
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:2165
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying the neural network.
Definition functions.h:2211
VectorScalarAddition(float *weights, int size)
Constructor that initializes the vector-scalar addition with weights and size.
Definition functions.h:3649
static std::string getName()
Static method to return the name of the function.
Definition functions.h:3731
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:3701
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:3713
std::string getFuncName()
Returns the name of the function.
Definition functions.h:3722
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies vector-scalar addition to the input array.
Definition functions.h:3665
Namespace for deep learning-related utilities and kernels.
KernelType
Enumeration of kernel types used in deep learning operations.
Definition functions.h:2253
@ Sigmoid
Sigmoid activation kernel.
Definition functions.h:2260
@ Argmax
Argmax operation kernel.
Definition functions.h:2259
@ Softmax
Softmax activation kernel.
Definition functions.h:2257
@ MatMul
Matrix multiplication kernel.
Definition functions.h:2254
@ BatchNorm
Batch normalization kernel.
Definition functions.h:2258
@ ReLU
Rectified Linear Unit activation kernel.
Definition functions.h:2256
@ MatAdd
Matrix addition kernel.
Definition functions.h:2255
std::ostream & operator<<(std::ostream &os, KernelType kernelType)
Overloads the << operator for KernelType.
Definition functions.h:2295
std::string kernelTypeToString(KernelType kernelType)
Converts a KernelType enum value to its string representation.
Definition functions.h:2268