ML functions
 
Loading...
Searching...
No Matches
functions.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2025 ASU Cactus Lab.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
26
27#pragma once
28#include <torch/torch.h>
29#include <Eigen/Dense>
30#include <chrono>
31#include <filesystem>
32#include "BaseFunction.h"
33#include "BatchNorm.h"
34#include "ChatGPT.h"
35#include "ComplexLayer.h"
36#include "Concat.h"
37#include "CosineSimilarity.h"
38#include "DecisionForest.h"
39#include "DecisionTree.h"
40#include "DotProduct.h"
41#include "Dropout.h"
42#include "Embedding.h"
43#include "Encoder.h"
46#include "PositionEncoding.h"
47#include "RAG.h"
48#include "SequencePooling.h"
49#include "XGBoost.h"
50#include "velox/vector/tests/utils/VectorMaker.h"
51
52using namespace facebook::velox;
53using namespace facebook::velox::test;
54
55/*
56 TODO
57 1. conv2d - done
58 2. max pooling - done
59 3. flatten - not required
60 4. batch normalization
61 5. padding
62 6. concatenate
63 7. embedding
 64 8. transformer -> existing libraries, encoder, decoder, how to decompose it
 65 into atomic linear algebra
66 // focus on weight
67 9. GRU -> not interesting
68*/
69
70// TODO: Refactor
71// class MLFunction : public exec::VectorFunction {
72// public:
73// virtual ~MLFunction() = default;
74
75// virtual float* getTensor() const = 0;
76
77// virtual std::vector<int> getDims() {
78// return dims;
79// }
80
81// virtual std::string getFuncName() {
82// return "";
83// }
84
85// virtual int getNumDims() {
86// return dims.size();
87// }
88
89// virtual CostEstimate getCost(std::vector<int> inputDims) {
90// return CostEstimate(0, inputDims[0], inputDims[1]);
91// }
92
93// protected:
94// std::vector<int> dims;
95// double getWeightedCost(std::string name, float cost) {
96// std::vector<double> coefficient =
97// UdfCostCoefficient::getInstance().getCoefficient(name);
98// // FIXME
99// return 0;
100// // return coefficient[0] * cost;
101// }
102// std::vector<double> getCoefficientVector(std::string name) {
103// return UdfCostCoefficient::getInstance().getCoefficient(name);
104// }
105// };
106
115 public:
122 MatrixMultiply(float* weights, int num_rows, int num_cols) {
123 // Create a deep copy of the weights.
124 weights_ = new float[num_rows * num_cols];
125 std::memcpy(weights_, weights, num_rows * num_cols * sizeof(float));
126 dims.push_back(num_rows);
127 dims.push_back(num_cols);
128 }
129
136 MatrixMultiply(std::string weightsFile, int num_rows, int num_cols) {
137 weightsFile_ = weightsFile;
138 dims.push_back(num_rows);
139 dims.push_back(num_cols);
140 }
141
  /// Multiplies each selected input row vector (length dims[0]) by the
  /// stored weight matrix (dims[0] x dims[1]) and writes the resulting
  /// row vector (length dims[1]) into the output ArrayVector.
  ///
  /// @param rows Selection of rows to process.
  /// @param args args[0]: array(REAL) input rows; args[1] (optional):
  ///             constant BOOLEAN use_gpu flag.
  /// @param outputType Velox type of the result (array(REAL)).
  /// @param context Evaluation context (memory pool, decoding helpers).
  /// @param output Result vector; made writable and populated here.
  void apply(
      const SelectivityVector& rows,
      std::vector<VectorPtr>& args,
      const TypePtr& outputType,
      exec::EvalCtx& context,
      VectorPtr& output) const override {
    bool use_gpu = false;
    if (args.size() == 2) {
      // An optional parameter can be passed to enable the GPU for matrix multiplication.
      use_gpu = args[1]->as<ConstantVector<bool>>()->valueAt(0);
    }
    if (use_gpu) {
      // TODO: Implement GPU matrix multiplication.
      throw std::runtime_error(
          "GPU implementation of Matrix Multiplication is not implemented.");
    } else {
      // Ensure output vector is writable.
      context.ensureWritable(rows, outputType, output);
      output->clearNulls(rows);
      auto arrayOutput = output->as<ArrayVector>();
      auto sizes = arrayOutput->mutableSizes(rows.end());
      auto rawSizes = sizes->asMutable<int32_t>();
      auto offsets = arrayOutput->mutableOffsets(rows.end());
      auto rawOffsets = offsets->asMutable<int32_t>();

      // Initialize sizes and offsets to zero.
      std::fill(rawSizes, rawSizes + rows.end(), 0);
      std::fill(rawOffsets, rawOffsets + rows.end(), 0);

      auto elementsOutput = arrayOutput->elements();
      auto elementsPool = context.pool();

      // Perform matrix multiplication logic.
      exec::DecodedArgs decodedArgs(rows, args, context);
      auto decodedInput = decodedArgs.at(0);
      auto inputArray = decodedInput->base()->as<ArrayVector>();
      auto inputElements = inputArray->elements();
      float* inputValues = inputElements->values()->asMutable<float>();
      auto inputOffsets = inputArray->rawOffsets();
      auto inputSizes = inputArray->rawSizes();

      // The map between the row index in the input data and the row index in the output data.
      std::map<vector_size_t, vector_size_t> rowMap;
      // For efficient check.
      std::unordered_set<vector_size_t> uniqueRawIndexeSet;
      // For iterating over the insert ordering.
      std::vector<vector_size_t> uniqueRawIndexeVector;
      vector_size_t numUniqueRows = 0;
      // Deduplicate selected rows that decode to the same underlying array so
      // each distinct input row is multiplied only once.
      rows.applyToSelected([&](vector_size_t row) {
        auto mappedIndexInRowData = decodedInput->index(row);
        if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
            uniqueRawIndexeSet.end()) {
          // Add it.
          rowMap[row] = numUniqueRows;
          uniqueRawIndexeSet.insert(mappedIndexInRowData);
          uniqueRawIndexeVector.push_back(mappedIndexInRowData);
          ++numUniqueRows;
        } else {
          // Already added.
          // NOTE(review): this looks up rowMap with a raw-data index rather
          // than a previously-selected row index; it is correct only when
          // those two index spaces coincide (e.g. flat encoding) — verify.
          rowMap[row] = rowMap[mappedIndexInRowData];
        }
      });

      // Gather the distinct input rows into a dense Eigen matrix.
      int numInputMatrixRows = numUniqueRows;
      Eigen::MatrixXf inputMatrix(numInputMatrixRows, dims[0]);
      int rowIndex = 0;
      for (auto rawIndex : uniqueRawIndexeVector) {
        Eigen::Map<const Eigen::VectorXf> rowVector(
            inputValues + inputOffsets[rawIndex], dims[0]);
        inputMatrix.row(rowIndex++) = rowVector;
      }

      // View the flat weight buffer as a dims[0] x dims[1] row-major matrix
      // without copying.
      Eigen::Map<
          Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
          weightMatrix(weights_, dims[0], dims[1]);
      Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
          resultMatrix = inputMatrix * weightMatrix;

      // Append results to the output vector.
      auto baseOffset = elementsOutput->size();
      elementsOutput->resize(baseOffset + rows.end() * dims[1]);

      float* outputValues = elementsOutput->values()->asMutable<float>();
      vector_size_t outputOffset = 0;
      rows.applyToSelected([&](vector_size_t row) {
        if (rowMap.find(row) == rowMap.end()) {
          throw std::runtime_error(
              "Mapped index not found for the result matrix.");
        }
        auto mappedIndexInResultMatrix = rowMap[row];
        rawOffsets[row] = outputOffset;
        rawSizes[row] = dims[1];
        // Each selected row gets its own copy of the (possibly shared)
        // deduplicated result row.
        std::memcpy(
            outputValues + outputOffset,
            resultMatrix.row(mappedIndexInResultMatrix).data(),
            dims[1] * sizeof(float));

        outputOffset += dims[1];
      });
      arrayOutput->setElements(elementsOutput);
    }
  }
252
257 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
258 return {
259 exec::FunctionSignatureBuilder()
260 .returnType("array(REAL)")
261 .argumentType("array(REAL)")
262 .build(),
263 // Supports an additional flag: use_gpu.
264 exec::FunctionSignatureBuilder()
265 .returnType("array(REAL)")
266 .argumentType("array(REAL)")
267 .argumentType("BOOLEAN")
268 .build()};
269 }
270
  /// Returns the raw (row-major) weight buffer owned by this function.
  float* getTensor() const override {
    return weights_;
  }

  /// Instance-level name accessor; forwards to the static getName().
  std::string getFuncName() {
    return getName();
  };

  /// SQL name under which this function is registered.
  static std::string getName() {
    return "mat_mul";
  };

  /// Path of the on-disk weights (empty when weights were given in memory).
  std::string getWeightsFile() {
    return weightsFile_;
  }

  /// Installs a weight buffer.
  /// NOTE(review): unlike the pointer constructor this does not deep-copy,
  /// and a previously owned buffer is not freed — confirm intended
  /// ownership semantics.
  void setWeights(float* weights) {
    weights_ = weights;
  }

  /// Estimates the cost of multiplying an inputDims[0] x dims[0] input by
  /// the dims[0] x dims[1] weights using calibrated per-function
  /// coefficients.
  ///
  /// @param inputDims {input row count, input column count}.
  /// @return Cost estimate plus the output shape inputDims[0] x dims[1].
  CostEstimate getCost(std::vector<int> inputDims) {
    std::vector<double> coefficientVector = getCoefficientVector(getName());
    int factor1 = inputDims[0];
    int factor2 = dims[0];
    int factor3 = dims[1];
    float cost = coefficientVector[0] * factor1 * factor2 * factor3 +
        coefficientVector[1] * factor1 + coefficientVector[2] * factor2 +
        coefficientVector[3] * factor3;
    return CostEstimate(cost, inputDims[0], dims[1]);
  }
326
327 private:
328 float* weights_;
329 std::string weightsFile_;
330};
331
340 public:
  /// Stores the shape parameters used by the blocked multiplication.
  ///
  /// @param num_rows Inner dimension (rows of the right operand).
  /// @param num_cols Columns of the right operand.
  /// @param num_samples Rows of the left operand.
  /// @param blocks Number of blocks the computation is split into.
  /// NOTE(review): parameter meanings inferred from their use as Eigen map
  /// extents in apply() — confirm.
  MatrixMultiply_b(int num_rows, int num_cols, int num_samples, int blocks) {
    dims.push_back(num_rows);
    dims.push_back(num_cols);
    dims.push_back(num_samples);
    dims.push_back(blocks);
  }
354
  /// Multiplies two flat array(REAL) operands interpreted as matrices:
  /// args[0] as dims[2] x dims[0] and args[1] as dims[0] x dims[1], and
  /// returns the product as a single array(array(REAL)) row.
  ///
  /// @param rows Selection of rows to process.
  /// @param args args[0]: left operand values; args[1]: right operand values.
  /// @param type Velox type of the result.
  /// @param context Evaluation context.
  /// @param output Result vector (one array-of-arrays entry).
  void apply(
      const SelectivityVector& rows,
      std::vector<VectorPtr>& args,
      const TypePtr& type,
      exec::EvalCtx& context,
      VectorPtr& output) const override {
    BaseVector::ensureWritable(rows, type, context.pool(), output);
    VectorMaker maker{context.pool()};

    BaseVector* left = args[0].get();
    BaseVector* right = args[1].get();

    exec::LocalDecodedVector leftHolder(context, *left, rows);
    auto decodedLeftArray = leftHolder.get();
    auto baseLeftArray =
        decodedLeftArray->base()->as<ArrayVector>()->elements();

    exec::LocalDecodedVector rightHolder(context, *right, rows);
    auto decodedRightArray = rightHolder.get();
    auto baseRightArray = rightHolder->base()->as<ArrayVector>()->elements();

    float* input_values_v = baseLeftArray->values()->asMutable<float>();
    float* input_values_w = baseRightArray->values()->asMutable<float>();

    // auto varrayVector = std::make_shared<ArrayVector<float>>();
    // const int elements_v_per_row = 1500000; //6000*250
    // const int elements_w_per_row = 125000; // 250*500

    // std::vector<std::vector<float>> result(1,
    // std::vector<float>(dims[1]*dims[2])); //6000*500

    // Interpret the flat buffers as row-major matrices without copying.
    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        m1(input_values_v, dims[2], dims[0]); // 3*2
    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        m2(input_values_w, dims[0], dims[1]); // 2*5
    Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> m =
        m1 * m2; // 3*5

    // for (int i = 0; i < m.rows(); ++i) {
    //   for (int j = 0; j < m.cols(); ++j) {
    //     result[0][i * dims[1] + j] = m(i, j);
    //   }
    // }
    // m = m.reshaped(1, m.size());
    // std::cout << "shape: " << m.rows() << "," <<m.cols() << std::endl;

    // Copy each result row into a nested std::vector for the vector maker.
    std::vector<std::vector<float>> result;
    for (int i = 0; i < m.rows(); i++) {
      std::vector<float> row(m.row(i).data(), m.row(i).data() + m.cols());
      result.push_back(row);
    }
    auto baseVector = maker.arrayVector<float>(result, REAL());
    auto arrayOfArrays = maker.arrayVector({0}, baseVector);
    output = arrayOfArrays;
  }
418
423 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
424 return {exec::FunctionSignatureBuilder()
425 .returnType("array(array(REAL))")
426 .argumentType("array(REAL)")
427 .argumentType("array(REAL)")
428 .build()};
429 }
430
  /// Returns the weight buffer.
  /// NOTE(review): no constructor of this class ever assigns weights_ —
  /// verify before dereferencing the returned pointer.
  float* getTensor() const override {
    return weights_;
  }

  /// Instance-level name accessor; forwards to the static getName().
  std::string getFuncName() {
    return getName();
  };

  /// SQL name under which this function is registered.
  static std::string getName() {
    return "mat_mul_block";
  };
454
455 private:
456 float* weights_;
457};
458
459
469 public:
  /// Stores the shape parameters for the horizontally blocked multiply.
  ///
  /// @param num_rows Inner dimension shared by input and weights.
  /// @param num_cols Total number of weight columns.
  /// @param block_size Number of weight columns per block.
  MatrixMultiply_h(int num_rows, int num_cols, int block_size) {
    dims.push_back(num_rows);
    dims.push_back(num_cols);
    dims.push_back(block_size);
  }
481
  /// Multiplies each selected input row (length dims[0], args[0]) by one
  /// block of weight columns supplied at runtime (args[1]); the block may
  /// be narrower than dims[2] when it is the last block.
  ///
  /// @param rows Selection of rows to process.
  /// @param args args[0]: array(REAL) input rows; args[1]: array(REAL)
  ///             weight block (dims[0] x block width, row-major).
  /// @param outputType Velox type of the result (array(REAL)).
  /// @param context Evaluation context.
  /// @param output Result vector; made writable and populated here.
  void apply(
      const SelectivityVector& rows,
      std::vector<VectorPtr>& args,
      const TypePtr& outputType,
      exec::EvalCtx& context,
      VectorPtr& output) const override {
    BaseVector::ensureWritable(rows, outputType, context.pool(), output);
    output->clearNulls(rows);
    auto arrayOutput = output->as<ArrayVector>();
    auto sizes = arrayOutput->mutableSizes(rows.end());
    auto rawSizes = sizes->asMutable<int32_t>();
    auto offsets = arrayOutput->mutableOffsets(rows.end());
    auto rawOffsets = offsets->asMutable<int32_t>();

    // Initialize sizes and offsets to zero.
    std::fill(rawSizes, rawSizes + rows.end(), 0);
    std::fill(rawOffsets, rawOffsets + rows.end(), 0);

    auto elementsOutput = arrayOutput->elements();
    auto elementsPool = context.pool();
    VectorMaker maker{context.pool()};

    // Validate input arguments
    VELOX_CHECK_EQ(
        args.size(), 2, "Blocked-based matrix multiply requires 2 inputs");

    exec::DecodedArgs decodedArgs(rows, args, context);
    auto numInputs = rows.size();
    auto decodedInput1 = decodedArgs.at(0);
    auto decodedInput2 = decodedArgs.at(1);
    auto input1Array = decodedInput1->base()->as<ArrayVector>();
    auto input2Array = decodedInput2->base()->as<ArrayVector>();
    auto input1Elements = input1Array->elements();
    auto input1Offsets = input1Array->rawOffsets();
    auto input1Sizes = input1Array->rawSizes();
    auto input2Elements = input2Array->elements();

    float* input1Values = input1Elements->values()->asMutable<float>();
    float* input2Values = input2Elements->values()->asMutable<float>();

    // The final block can be narrower than dims[2]; derive the actual block
    // width from the number of weight values supplied.
    int currentBlockSize = (input2Elements->size() < (dims[0] * dims[2]))
        ? input2Elements->size() / dims[0]
        : dims[2];
    int input1MatrixNumRow = input1Elements->size() / dims[0];

    // Deduplicate selected rows that decode to the same underlying array.
    std::map<vector_size_t, vector_size_t> rowMap;
    std::unordered_set<vector_size_t> uniqueRawIndexeSet;
    std::vector<vector_size_t> uniqueRawIndexeVector;
    vector_size_t numUniqueRows = 0;
    rows.applyToSelected([&](vector_size_t row) {
      auto mappedIndexInRowData = decodedInput1->index(row);
      if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
          uniqueRawIndexeSet.end()) {
        // Add it.
        rowMap[row] = numUniqueRows;
        uniqueRawIndexeSet.insert(mappedIndexInRowData);
        uniqueRawIndexeVector.push_back(mappedIndexInRowData);
        ++numUniqueRows;
      } else {
        // Already added.
        // NOTE(review): indexes rowMap with a raw-data index rather than a
        // previously-selected row index — correct only when the two index
        // spaces coincide (e.g. flat encoding); verify.
        rowMap[row] = rowMap[mappedIndexInRowData];
      }
    });

    // Gather the distinct input rows into a dense Eigen matrix.
    int numInputMatrixRows = numUniqueRows;
    Eigen::MatrixXf inputMatrix(numInputMatrixRows, dims[0]);
    int rowIndex = 0;
    for (auto rawIndex : uniqueRawIndexeVector) {
      Eigen::Map<const Eigen::VectorXf> rowVector(
          input1Values + input1Offsets[rawIndex], dims[0]);
      inputMatrix.row(rowIndex++) = rowVector;
    }

    // View the runtime weight block as a row-major matrix without copying.
    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        weightMatrix(input2Values, dims[0], currentBlockSize);
    Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
        resultMatrix = inputMatrix * weightMatrix;

    // Append results to the output vector.
    auto baseOffset = elementsOutput->size();
    elementsOutput->resize(baseOffset + rows.end() * currentBlockSize);

    float* outputValues = elementsOutput->values()->asMutable<float>();

    vector_size_t outputOffset = 0;
    rows.applyToSelected([&](vector_size_t row) {
      if (rowMap.find(row) == rowMap.end()) {
        throw std::runtime_error(
            "Mapped index not found for the result matrix.");
      }
      auto mappedIndexInResultMatrix = rowMap[row];
      rawOffsets[row] = outputOffset;
      rawSizes[row] = currentBlockSize;
      std::memcpy(
          outputValues + outputOffset,
          resultMatrix.row(mappedIndexInResultMatrix).data(),
          currentBlockSize * sizeof(float));
      outputOffset += currentBlockSize;
    });
    arrayOutput->setElements(elementsOutput);
  }
591
596 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
597 return {exec::FunctionSignatureBuilder()
598 .returnType("array(REAL)")
599 .argumentType("array(REAL)")
600 .argumentType("array(REAL)")
601 .build()};
602 }
603
  /// Returns the weight buffer.
  /// NOTE(review): no constructor of this class ever assigns weights_ —
  /// verify before dereferencing the returned pointer.
  float* getTensor() const override {
    return weights_;
  }

  /// Instance-level name accessor; forwards to the static getName().
  std::string getFuncName() {
    return getName();
  };

  /// SQL name under which this function is registered.
  static std::string getName() {
    return "mat_mul_h";
  };
627
  /// Estimates the cost of one blocked multiply step.
  /// NOTE(review): deliberately reuses the calibrated "mat_mul" coefficient
  /// set rather than a mat_mul_h-specific one — confirm this is intended.
  ///
  /// @param inputDims {input row count, input column count}.
  /// @return Cost estimate plus the output shape inputDims[0] x dims[2].
  CostEstimate getCost(std::vector<int> inputDims) {
    std::vector<double> coefficientVector = getCoefficientVector("mat_mul");
    int factor1 = inputDims[0];
    int factor2 = inputDims[1];
    int factor3 = dims[2];
    float cost = coefficientVector[0] * factor1 * factor2 * factor3 +
        coefficientVector[1] * factor1 + coefficientVector[2] * factor2 +
        coefficientVector[3] * factor3;
    return CostEstimate(cost, inputDims[0], dims[2]);
  }
643
644 private:
645 float* weights_;
646};
647
656 public:
665 int num_rows,
666 int num_cols,
667 int num_samples,
668 int blocks) {
669 dims.push_back(num_rows);
670 dims.push_back(num_cols);
671 dims.push_back(num_samples);
672 dims.push_back(blocks);
673 }
674
  /// Multiplies two flat array(REAL) operands interpreted as matrices:
  /// args[0] as dims[2] x dims[0] and args[1] as dims[0] x dims[1], and
  /// returns the product as a single array(array(REAL)) row.
  ///
  /// @param rows Selection of rows to process.
  /// @param args args[0]: left operand values; args[1]: right operand values.
  /// @param type Velox type of the result (ignored; a nested array type is
  ///             constructed explicitly below).
  /// @param context Evaluation context.
  /// @param output Result vector (one array-of-arrays entry).
  void apply(
      const SelectivityVector& rows,
      std::vector<VectorPtr>& args,
      const TypePtr& type,
      exec::EvalCtx& context,
      VectorPtr& output) const override {
    auto elementType =
        ArrayType(std::make_shared<ArrayType>(ArrayType(REAL())));
    BaseVector::ensureWritable(
        rows, std::make_shared<ArrayType>(elementType), context.pool(), output);
    VectorMaker maker{context.pool()};

    BaseVector* left = args[0].get();
    BaseVector* right = args[1].get();

    exec::LocalDecodedVector leftHolder(context, *left, rows);
    auto decodedLeftArray = leftHolder.get();
    auto baseLeftArray =
        decodedLeftArray->base()->as<ArrayVector>()->elements();

    exec::LocalDecodedVector rightHolder(context, *right, rows);
    auto decodedRightArray = rightHolder.get();
    auto baseRightArray = rightHolder->base()->as<ArrayVector>()->elements();

    float* input_values_v = baseLeftArray->values()->asMutable<float>();
    float* input_values_w = baseRightArray->values()->asMutable<float>();

    // Interpret the flat buffers as row-major matrices without copying.
    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        m1(input_values_v, dims[2], dims[0]);
    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        m2(input_values_w, dims[0], dims[1]);
    Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> m =
        m1 * m2;

    // Copy each result row into a nested std::vector for the vector maker.
    std::vector<std::vector<float>> result;
    for (int i = 0; i < m.rows(); i++) {
      std::vector<float> row(m.row(i).data(), m.row(i).data() + m.cols());
      result.push_back(row);
    }
    auto baseVector = maker.arrayVector<float>(result, REAL());
    auto arrayOfArrays = maker.arrayVector({0}, baseVector);
    output = arrayOfArrays;
  }
728
733 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
734 return {exec::FunctionSignatureBuilder()
735 .returnType("array(array(REAL))")
736 .argumentType("array(REAL)")
737 .argumentType("array(REAL)")
738 .build()};
739 }
740
  /// Returns the weight buffer.
  /// NOTE(review): no constructor of this class ever assigns weights_ —
  /// verify before dereferencing the returned pointer.
  float* getTensor() const override {
    return weights_;
  }

  /// Instance-level name accessor; forwards to the static getName().
  std::string getFuncName() {
    return getName();
  };

  /// SQL name under which this function is registered.
  static std::string getName() {
    return "mat_mul_block";
  };
764
765 private:
766 float* weights_;
767};
768
776 public:
782 MatrixAddition(float* weights, int num_cols) {
783 weights_ = weights;
784 dims.push_back(num_cols);
785 }
786
792 MatrixAddition(std::string weightsFile, int num_cols) {
793 weightsFile_ = weightsFile;
794 dims.push_back(num_cols);
795 }
796
  /// Adds the stored weight values element-wise to the input arrays.
  ///
  /// @param rows Selection of rows to process.
  /// @param args args[0]: array(REAL) input rows of width dims[0].
  /// @param type Velox type of the result (array(REAL)).
  /// @param context Evaluation context.
  /// @param output Result vector.
  void apply(
      const SelectivityVector& rows,
      std::vector<VectorPtr>& args,
      const TypePtr& type,
      exec::EvalCtx& context,
      VectorPtr& output) const override {
    BaseVector::ensureWritable(rows, type, context.pool(), output);

    auto input_elements = args[0]->as<ArrayVector>()->elements();
    float* input_values = input_elements->values()->asMutable<float>();

    // Interpret input and weights as row-major matrices without copying.
    // NOTE(review): weights_ is mapped as rows.size() x dims[0], i.e. the
    // weight buffer must hold one full addend row per input row — confirm
    // this matches how callers size the buffer.
    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        m1(input_values, rows.size(), dims[0]);
    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        m2(weights_, rows.size(), dims[0]);

    Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> m =
        m1 + m2;

    // Copy each result row into a nested std::vector for the vector maker.
    std::vector<std::vector<float>> result;
    for (int i = 0; i < m.rows(); i++) {
      std::vector<float> row(m.row(i).data(), m.row(i).data() + m.cols());
      result.push_back(row);
    }
    VectorMaker maker{context.pool()};
    output = maker.arrayVector<float>(result, REAL());
  }
834
839 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
840 return {exec::FunctionSignatureBuilder()
841 .returnType("array(REAL)")
842 .argumentType("array(REAL)")
843 .build()};
844 }
845
  /// Returns the addend buffer (not owned; see constructor).
  float* getTensor() const override {
    return weights_;
  }

  /// Instance-level name accessor; forwards to the static getName().
  std::string getFuncName() {
    return getName();
  };

  /// SQL name under which this function is registered.
  static std::string getName() {
    return "mat_add";
  };

  /// Path of the on-disk weights (empty when weights were given in memory).
  std::string getWeightsFile() {
    return weightsFile_;
  }

  /// Installs an addend buffer (no deep copy; previous buffer not freed).
  void setWeights(float* weights) {
    weights_ = weights;
  }

  /// Estimates cost as linear in the number of input elements, using
  /// calibrated per-function coefficients.
  ///
  /// @param inputDims {input row count, input column count}.
  /// @return Cost estimate plus the (unchanged) output shape.
  CostEstimate getCost(std::vector<int> inputDims) {
    std::vector<double> coefficientVector = getCoefficientVector(getName());
    float cost = coefficientVector[0] * inputDims[0] * inputDims[1];
    return CostEstimate(cost, inputDims[0], inputDims[1]);
  }
896
897 private:
898 float* weights_;
899 std::string weightsFile_;
900};
901
910public:
917 MatrixVectorAddition(float* weights, int num_cols) {
918 // Create a deep copy of the weights
919 weights_ = new float[num_cols];
920 std::memcpy(weights_, weights, num_cols * sizeof(float));
921 dims.push_back(num_cols);
922 }
923
930 MatrixVectorAddition(std::string weightsFile, int num_cols) {
931 weightsFile_ = weightsFile;
932 dims.push_back(num_cols);
933 }
934
  /// Adds the stored bias vector (length dims[0]) to every selected input
  /// row and writes the sums into the output ArrayVector.
  ///
  /// @param rows Selection of rows to process.
  /// @param args args[0]: array(REAL) input rows of width dims[0].
  /// @param type Velox type of the result (array(REAL)).
  /// @param context Evaluation context.
  /// @param output Result vector; made writable and populated here.
  void apply(
      const SelectivityVector& rows,
      std::vector<VectorPtr>& args,
      const TypePtr& type,
      exec::EvalCtx& context,
      VectorPtr& output) const override {
    BaseVector::ensureWritable(rows, type, context.pool(), output);
    output->clearNulls(rows);
    auto arrayOutput = output->as<ArrayVector>();
    auto sizes = arrayOutput->mutableSizes(rows.end());
    auto rawSizes = sizes->asMutable<int32_t>();
    auto offsets = arrayOutput->mutableOffsets(rows.end());
    auto rawOffsets = offsets->asMutable<int32_t>();

    // Initialize sizes and offsets to zero.
    std::fill(rawSizes, rawSizes + rows.end(), 0);
    std::fill(rawOffsets, rawOffsets + rows.end(), 0);
    auto elementsOutput = arrayOutput->elements();
    auto elementsPool = context.pool();

    exec::DecodedArgs decodedArgs(rows, args, context);
    auto decodedInput = decodedArgs.at(0);
    auto numRows = rows.size();
    auto inputArray = decodedInput->base()->as<ArrayVector>();
    auto inputElements = inputArray->elements();
    float* inputValues = inputElements->values()->asMutable<float>();
    auto inputOffsets = inputArray->rawOffsets();
    auto inputSizes = inputArray->rawSizes();

    // Deduplicate selected rows that decode to the same underlying array.
    std::map<vector_size_t, vector_size_t> rowMap;
    std::unordered_set<vector_size_t> uniqueRawIndexeSet;
    std::vector<vector_size_t> uniqueRawIndexeVector;
    vector_size_t numUniqueRows = 0;
    rows.applyToSelected([&](vector_size_t row) {
      auto mappedIndexInRowData = decodedInput->index(row);
      if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
          uniqueRawIndexeSet.end()) {
        // add it
        rowMap[row] = numUniqueRows;
        uniqueRawIndexeSet.insert(mappedIndexInRowData);
        uniqueRawIndexeVector.push_back(mappedIndexInRowData);
        ++numUniqueRows;
      } else {
        // already added
        // NOTE(review): indexes rowMap with a raw-data index rather than a
        // previously-selected row index — correct only when the two index
        // spaces coincide (e.g. flat encoding); verify.
        rowMap[row] = rowMap[mappedIndexInRowData];
      }
    });

    // Gather the distinct input rows into a dense Eigen matrix.
    int numInputMatrixRows = numUniqueRows;
    Eigen::MatrixXf inputMatrix(numInputMatrixRows, dims[0]);
    int rowIndex = 0;
    for (auto rawIndex : uniqueRawIndexeVector) {
      Eigen::Map<const Eigen::VectorXf> rowVector(
          inputValues + inputOffsets[rawIndex], dims[0]);
      inputMatrix.row(rowIndex++) = rowVector;
    }

    // View the bias buffer as a 1 x dims[0] row and broadcast-add it.
    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        vectorMatrix(weights_, 1, dims[0]);

    inputMatrix.rowwise() += vectorMatrix.row(0);

    // Append results to the output vector.
    auto baseOffset = elementsOutput->size();
    elementsOutput->resize(baseOffset + rows.end() * dims[0]);
    float* outputValues = elementsOutput->values()->asMutable<float>();

    vector_size_t outputOffset = 0;

    rows.applyToSelected([&](vector_size_t row) {
      if (rowMap.find(row) == rowMap.end()) {
        throw std::runtime_error(
            "Mapped index not found for the result matrix.");
      }
      auto mappedIndexInResultMatrix = rowMap[row];
      rawOffsets[row] = outputOffset;
      rawSizes[row] = dims[0];

      std::memcpy(
          outputValues + outputOffset,
          inputMatrix.row(mappedIndexInResultMatrix).data(),
          dims[0] * sizeof(float));

      outputOffset += dims[0];
    });
    arrayOutput->setElements(elementsOutput);
  }
1033
1034 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
1035 return {exec::FunctionSignatureBuilder()
1036 .returnType("array(REAL)")
1037 .argumentType("array(REAL)")
1038 .build()};
1039 }
1040
1041 float* getTensor() const override {
1042 return weights_;
1043 }
1044
1050 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
1051 return {exec::FunctionSignatureBuilder()
1052 .returnType("array(REAL)")
1053 .argumentType("array(REAL)")
1054 .build()};
1055 }
1056
1062 float* getTensor() const override {
1063 return weights_;
1064 }
1065
  /// Instance-level name accessor; forwards to the static getName().
  std::string getFuncName() {
    return getName();
  };

  /// SQL name under which this function is registered.
  /// NOTE(review): "mat_add" is the same name MatrixAddition::getName()
  /// returns — confirm the collision is intentional.
  static std::string getName() {
    return "mat_add";
  };

  /// Path of the on-disk weights (empty when weights were given in memory).
  std::string getWeightsFile() {
    return weightsFile_;
  }

  /// Installs a bias buffer (no deep copy; previous buffer not freed).
  void setWeights(float* weights) {
    weights_ = weights;
  }
1101
private:
  float* weights_;
  std::string weightsFile_;
  // NOTE(review): the sibling classes in this file use an inherited dims
  // member (see the commented-out MLFunction sketch above); redeclaring it
  // here shadows that one — confirm this is intentional.
  std::vector<int> dims;
};
1107
1114class Sigmoid : public MLFunction {
1115public:
1120
1127 static float sigmoidFunction(float x) {
1128 return 1.0f / (1.0f + std::exp(-x));
1129 }
1130
1143 void apply(
1144 const SelectivityVector& rows,
1145 std::vector<VectorPtr>& args,
1146 const TypePtr& type,
1147 exec::EvalCtx& context,
1148 VectorPtr& output) const override {
1149 BaseVector::ensureWritable(rows, type, context.pool(), output);
1150 exec::DecodedArgs decodedArgs(rows, args, context);
1151 auto decodedInput = decodedArgs.at(0);
1152 auto numRows = rows.size();
1153
1154 auto inputArray = decodedInput->base()->as<ArrayVector>();
1155 auto inputElements = inputArray->elements();
1156 float* inputValues = inputElements->values()->asMutable<float>();
1157 auto inputOffsets = inputArray->rawOffsets();
1158 auto inputSizes = inputArray->rawSizes();
1159
1160 std::vector<std::vector<float>> result(numRows);
1161
1162 rows.applyToSelected([&](vector_size_t i) {
1163 size_t mappedIndexInRowData = decodedInput->index(i);
1164 size_t dataSize = inputSizes[mappedIndexInRowData];
1165 size_t dataOffset = inputOffsets[mappedIndexInRowData];
1166 std::vector<float> rowResult(dataSize);
1167 std::transform(
1168 inputValues + dataOffset,
1169 inputValues + dataOffset + dataSize,
1170 rowResult.data(),
1172 result[i] = rowResult;
1173 });
1174 VectorMaker maker{context.pool()};
1175 output = maker.arrayVector<float>(result, REAL());
1176 }
1177
1183 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
1184 return {exec::FunctionSignatureBuilder()
1185 .returnType("array(REAL)")
1186 .argumentType("array(REAL)")
1187 .build()};
1188 }
1189
1195 float* getTensor() const override {
1196 return new float[0];
1197 }
1198
1204 std::string getFuncName() {
1205 return getName();
1206 }
1207
1213 static std::string getName() {
1214 return "sigmoid";
1215 }
1216
1223 CostEstimate getCost(std::vector<int> inputDims) {
1224 std::vector<double> coefficientVector = getCoefficientVector(getName());
1225 float cost = coefficientVector[0] * inputDims[0] * inputDims[1];
1226 return CostEstimate(cost, inputDims[0], inputDims[1]);
1227 }
1228};
1229
1236class Relu : public MLFunction {
1237public:
1241 Relu() {}
1242
1249 static float reluFunction(float x) {
1250 return (x > 0.0f) ? x : 0.0f;
1251 }
1252
1265 void apply(
1266 const SelectivityVector& rows,
1267 std::vector<VectorPtr>& args,
1268 const TypePtr& type,
1269 exec::EvalCtx& context,
1270 VectorPtr& output) const override {
1271 BaseVector::ensureWritable(rows, type, context.pool(), output);
1272 exec::DecodedArgs decodedArgs(rows, args, context);
1273 auto decodedInput = decodedArgs.at(0);
1274 auto numRows = rows.size();
1275
1276 auto inputArray = decodedInput->base()->as<ArrayVector>();
1277 auto inputElements = inputArray->elements();
1278 float* inputValues = inputElements->values()->asMutable<float>();
1279 auto inputOffsets = inputArray->rawOffsets();
1280 auto inputSizes = inputArray->rawSizes();
1281
1282 std::vector<std::vector<float>> result(numRows);
1283
1284 rows.applyToSelected([&](vector_size_t i) {
1285 size_t mappedIndexInRowData = decodedInput->index(i);
1286 size_t dataSize = inputSizes[mappedIndexInRowData];
1287 size_t dataOffset = inputOffsets[mappedIndexInRowData];
1288 std::vector<float> rowResult(dataSize);
1289 std::transform(
1290 inputValues + dataOffset,
1291 inputValues + dataOffset + dataSize,
1292 rowResult.data(),
1293 reluFunction);
1294 result[i] = rowResult;
1295 });
1296 VectorMaker maker{context.pool()};
1297 output = maker.arrayVector<float>(result, REAL());
1298 }
1299
1305 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
1306 return {exec::FunctionSignatureBuilder()
1307 .returnType("array(REAL)")
1308 .argumentType("array(REAL)")
1309 .build()};
1310 }
1311
1317 float* getTensor() const override {
1318 return new float[0];
1319 }
1320
1326 std::string getFuncName() {
1327 return getName();
1328 }
1329
1335 static std::string getName() {
1336 return "relu";
1337 }
1338
1345 CostEstimate getCost(std::vector<int> inputDims) {
1346 std::vector<double> coefficientVector = getCoefficientVector(getName());
1347 float cost = coefficientVector[0] * inputDims[0] * inputDims[1];
1348 return CostEstimate(cost, inputDims[0], inputDims[1]);
1349 }
1350};
1351
1359class Softmax : public MLFunction {
1360public:
1365
1377 void apply(
1378 const SelectivityVector& rows,
1379 std::vector<VectorPtr>& args,
1380 const TypePtr& type,
1381 exec::EvalCtx& context,
1382 VectorPtr& output) const override {
1383 BaseVector::ensureWritable(rows, type, context.pool(), output);
1384 output->clearNulls(rows);
1385 auto arrayOutput = output->as<ArrayVector>();
1386 auto sizes = arrayOutput->mutableSizes(rows.end());
1387 auto rawSizes = sizes->asMutable<int32_t>();
1388 auto offsets = arrayOutput->mutableOffsets(rows.end());
1389 auto rawOffsets = offsets->asMutable<int32_t>();
1390
1391 // Initialize sizes and offsets to zero.
1392 std::fill(rawSizes, rawSizes + rows.end(), 0);
1393 std::fill(rawOffsets, rawOffsets + rows.end(), 0);
1394 auto elementsOutput = arrayOutput->elements();
1395 auto elementsPool = context.pool();
1396
1397 exec::DecodedArgs decodedArgs(rows, args, context);
1398 auto decodedInput = decodedArgs.at(0);
1399 auto numRows = rows.size();
1400 auto inputArray = decodedInput->base()->as<ArrayVector>();
1401 auto inputElements = inputArray->elements();
1402 float* inputValues = inputElements->values()->asMutable<float>();
1403 auto inputOffsets = inputArray->rawOffsets();
1404 auto inputSizes = inputArray->rawSizes();
1405
1406 std::map<vector_size_t, vector_size_t> rowMap;
1407 std::unordered_set<vector_size_t> uniqueRawIndexeSet;
1408 std::vector<vector_size_t> uniqueRawIndexeVector;
1409 vector_size_t numUniqueRows = 0;
1410 int numCols;
1411 rows.applyToSelected([&](vector_size_t row) {
1412 auto mappedIndexInRowData = decodedInput->index(row);
1413 if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
1414 uniqueRawIndexeSet.end()) {
1415 // add it
1416 rowMap[row] = numUniqueRows;
1417 uniqueRawIndexeSet.insert(mappedIndexInRowData);
1418 uniqueRawIndexeVector.push_back(mappedIndexInRowData);
1419 ++numUniqueRows;
1420 numCols = inputSizes[mappedIndexInRowData];
1421 } else {
1422 // already added
1423 rowMap[row] = rowMap[mappedIndexInRowData];
1424 }
1425 });
1426
1427 int numInputMatrixRows = numUniqueRows;
1428 Eigen::MatrixXf inputMatrix(numInputMatrixRows, numCols);
1429 int rowIndex = 0;
1430 for (auto rawIndex : uniqueRawIndexeVector) {
1431 Eigen::Map<const Eigen::VectorXf> rowVector(
1432 inputValues + inputOffsets[rawIndex], numCols);
1433 inputMatrix.row(rowIndex++) = rowVector;
1434 }
1435
1436 Eigen::ArrayXXf exp = inputMatrix.array().exp();
1437 Eigen::ArrayXXf sum = exp.rowwise().sum();
1438 for (int i = 0; i < exp.rows(); i++) {
1439 exp.row(i) /= sum(i);
1440 }
1441
1442 auto baseOffset = elementsOutput->size();
1443 elementsOutput->resize(baseOffset + rows.end() * numCols);
1444 float* outputValues = elementsOutput->values()->asMutable<float>();
1445 vector_size_t outputOffset = 0;
1446 rows.applyToSelected([&](vector_size_t row) {
1447 if (rowMap.find(row) == rowMap.end()) {
1448 throw std::runtime_error(
1449 "Mapped index not found for the result matrix.");
1450 }
1451 auto mappedIndexInResultMatrix = rowMap[row];
1452 rawOffsets[row] = outputOffset;
1453 rawSizes[row] = numCols;
1454
1455 std::memcpy(
1456 outputValues + outputOffset,
1457 exp.row(mappedIndexInResultMatrix).data(),
1458 numCols * sizeof(float));
1459
1460 outputOffset += numCols;
1461 });
1462
1463 arrayOutput->setElements(elementsOutput);
1464 }
1465
1471 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
1472 return {exec::FunctionSignatureBuilder()
1473 .returnType("array(REAL)")
1474 .argumentType("array(REAL)")
1475 .build()};
1476 }
1477
  /// Softmax holds no model weights; returns an empty heap allocation to
  /// satisfy the MLFunction interface.
  /// NOTE(review): the caller receives ownership of the zero-length array —
  /// if no caller frees it, each call leaks an allocation. Confirm whether
  /// returning nullptr would be acceptable to callers.
  float* getTensor() const override {
    return new float[0];
  }
1486
  /// Instance-level accessor for the registered function name; forwards to
  /// the static getName().
  std::string getFuncName() {
    return getName();
  }
1495
1501 static std::string getName() {
1502 return "softmax";
1503 }
1504
1511 CostEstimate getCost(std::vector<int> inputDims) {
1512 std::vector<double> coefficientVector = getCoefficientVector(getName());
1513 float cost = coefficientVector[0] * inputDims[0] * inputDims[1];
1514 return CostEstimate(cost, inputDims[0], inputDims[1]);
1515 }
1516};
1517
1524class Argmax : public MLFunction {
1525public:
1530
1542 void apply(
1543 const SelectivityVector& rows,
1544 std::vector<VectorPtr>& args,
1545 const TypePtr& type,
1546 exec::EvalCtx& context,
1547 VectorPtr& output) const override {
1548 BaseVector::ensureWritable(rows, type, context.pool(), output);
1549 auto arrayOutput = output->asFlatVector<int>();
1550
1551 exec::DecodedArgs decodedArgs(rows, args, context);
1552 auto decodedInput = decodedArgs.at(0);
1553 auto numRows = rows.size();
1554
1555 auto inputArray = decodedInput->base()->as<ArrayVector>();
1556 auto inputElements = inputArray->elements();
1557 float* inputValues = inputElements->values()->asMutable<float>();
1558 auto inputOffsets = inputArray->rawOffsets();
1559 auto inputSizes = inputArray->rawSizes();
1560
1561 std::map<vector_size_t, vector_size_t> rowMap;
1562 std::unordered_set<vector_size_t> uniqueRawIndexeSet;
1563 std::vector<vector_size_t> uniqueRawIndexeVector;
1564 vector_size_t numUniqueRows = 0;
1565 int numCols;
1566 rows.applyToSelected([&](vector_size_t row) {
1567 auto mappedIndexInRowData = decodedInput->index(row);
1568 if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
1569 uniqueRawIndexeSet.end()) {
1570 // add it
1571 rowMap[row] = numUniqueRows;
1572 uniqueRawIndexeSet.insert(mappedIndexInRowData);
1573 uniqueRawIndexeVector.push_back(mappedIndexInRowData);
1574 ++numUniqueRows;
1575 numCols = inputSizes[mappedIndexInRowData];
1576 } else {
1577 // already added
1578 rowMap[row] = rowMap[mappedIndexInRowData];
1579 }
1580 });
1581
1582 int numInputMatrixRows = numUniqueRows;
1583 Eigen::MatrixXf inputMatrix(numInputMatrixRows, numCols);
1584 int rowIndex = 0;
1585 for (auto rawIndex : uniqueRawIndexeVector) {
1586 Eigen::Map<const Eigen::VectorXf> rowVector(
1587 inputValues + inputOffsets[rawIndex], numCols);
1588 inputMatrix.row(rowIndex++) = rowVector;
1589 }
1590
1591 std::map<vector_size_t, vector_size_t> argmaxMap;
1592 for (int i = 0; i < inputMatrix.rows(); i++) {
1593 Eigen::Index maxRow, maxCol;
1594 inputMatrix.row(i).maxCoeff(&maxRow, &maxCol);
1595 argmaxMap[i] = maxCol;
1596 }
1597
1598 int* outputValues = arrayOutput->mutableRawValues<int>();
1599 vector_size_t outputOffset = 0;
1600 std::unordered_map<int, int> valueCounts;
1601 rows.applyToSelected([&](vector_size_t row) {
1602 if (rowMap.find(row) == rowMap.end()) {
1603 throw std::runtime_error(
1604 "Mapped index not found for the result matrix.");
1605 }
1606 auto mappedIndexInResultMatrix = rowMap[row];
1607 outputValues[row] = argmaxMap[mappedIndexInResultMatrix];
1608 valueCounts[outputValues[row]]++;
1609 });
1610
1611 for (const auto& pair : valueCounts) {
1612 LOG(INFO) << "[INFO] Label Distributions: Key: " << pair.first
1613 << ", Value: " << pair.second << std::endl;
1614 }
1615 }
1616
1622 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
1623 return {exec::FunctionSignatureBuilder()
1624 .returnType("INTEGER")
1625 .argumentType("array(REAL)")
1626 .build()};
1627 }
1628
1634 float* getTensor() const override {
1635 return new float[0];
1636 }
1637
1643 std::string getFuncName() {
1644 return getName();
1645 }
1646
1652 static std::string getName() {
1653 return "argmax";
1654 }
1655
1662 CostEstimate getCost(std::vector<int> inputDims) {
1663 std::vector<double> coefficientVector = getCoefficientVector(getName());
1664 float cost = coefficientVector[0] * inputDims[0] * inputDims[1];
1665 return CostEstimate(cost, inputDims[0], inputDims[1]);
1666 }
1667};
1668
1678class MinMaxScaler : public MLFunction {
1679public:
1687 MinMaxScaler(float* scalerMinValues, float* scalerMaxValues, int numCols) {
1688 scalerMinValues_ = new float[numCols];
1689 scalerMaxValues_ = new float[numCols];
1690 std::memcpy(scalerMinValues_, scalerMinValues, numCols * sizeof(float));
1691 std::memcpy(scalerMaxValues_, scalerMaxValues, numCols * sizeof(float));
1692 numCols_ = numCols;
1693 }
1694
1700 MinMaxScaler(std::string minMaxScalerDataPath) {
1701 std::vector<float> scalerMinVector;
1702 std::vector<float> scalerMaxVector;
1703
1704 if (!std::filesystem::exists(minMaxScalerDataPath)) {
1705 throw std::runtime_error("File not found: " + minMaxScalerDataPath);
1706 }
1707 std::ifstream file(minMaxScalerDataPath);
1708 std::string line;
1709 // Read each line from the file
1710 int lineCount = 0;
1711 while (std::getline(file, line)) {
1712 std::istringstream iss(line); // Create a string stream from the line
1713 float value;
1714
1715 // Read each value from the line
1716 // First line should be min values
1717 // Second line should be max values
1718 while (iss >> value) {
1719 if (lineCount == 0) {
1720 scalerMinVector.push_back(value); // Store the value in tempValues
1721 } else if (lineCount == 1) {
1722 scalerMaxVector.push_back(value); // Store the value in tempValues
1723 } else {
1724 throw std::runtime_error(
1725 "Invalid file format, parsed lineCount: " +
1726 std::to_string(lineCount));
1727 }
1728 }
1729 lineCount++;
1730 }
1731 file.close(); // Close the file
1732 // the size should be equal
1733 assert(scalerMinVector.size() == scalerMaxVector.size());
1734 numCols_ = scalerMinVector.size();
1735
1736 scalerMinValues_ = new float[numCols_];
1737 scalerMaxValues_ = new float[numCols_];
1738 std::memcpy(
1739 scalerMinValues_, scalerMinVector.data(), numCols_ * sizeof(float));
1740 std::memcpy(
1741 scalerMaxValues_, scalerMaxVector.data(), numCols_ * sizeof(float));
1742 }
1743
1755 void apply(
1756 const SelectivityVector& rows,
1757 std::vector<VectorPtr>& args,
1758 const TypePtr& type,
1759 exec::EvalCtx& context,
1760 VectorPtr& output) const override {
1761 BaseVector::ensureWritable(rows, type, context.pool(), output);
1762
1763 output->clearNulls(rows);
1764 auto arrayOutput = output->as<ArrayVector>();
1765 auto sizes = arrayOutput->mutableSizes(rows.end());
1766 auto rawSizes = sizes->asMutable<int32_t>();
1767 auto offsets = arrayOutput->mutableOffsets(rows.end());
1768 auto rawOffsets = offsets->asMutable<int32_t>();
1769
1770 // Initialize sizes and offsets to zero.
1771 std::fill(rawSizes, rawSizes + rows.end(), 0);
1772 std::fill(rawOffsets, rawOffsets + rows.end(), 0);
1773 auto elementsOutput = arrayOutput->elements();
1774 auto elementsPool = context.pool();
1775
1776 exec::DecodedArgs decodedArgs(rows, args, context);
1777 auto decodedInput = decodedArgs.at(0);
1778 auto numRows = rows.size();
1779 auto inputArray = decodedInput->base()->as<ArrayVector>();
1780 auto inputElements = inputArray->elements();
1781 float* inputValues = inputElements->values()->asMutable<float>();
1782 auto inputOffsets = inputArray->rawOffsets();
1783 auto inputSizes = inputArray->rawSizes();
1784
1785 std::map<vector_size_t, vector_size_t> rowMap;
1786 std::unordered_set<vector_size_t> uniqueRawIndexeSet;
1787 std::vector<vector_size_t> uniqueRawIndexeVector;
1788 vector_size_t numUniqueRows = 0;
1789 int numCols = numCols_;
1790 rows.applyToSelected([&](vector_size_t row) {
1791 auto mappedIndexInRowData = decodedInput->index(row);
1792 if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
1793 uniqueRawIndexeSet.end()) {
1794 // add it
1795 rowMap[row] = numUniqueRows;
1796 uniqueRawIndexeSet.insert(mappedIndexInRowData);
1797 uniqueRawIndexeVector.push_back(mappedIndexInRowData);
1798 ++numUniqueRows;
1799 } else {
1800 // already added
1801 rowMap[row] = rowMap[mappedIndexInRowData];
1802 }
1803 });
1804
1805 int numInputMatrixRows = numUniqueRows;
1806 Eigen::MatrixXf inputMatrix(numInputMatrixRows, numCols);
1807 int rowIndex = 0;
1808 for (auto rawIndex : uniqueRawIndexeVector) {
1809 Eigen::Map<const Eigen::VectorXf> rowVector(
1810 inputValues + inputOffsets[rawIndex], numCols);
1811 inputMatrix.row(rowIndex++) = rowVector;
1812 }
1813
1814 Eigen::Map<
1815 Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
1816 minVals(scalerMinValues_, 1, numCols);
1817 Eigen::Map<
1818 Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
1819 maxVals(scalerMaxValues_, 1, numCols);
1820 Eigen::MatrixXf resultMatrix =
1821 (inputMatrix.rowwise() - minVals.row(0)).array().rowwise() /
1822 (maxVals.row(0) - minVals.row(0)).array();
1823
1824 auto baseOffset = elementsOutput->size();
1825 elementsOutput->resize(baseOffset + rows.end() * numCols);
1826 float* outputValues = elementsOutput->values()->asMutable<float>();
1827 vector_size_t outputOffset = 0;
1828 rows.applyToSelected([&](vector_size_t row) {
1829 if (rowMap.find(row) == rowMap.end()) {
1830 throw std::runtime_error(
1831 "Mapped index not found for the result matrix.");
1832 }
1833 auto mappedIndexInResultMatrix = rowMap[row];
1834 rawOffsets[row] = outputOffset;
1835 rawSizes[row] = numCols;
1836
1837 std::memcpy(
1838 outputValues + outputOffset,
1839 resultMatrix.row(mappedIndexInResultMatrix).data(),
1840 numCols * sizeof(float));
1841
1842 outputOffset += numCols;
1843 });
1844 arrayOutput->setElements(elementsOutput);
1845 }
1846
1852 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
1853 return {exec::FunctionSignatureBuilder()
1854 .returnType("array(REAL)")
1855 .argumentType("array(REAL)")
1856 .build()};
1857 }
1858
1864 float* getTensor() const override {
1865 return new float[0];
1866 }
1867
1873 std::string getFuncName() {
1874 return getName();
1875 }
1876
1882 static std::string getName() {
1883 return "min_max_scaler";
1884 }
1885
1892 CostEstimate getCost(std::vector<int> inputDims) {
1893 std::vector<double> coefficientVector = getCoefficientVector(getName());
1894 float cost = coefficientVector[0] * inputDims[0] * inputDims[1];
1895 return CostEstimate(cost, inputDims[0], inputDims[1]);
1896 }
1897
1898private:
1899 float* scalerMinValues_;
1900 float* scalerMaxValues_;
1901 int numCols_;
1902};
1903
1910public:
1918 TorchDNN2Level(float** weights, float** bias, std::vector<int> dimensions) {
1919 this->weights = weights;
1920 this->bias = bias;
1921 dims = dimensions;
1922 }
1923
1935 void apply(
1936 const SelectivityVector& rows,
1937 std::vector<VectorPtr>& args,
1938 const TypePtr& type,
1939 exec::EvalCtx& context,
1940 VectorPtr& output) const override {
1941 std::chrono::steady_clock::time_point begin =
1942 std::chrono::steady_clock::now();
1943 torch::nn::Linear dense1(dims[0], dims[1]);
1944 torch::nn::Linear dense2(dims[1], dims[2]);
1945 torch::nn::ReLU relu;
1946
1947 torch::Tensor weightTensor1 =
1948 torch::from_blob(weights[0], {dims[0], dims[1]}).t();
1949 torch::Tensor weightTensor2 =
1950 torch::from_blob(weights[1], {dims[1], dims[2]}).t();
1951 torch::Tensor bias1 = torch::from_blob(bias[0], {dims[1]});
1952 torch::Tensor bias2 = torch::from_blob(bias[1], {dims[2]});
1953
1954 dense1->weight.set_data(weightTensor1);
1955 dense2->weight.set_data(weightTensor2);
1956 dense1->bias.set_data(bias1);
1957 dense2->bias.set_data(bias2);
1958
1959 auto input_elements = args[0]->as<ArrayVector>()->elements();
1960 float* input_values = input_elements->values()->asMutable<float>();
1961 int input_size = input_elements->size();
1962
1963 torch::Tensor input =
1964 torch::from_blob(input_values, {rows.size(), dims[0]});
1965
1966 torch::Tensor layer1_output = dense1->forward(input);
1967 torch::Tensor reluOutput = relu->forward(layer1_output);
1968 torch::Tensor layer2_output = dense2->forward(reluOutput);
1969 torch::Tensor softmax_output =
1970 torch::nn::functional::softmax(layer2_output, 1);
1971 float* data = softmax_output.data_ptr<float>();
1972
1973 std::vector<std::vector<float>> results;
1974 for (int i = 0; i < rows.size(); ++i) {
1975 // std::vector<float> result;
1976 std::vector<float> result(data + i * dims[2], data + (i + 1) * dims[2]);
1977 // for (int j = 0; j < dims[2]; ++j) {
1978 // result.push_back(data[i*dims[2] + j]);
1979 // }
1980 results.push_back(result);
1981 }
1982 VectorMaker maker{context.pool()};
1983 output = maker.arrayVector<float>(results, REAL());
1984 }
1985
1991 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
1992 return {exec::FunctionSignatureBuilder()
1993 .returnType("array(REAL)")
1994 .argumentType("array(REAL)")
1995 .build()};
1996 }
1997
  /// This wrapper exposes no flat tensor; returns an empty caller-owned
  /// allocation to satisfy the MLFunction interface.
  /// NOTE(review): leaks a zero-length allocation per call if the caller
  /// never frees it.
  float* getTensor() const override {
    return new float[0];
  }
2006
  /// Returns the externally owned per-layer weight buffers (not copied;
  /// the original owner keeps ownership).
  float** getWeights() const {
    return weights;
  }
2015
  /// Returns the externally owned per-layer bias buffers (not copied).
  float** getBias() const {
    return bias;
  }
2024
  /// Instance-level accessor for the registered function name; forwards to
  /// the static getName().
  std::string getFuncName() {
    return getName();
  }
2033
2039 static std::string getName() {
2040 return "torch_dnn";
2041 }
2042
  /// Cost model: weighted product of the input dimensions and the first
  /// layer's dimensions.
  /// NOTE(review): the product is evaluated in int arithmetic before being
  /// passed on; large dimensions could overflow — consider widening.
  CostEstimate getCost(std::vector<int> inputDims) {
    float cost = getWeightedCost(
        getName(), inputDims[0] * inputDims[1] * dims[0] * dims[1]);
    return CostEstimate(cost, inputDims[0], inputDims[1]);
  }
2054
2055private:
2056 float** weights;
2057 float** bias;
2058 std::vector<int> dims;
2059};
2060
2066class TorchDNN : public MLFunction {
2067public:
2076 std::vector<float*> weights,
2077 std::vector<float*> bias,
2078 std::vector<int> dimensions) {
2079 this->weights = weights;
2080 this->bias = bias;
2081 dims = dimensions;
2082 }
2083
  /// Runs the configured multi-layer MLP over all input rows in a single
  /// batched libtorch forward pass and wraps the softmax output as an
  /// array(REAL) vector.
  ///
  /// NOTE(review): a ReLU is applied after *every* Linear layer, including
  /// the final one before softmax; TorchDNN2Level does not ReLU its last
  /// layer — confirm which behavior is intended.
  /// NOTE(review): torch::from_blob does not copy, so `weights`, `bias`,
  /// and the input buffers must outlive this call; the tensors stored in
  /// weights_tensors/bias_tensors keep referencing those blobs.
  void apply(
      const SelectivityVector& rows,
      std::vector<VectorPtr>& args,
      const TypePtr& type,
      exec::EvalCtx& context,
      VectorPtr& output) const override {
    std::vector<torch::nn::Linear> dense_layers;
    std::vector<torch::Tensor> weights_tensors;
    std::vector<torch::Tensor> bias_tensors;
    std::vector<torch::nn::ReLU> relus;

    // Create layers. Stored weights are {in, out}; libtorch Linear
    // expects {out, in}, hence the transpose.
    for (int i = 0; i < dims.size() - 1; ++i) {
      dense_layers.push_back(torch::nn::Linear(dims[i], dims[i + 1]));
      weights_tensors.push_back(
          torch::from_blob(weights[i], {dims[i], dims[i + 1]}).t());
      bias_tensors.push_back(torch::from_blob(bias[i], {dims[i + 1]}));
      relus.push_back(torch::nn::ReLU());
    }

    // Set weights and biases
    for (int i = 0; i < dense_layers.size(); ++i) {
      dense_layers[i]->weight.set_data(weights_tensors[i]);
      dense_layers[i]->bias.set_data(bias_tensors[i]);
    }

    auto input_elements = args[0]->as<ArrayVector>()->elements();
    float* input_values = input_elements->values()->asMutable<float>();
    torch::Tensor input =
        torch::from_blob(input_values, {rows.size(), dims[0]});

    // Forward pass: alternate Linear and ReLU for every configured layer.
    torch::Tensor output_tensor = input;
    for (int i = 0; i < dense_layers.size(); ++i) {
      output_tensor = dense_layers[i]->forward(output_tensor);
      output_tensor = relus[i]->forward(output_tensor);
    }

    // Softmax output
    output_tensor = torch::nn::functional::softmax(output_tensor, 1);
    float* data = output_tensor.data_ptr<float>();

    // Prepare results: copy each row of the final tensor into a per-row
    // float vector for VectorMaker.
    std::vector<std::vector<float>> results;
    for (int i = 0; i < rows.size(); ++i) {
      std::vector<float> result(
          data + i * dims.back(), data + (i + 1) * dims.back());
      results.push_back(result);
    }

    VectorMaker maker{context.pool()};
    output = maker.arrayVector<float>(results, REAL());
  }
2147
2153 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
2154 return {exec::FunctionSignatureBuilder()
2155 .returnType("array(REAL)")
2156 .argumentType("array(REAL)")
2157 .build()};
2158 }
2159
  /// This wrapper exposes no flat tensor; returns an empty caller-owned
  /// allocation to satisfy the MLFunction interface.
  float* getTensor() const override {
    return new float[0];
  }
2168
  /// Returns the per-layer weight buffer pointers. The pointed-to buffers
  /// are externally owned and not copied.
  const std::vector<float*>& getWeights() const {
    return weights;
  }
2177
  /// Returns the per-layer bias buffer pointers. The pointed-to buffers
  /// are externally owned and not copied.
  const std::vector<float*>& getBias() const {
    return bias;
  }
2186
  /// Instance-level accessor for the registered function name; forwards to
  /// the static getName().
  std::string getFuncName() {
    return getName();
  }
2195
2201 static std::string getName() {
2202 return "torchnn";
2203 }
2204
2211 CostEstimate getCost(std::vector<int> inputDims) {
2212 std::vector<double> coefficientVector = getCoefficientVector(getName());
2213 uint64_t factor1 = inputDims[0] * dims[0] * dims[1];
2214 uint64_t factor2 = inputDims[0] * dims[1] * dims[2];
2215 uint64_t factor3 = dims[0] * dims[1];
2216 uint64_t factor4 = dims[1] * dims[2];
2217 float cost = coefficientVector[0] * factor1 +
2218 coefficientVector[1] * factor2 + coefficientVector[2] * factor3 +
2219 coefficientVector[3] * factor4 + coefficientVector[4] * inputDims[0] +
2220 coefficientVector[5] * dims[0] + coefficientVector[6] * dims[1] +
2221 coefficientVector[7] * dims[2];
2222 // LOG(INFO) << fmt::format("[DEBUG] 4 values: {}, {}, {}, {}",inputDims[0],
2223 // inputDims[1], dims[0], dims[1]); LOG(INFO) << fmt::format("[DEBUG] coeff:
2224 // {}",coefficientVector); LOG(INFO) << fmt::format("[DEBUG] Cost
2225 // Computation: {}, {}, {}, {}, {}, {}, {}, {}", coefficientVector[0] *
2226 // factor1, coefficientVector[1] * factor2, coefficientVector[2] * factor3
2227 // , coefficientVector[3] * factor4, coefficientVector[4] *
2228 // inputDims[0] , coefficientVector[5] * dims[0],
2229 // coefficientVector[6] * dims[1] , coefficientVector[7] *
2230 // dims[2]);
2231 // LOG(INFO) << fmt::format("[DEBUG] compute debug: {}, {}, {}, {}, {}",
2232 // inputDims[0], inputDims[0]*dims[1], inputDims[0]*dims[1]*dims[2],
2233 // factor2, coefficientVector[1] * factor2);
2234
2235 return CostEstimate(cost, inputDims[0], dims[2]);
2236 }
2237
2238private:
2239 std::vector<float*> weights;
2240 std::vector<float*> bias;
2241 std::vector<int> dims;
2242};
2243
2247namespace velox::dl {
2248
2262
2268std::string kernelTypeToString(KernelType kernelType) {
2269 switch (kernelType) {
2270 case KernelType::MatMul:
2271 return "MatMul";
2272 case KernelType::MatAdd:
2273 return "MatAdd";
2274 case KernelType::ReLU:
2275 return "ReLU";
2277 return "Softmax";
2279 return "BatchNorm";
2280 case KernelType::Argmax:
2281 return "Argmax";
2283 return "Sigmoid";
2284 default:
2285 return "Unknown";
2286 }
2287}
2288
2295std::ostream& operator<<(std::ostream& os, KernelType kernelType) {
2296 switch (kernelType) {
2297 case KernelType::MatMul:
2298 return os << "MatMul";
2299 case KernelType::MatAdd:
2300 return os << "MatAdd";
2301 case KernelType::ReLU:
2302 return os << "ReLU";
2304 return os << "Softmax";
2306 return os << "BatchNorm";
2307 case KernelType::Argmax:
2308 return os << "Argmax";
2309 default:
2310 return os << "Unknown";
2311 }
2312}
2313
2314} // namespace velox::dl
2321class TorchDNNV2 : public MLFunction {
2322public:
2331 std::vector<velox::dl::KernelType> kernelTypes,
2332 std::vector<float*> weights,
2333 std::vector<int> dimensions) {
2334 this->weights = weights;
2335 dims = dimensions;
2336 kernelTypes_ = kernelTypes;
2337 int numOps = kernelTypes.size();
2338 int weightIdx = 0;
2339 hasArgmax_ = false;
2340 model_ = torch::nn::Sequential();
2341 if (2 * numOps != dims.size()) {
2342 throw std::runtime_error(fmt::format(
2343 "Mismatched number of 2*kernel types and dimensions: {} vs {}",
2344 2 * numOps,
2345 dims.size()));
2346 }
2347 assert(2 * numOps == dims.size());
2348 for (int i = 0; i < numOps; ++i) {
2349 if (kernelTypes[i] == velox::dl::KernelType::MatMul &&
2350 kernelTypes[i + 1] == velox::dl::KernelType::MatAdd) {
2351 auto denseLayer = torch::nn::Linear(dims[2 * i], dims[2 * i + 1]);
2352 denseLayer->weight.set_data(
2353 torch::from_blob(
2354 weights[weightIdx++], {dims[2 * i], dims[2 * i + 1]})
2355 .t());
2356 denseLayer->bias.set_data(
2357 torch::from_blob(weights[weightIdx++], {dims[2 * i + 1]}));
2358 model_->push_back(denseLayer);
2359 } else if (kernelTypes[i] == velox::dl::KernelType::MatAdd) {
2360 // Do nothing, which is handled by creating a Dense Layer in the above
2361 // code
2362 } else if (kernelTypes[i] == velox::dl::KernelType::BatchNorm) {
2363 auto batchNormLayer = torch::nn::BatchNorm1d(dims[2 * i]);
2364 batchNormLayer->weight.set_data(
2365 torch::from_blob(weights[weightIdx++], {dims[2 * i + 1]}));
2366 batchNormLayer->bias.set_data(
2367 torch::from_blob(weights[weightIdx++], {dims[2 * i + 1]}));
2368 model_->push_back(batchNormLayer);
2369 } else if (kernelTypes[i] == velox::dl::KernelType::ReLU) {
2370 model_->push_back(torch::nn::ReLU());
2371 } else if (kernelTypes[i] == velox::dl::KernelType::Sigmoid) {
2372 model_->push_back(torch::nn::Sigmoid());
2373 } else if (kernelTypes[i] == velox::dl::KernelType::Softmax) {
2374 model_->push_back(torch::nn::Softmax(1));
2375 } else if (kernelTypes[i] == velox::dl::KernelType::Argmax) {
2376 model_->push_back(LibTorchArgmaxKernel(1));
2377 hasArgmax_ = true;
2378 } else {
2379 throw std::runtime_error(fmt::format(
2380 "Unsupported kernel type of TorchDNNV2: {}", kernelTypes[i]));
2381 }
2382 }
2383 // enable evaluation mode, this is required for inference, otherwise some
2384 // module could failed, like dropout, batchnorm, etc.
2385 model_->eval();
2386 }
2387
2399 void apply(
2400 const SelectivityVector& rows,
2401 std::vector<VectorPtr>& args,
2402 const TypePtr& type,
2403 exec::EvalCtx& context,
2404 VectorPtr& output) const override {
2405 context.ensureWritable(rows, type, output);
2406 output->clearNulls(rows);
2407
2408 // Perform matrix multiplication logic.
2409 exec::DecodedArgs decodedArgs(rows, args, context);
2410 auto decodedInput = decodedArgs.at(0);
2411 auto inputArray = decodedInput->base()->as<ArrayVector>();
2412 auto inputElements = inputArray->elements();
2413 float* inputValues = inputElements->values()->asMutable<float>();
2414 auto inputOffsets = inputArray->rawOffsets();
2415 auto inputSizes = inputArray->rawSizes();
2416
2417 // The map between the row index in the input data and the row index in
2418 // the output data.
2419 std::map<vector_size_t, vector_size_t> rowMap;
2420 // for efficient check
2421 std::unordered_set<vector_size_t> uniqueRawIndexeSet;
2422 // for iterating over the insert ordering
2423 std::vector<vector_size_t> uniqueRawIndexeVector;
2424 vector_size_t numUniqueRows = 0;
2425 rows.applyToSelected([&](vector_size_t row) {
2426 auto mappedIndexInRowData = decodedInput->index(row);
2427 if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
2428 uniqueRawIndexeSet.end()) {
2429 // add it
2430 rowMap[row] = numUniqueRows;
2431 uniqueRawIndexeSet.insert(mappedIndexInRowData);
2432 uniqueRawIndexeVector.push_back(mappedIndexInRowData);
2433 ++numUniqueRows;
2434 } else {
2435 // already added
2436 rowMap[row] = rowMap[mappedIndexInRowData];
2437 }
2438 });
2439
2440 int numInputMatrixRows = numUniqueRows;
2441 Eigen::MatrixXf inputMatrix(numInputMatrixRows, dims[0]);
2442 int rowIndex = 0;
2443 for (auto rawIndex : uniqueRawIndexeVector) {
2444 Eigen::Map<const Eigen::VectorXf> rowVector(
2445 inputValues + inputOffsets[rawIndex], dims[0]);
2446 inputMatrix.row(rowIndex++) = rowVector;
2447 }
2448
2449 float* inputValues1 = inputMatrix.data();
2450
2451 torch::Tensor input =
2452 torch::from_blob(inputValues1, {numUniqueRows, dims[0]});
2453 torch::Tensor output_tensor = input;
2454
2455 output_tensor =
2456 const_cast<torch::nn::Sequential&>(model_)->forward(output_tensor);
2457 // Append results to the output vector.
2458 if (hasArgmax_) {
2459 auto arrayOutput = output->asFlatVector<int>();
2460 int* outputValues = arrayOutput->mutableRawValues<int>();
2461 auto int_tensor = output_tensor.to(torch::kInt);
2462 int* dataInt = int_tensor.data_ptr<int>();
2463
2464 rows.applyToSelected([&](vector_size_t row) {
2465 if (rowMap.find(row) == rowMap.end()) {
2466 throw std::runtime_error(
2467 "Mapped index not found for the result matrix.");
2468 }
2469 auto mappedIndexInResultMatrix = rowMap[row];
2470 outputValues[row] = dataInt[mappedIndexInResultMatrix];
2471 });
2472 } else {
2473 auto arrayOutput = output->as<ArrayVector>();
2474 auto sizes = arrayOutput->mutableSizes(rows.end());
2475 auto rawSizes = sizes->asMutable<int32_t>();
2476 auto offsets = arrayOutput->mutableOffsets(rows.end());
2477 auto rawOffsets = offsets->asMutable<int32_t>();
2478
2479 // Initialize sizes and offsets to zero.
2480 std::fill(rawSizes, rawSizes + rows.end(), 0);
2481 std::fill(rawOffsets, rawOffsets + rows.end(), 0);
2482
2483 auto elementsOutput = arrayOutput->elements();
2484 auto elementsPool = context.pool();
2485 auto baseOffset = elementsOutput->size();
2486 elementsOutput->resize(baseOffset + rows.end() * dims.back());
2487
2488 float* outputValues = elementsOutput->values()->asMutable<float>();
2489 vector_size_t outputOffset = 0;
2490 float* dataFloat = output_tensor.data_ptr<float>();
2491
2492 rows.applyToSelected([&](vector_size_t row) {
2493 if (rowMap.find(row) == rowMap.end()) {
2494 throw std::runtime_error(
2495 "Mapped index not found for the result matrix.");
2496 }
2497 auto mappedIndexInResultMatrix = rowMap.at(row);
2498 rawOffsets[row] = outputOffset;
2499 rawSizes[row] = dims.back();
2500 std::memcpy(
2501 outputValues + outputOffset,
2502 dataFloat + mappedIndexInResultMatrix * dims.back(),
2503 dims.back() * sizeof(float));
2504 outputOffset += dims.back();
2505 });
2506 arrayOutput->setElements(elementsOutput);
2507 }
2508 }
2509
2515 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
2516 return {
2517 exec::FunctionSignatureBuilder()
2518 .returnType("array(REAL)")
2519 .argumentType("array(REAL)")
2520 .build(),
2521 exec::FunctionSignatureBuilder()
2522 .returnType("INTEGER")
2523 .argumentType("array(REAL)")
2524 .argumentType("INTEGER")
2525 .build(),
2526 exec::FunctionSignatureBuilder()
2527 .returnType("INTEGER")
2528 .argumentType("array(REAL)")
2529 .argumentType("BIGINT")
2530 .build()};
2531 }
2532
  /// This wrapper exposes no flat tensor; returns an empty caller-owned
  /// allocation to satisfy the MLFunction interface.
  float* getTensor() const override {
    return new float[0];
  }
2541
  /// Returns the raw parameter buffer pointers passed at construction
  /// (weights and biases interleaved, in layer order). Externally owned.
  const std::vector<float*>& getWeights() const {
    return weights;
  }
2550
  /// Returns the bias buffer pointers.
  /// NOTE(review): the constructor visible here never populates `bias`
  /// (all parameters are consumed from `weights`), so this likely returns
  /// an empty vector — confirm intended use.
  const std::vector<float*>& getBias() const {
    return bias;
  }
2559
  /// Instance-level accessor for the registered function name; forwards to
  /// the static getName().
  std::string getFuncName() {
    return getName();
  }
2568
2574 static std::string getName() {
2575 return "complexTorchNN";
2576 }
2577
  /// Returns a copy of the kernel-type sequence this model was built from.
  std::vector<velox::dl::KernelType> getKernelTypes() const {
    return kernelTypes_;
  }
2586
  /// Placeholder cost model: always reports zero cost with the input
  /// dimensions passed through unchanged.
  CostEstimate getCost(std::vector<int> inputDims) {
    return CostEstimate(0, inputDims[0], inputDims[1]);
  }
2596
2597private:
2598 std::vector<float*> weights;
2599 std::vector<float*> bias;
2600 std::vector<velox::dl::KernelType> kernelTypes_;
2601 bool hasArgmax_;
2602 torch::nn::Sequential model_;
2603};
2604
2611public:
2620 std::vector<velox::dl::KernelType> kernelTypes,
2621 std::vector<float*> weights,
2622 std::vector<int> dimensions) {
2623 device_ = "cuda:0"; // Initialize CUDA device.
2624 this->weights = weights;
2625 dims = dimensions;
2626 kernelTypes_ = kernelTypes;
2627 int numOps = kernelTypes.size();
2628 int weightIdx = 0;
2629 hasArgmax_ = false;
2630 model_ = torch::nn::Sequential();
2631 assert(2 * numOps == dims.size());
2632 for (int i = 0; i < numOps; ++i) {
2633 if (kernelTypes[i] == velox::dl::KernelType::MatMul &&
2634 kernelTypes[i + 1] == velox::dl::KernelType::MatAdd) {
2635 auto denseLayer = torch::nn::Linear(dims[2 * i], dims[2 * i + 1]);
2636 denseLayer->weight.set_data(
2637 torch::from_blob(
2638 weights[weightIdx++], {dims[2 * i], dims[2 * i + 1]})
2639 .t());
2640 denseLayer->bias.set_data(
2641 torch::from_blob(weights[weightIdx++], {dims[2 * i + 1]}));
2642 model_->push_back(denseLayer);
2643 } else if (kernelTypes[i] == velox::dl::KernelType::MatAdd) {
2644 // Do nothing, which is handled by creating a Dense Layer in the above
2645 // code
2646 } else if (kernelTypes[i] == velox::dl::KernelType::BatchNorm) {
2647 auto batchNormLayer = torch::nn::BatchNorm1d(dims[2 * i]);
2648 batchNormLayer->weight.set_data(
2649 torch::from_blob(weights[weightIdx++], {dims[2 * i + 1]}));
2650 batchNormLayer->bias.set_data(
2651 torch::from_blob(weights[weightIdx++], {dims[2 * i + 1]}));
2652 model_->push_back(batchNormLayer);
2653 } else if (kernelTypes[i] == velox::dl::KernelType::ReLU) {
2654 model_->push_back(torch::nn::ReLU());
2655 } else if (kernelTypes[i] == velox::dl::KernelType::Sigmoid) {
2656 model_->push_back(torch::nn::Sigmoid());
2657 } else if (kernelTypes[i] == velox::dl::KernelType::Softmax) {
2658 model_->push_back(torch::nn::Softmax(1));
2659 } else if (kernelTypes[i] == velox::dl::KernelType::Argmax) {
2660 model_->push_back(LibTorchArgmaxKernel(1));
2661 hasArgmax_ = true;
2662 } else {
2663 throw std::runtime_error(fmt::format(
2664 "Unsupported kernel type of TorchDNNV2: {}", kernelTypes[i]));
2665 }
2666 }
2667 // enable evaluation mode, this is required for inference, otherwise some
2668 // module could failed, like dropout, batchnorm, etc.
2669 model_->to(device_);
2670 model_->eval();
2671 }
2672
2684 void apply(
2685 const SelectivityVector& rows,
2686 std::vector<VectorPtr>& args,
2687 const TypePtr& type,
2688 exec::EvalCtx& context,
2689 VectorPtr& output) const override {
2690 context.ensureWritable(rows, type, output);
2691 output->clearNulls(rows);
2692
2693 // Perform matrix multiplication logic.
2694 exec::DecodedArgs decodedArgs(rows, args, context);
2695 auto decodedInput = decodedArgs.at(0);
2696 auto inputArray = decodedInput->base()->as<ArrayVector>();
2697 auto inputElements = inputArray->elements();
2698 float* inputValues = inputElements->values()->asMutable<float>();
2699 auto inputOffsets = inputArray->rawOffsets();
2700 auto inputSizes = inputArray->rawSizes();
2701
2702 // The map between the row index in the input data and the row index in
2703 // the output data.
2704 std::map<vector_size_t, vector_size_t> rowMap;
2705 // for efficient check
2706 std::unordered_set<vector_size_t> uniqueRawIndexeSet;
2707 // for iterating over the insert ordering
2708 std::vector<vector_size_t> uniqueRawIndexeVector;
2709 vector_size_t numUniqueRows = 0;
2710 rows.applyToSelected([&](vector_size_t row) {
2711 auto mappedIndexInRowData = decodedInput->index(row);
2712 if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
2713 uniqueRawIndexeSet.end()) {
2714 // add it
2715 rowMap[row] = numUniqueRows;
2716 uniqueRawIndexeSet.insert(mappedIndexInRowData);
2717 uniqueRawIndexeVector.push_back(mappedIndexInRowData);
2718 ++numUniqueRows;
2719 } else {
2720 // already added
2721 rowMap[row] = rowMap[mappedIndexInRowData];
2722 }
2723 });
2724
2725 int numInputMatrixRows = numUniqueRows;
2726 Eigen::MatrixXf inputMatrix(numInputMatrixRows, dims[0]);
2727 int rowIndex = 0;
2728 for (auto rawIndex : uniqueRawIndexeVector) {
2729 Eigen::Map<const Eigen::VectorXf> rowVector(
2730 inputValues + inputOffsets[rawIndex], dims[0]);
2731 inputMatrix.row(rowIndex++) = rowVector;
2732 }
2733
2734 float* inputValues1 = inputMatrix.data();
2735
2736 torch::Tensor input =
2737 torch::from_blob(inputValues1, {numUniqueRows, dims[0]});
2738 torch::Tensor output_tensor = input;
2739 output_tensor = output_tensor.to(device_);
2740
2741 output_tensor =
2742 const_cast<torch::nn::Sequential&>(model_)->forward(output_tensor);
2743 output_tensor = output_tensor.to(torch::kCPU);
2744 // Append results to the output vector.
2745
2746 if (hasArgmax_) {
2747 auto arrayOutput = output->asFlatVector<int>();
2748 int* outputValues = arrayOutput->mutableRawValues<int>();
2749 auto int_tensor = output_tensor.to(torch::kInt);
2750 int* dataInt = int_tensor.data_ptr<int>();
2751
2752 rows.applyToSelected([&](vector_size_t row) {
2753 if (rowMap.find(row) == rowMap.end()) {
2754 throw std::runtime_error(
2755 "Mapped index not found for the result matrix.");
2756 }
2757 auto mappedIndexInResultMatrix = rowMap[row];
2758 outputValues[row] = dataInt[mappedIndexInResultMatrix];
2759 });
2760 } else {
2761 auto arrayOutput = output->as<ArrayVector>();
2762 auto sizes = arrayOutput->mutableSizes(rows.end());
2763 auto rawSizes = sizes->asMutable<int32_t>();
2764 auto offsets = arrayOutput->mutableOffsets(rows.end());
2765 auto rawOffsets = offsets->asMutable<int32_t>();
2766
2767 // Initialize sizes and offsets to zero.
2768 std::fill(rawSizes, rawSizes + rows.end(), 0);
2769 std::fill(rawOffsets, rawOffsets + rows.end(), 0);
2770
2771 auto elementsOutput = arrayOutput->elements();
2772 auto elementsPool = context.pool();
2773 auto baseOffset = elementsOutput->size();
2774 elementsOutput->resize(baseOffset + rows.end() * dims.back());
2775 float* outputValues = elementsOutput->values()->asMutable<float>();
2776 vector_size_t outputOffset = 0;
2777
2778 float* dataFloat = output_tensor.data_ptr<float>();
2779
2780 rows.applyToSelected([&](vector_size_t row) {
2781 if (rowMap.find(row) == rowMap.end()) {
2782 throw std::runtime_error(
2783 "Mapped index not found for the result matrix.");
2784 }
2785 auto mappedIndexInResultMatrix = rowMap[row];
2786 rawOffsets[row] = outputOffset;
2787 rawSizes[row] = dims.back();
2788 std::memcpy(
2789 outputValues + outputOffset,
2790 dataFloat + mappedIndexInResultMatrix * dims.back(),
2791 dims.back() * sizeof(float));
2792 outputOffset += dims.back();
2793 });
2794 arrayOutput->setElements(elementsOutput);
2795 }
2796 }
2797
2803 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
2804 return {
2805 exec::FunctionSignatureBuilder()
2806 .returnType("array(REAL)")
2807 .argumentType("array(REAL)")
2808 .build(),
2809 exec::FunctionSignatureBuilder()
2810 .returnType("INTEGER")
2811 .argumentType("array(REAL)")
2812 .argumentType("INTEGER")
2813 .build(),
2814 exec::FunctionSignatureBuilder()
2815 .returnType("INTEGER")
2816 .argumentType("array(REAL)")
2817 .argumentType("BIGINT")
2818 .build()};
2819 }
2820
  /// Returns a tensor for this function. The model's weights live inside the
  /// torch module, so there is nothing meaningful to expose; a zero-length
  /// allocation satisfies the MLFunction interface.
  /// NOTE(review): the caller receives ownership of this allocation — it
  /// leaks unless released with delete[]; confirm callers' expectations.
  float* getTensor() const override {
    return new float[0];
  }
2829
  /// Returns the per-layer raw weight pointers this model was built from.
  /// The pointed-to buffers are owned by the caller that constructed the
  /// function, not by this class.
  const std::vector<float*>& getWeights() const {
    return weights;
  }
2838
  /// Returns the per-layer raw bias pointers this model was built from.
  /// The pointed-to buffers are owned by the caller that constructed the
  /// function, not by this class.
  const std::vector<float*>& getBias() const {
    return bias;
  }
2847
  /// Returns the registered function name (delegates to the static getName()).
  std::string getFuncName() {
    return getName();
  }
2856
2862 static std::string getName() {
2863 return "complexTorchNN_GPU";
2864 }
2865
2871 std::vector<velox::dl::KernelType> getKernelTypes() const {
2872 return kernelTypes_;
2873 }
2874
  /// Estimates the cost of evaluating this function.
  /// @param inputDims must contain at least two entries; inputDims[0] and
  ///        inputDims[1] are forwarded as the row/column cost terms.
  CostEstimate getCost(std::vector<int> inputDims) {
    return CostEstimate(0, inputDims[0], inputDims[1]);
  }
2884
2885private:
2886 std::vector<float*> weights;
2887 std::vector<float*> bias;
2888 std::vector<velox::dl::KernelType> kernelTypes_;
2889 bool hasArgmax_;
2890 std::string device_;
2891 torch::nn::Sequential model_;
2892};
2893
2900public:
2910 std::string kernel,
2911 float* weights,
2912 float* bias,
2913 std::vector<int> dimensions) {
2914 this->kernel = kernel;
2915 this->weights = weights;
2916 this->bias = bias;
2917 dims = dimensions;
2918 }
2919
2931 void apply(
2932 const SelectivityVector& rows,
2933 std::vector<VectorPtr>& args,
2934 const TypePtr& type,
2935 exec::EvalCtx& context,
2936 VectorPtr& output) const override {
2937 std::vector<torch::nn::Linear> dense_layers;
2938 std::vector<torch::Tensor> weights_tensors;
2939 std::vector<torch::Tensor> bias_tensors;
2940 std::vector<torch::nn::ReLU> relus;
2941
2942 auto input_elements = args[0]->as<ArrayVector>()->elements();
2943 float* input_values = input_elements->values()->asMutable<float>();
2944 torch::Tensor input =
2945 torch::from_blob(input_values, {rows.size(), dims[0]});
2946 torch::Tensor output_tensor = input;
2947
2948 if (kernel == "Dense") {
2949 torch::nn::Linear denseLayer = torch::nn::Linear(dims[0], dims[1]);
2950 torch::Tensor weightsTensor =
2951 torch::from_blob(weights, {dims[0], dims[1]}).t();
2952 torch::Tensor biasTensor = torch::from_blob(bias, {dims[1]});
2953 denseLayer->weight.set_data(weightsTensor);
2954 denseLayer->bias.set_data(biasTensor);
2955
2956 output_tensor = denseLayer->forward(output_tensor);
2957 } else if (kernel == "Relu") {
2958 torch::nn::ReLU reluLayer = torch::nn::ReLU();
2959
2960 output_tensor = reluLayer->forward(output_tensor);
2961 } else if (kernel == "Softmax") {
2962 output_tensor = torch::nn::functional::softmax(output_tensor, 1);
2963 }
2964
2965 // output_tensor = torch::nn::functional::softmax(output_tensor, 1);
2966 float* data = output_tensor.data_ptr<float>();
2967
2968 // Prepare results
2969 std::vector<std::vector<float>> results;
2970 for (int i = 0; i < rows.size(); ++i) {
2971 std::vector<float> result(
2972 data + i * dims.back(), data + (i + 1) * dims.back());
2973 results.push_back(result);
2974 }
2975
2976 VectorMaker maker{context.pool()};
2977 output = maker.arrayVector<float>(results, REAL());
2978 }
2979
2985 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
2986 return {exec::FunctionSignatureBuilder()
2987 .returnType("array(REAL)")
2988 .argumentType("array(REAL)")
2989 .build()};
2990 }
2991
  /// Returns a zero-length allocation to satisfy the MLFunction interface;
  /// this kernel's parameters are exposed via getWeights()/getBias() instead.
  /// NOTE(review): ownership passes to the caller — leaks unless delete[]'d.
  float* getTensor() const override {
    return new float[0];
  }
3000
  /// Returns the raw weight buffer (layout {dims[0], dims[1]}, caller-owned).
  const float* getWeights() const {
    return weights;
  }
3009
  /// Returns the raw bias buffer (dims[1] floats, caller-owned).
  const float* getBias() const {
    return bias;
  }
3018
  /// Returns the registered function name (delegates to the static getName()).
  std::string getFuncName() {
    return getName();
  }
3027
3033 static std::string getName() {
3034 return "torchnn_kernel";
3035 }
3036
3037private:
3038 float* weights;
3039 float* bias;
3040 std::string kernel;
3041 std::vector<int> dims;
3042};
3043
3051public:
3060 std::vector<float*> weights,
3061 std::vector<float*> bias,
3062 std::vector<int> dimensions) {
3063 this->weights = weights;
3064 this->bias = bias;
3065 dims = dimensions;
3066 }
3067
3079 void apply(
3080 const SelectivityVector& rows,
3081 std::vector<VectorPtr>& args,
3082 const TypePtr& type,
3083 exec::EvalCtx& context,
3084 VectorPtr& output) const override {
3085 std::vector<torch::nn::Linear> dense_layers;
3086 std::vector<torch::Tensor> weights_tensors;
3087 std::vector<torch::Tensor> bias_tensors;
3088 std::vector<torch::nn::ReLU> relus;
3089
3090 // Create layers
3091 for (int i = 0; i < dims.size() - 1; ++i) {
3092 dense_layers.push_back(torch::nn::Linear(dims[i], dims[i + 1]));
3093 weights_tensors.push_back(
3094 torch::from_blob(weights[i], {dims[i], dims[i + 1]}).t());
3095 bias_tensors.push_back(torch::from_blob(bias[i], {dims[i + 1]}));
3096 relus.push_back(torch::nn::ReLU());
3097 }
3098
3099 // Set weights and biases
3100 for (int i = 0; i < dense_layers.size(); ++i) {
3101 dense_layers[i]->weight.set_data(weights_tensors[i]);
3102 dense_layers[i]->bias.set_data(bias_tensors[i]);
3103 }
3104
3105 auto input_elements = args[0]->as<ArrayVector>()->elements();
3106 float* input_values = input_elements->values()->asMutable<float>();
3107 torch::Tensor input =
3108 torch::from_blob(input_values, {rows.size(), dims[0]});
3109
3110 torch::Tensor output_tensor = input;
3111 for (int i = 0; i < dense_layers.size(); ++i) {
3112 output_tensor = dense_layers[i]->forward(output_tensor);
3113 output_tensor = relus[i]->forward(output_tensor);
3114 }
3115
3116 // Softmax output
3117 output_tensor = torch::nn::functional::softmax(output_tensor, 1);
3118 float* data = output_tensor.data_ptr<float>();
3119
3120 // Prepare results
3121 std::vector<std::vector<float>> results;
3122 for (int i = 0; i < rows.size(); ++i) {
3123 std::vector<float> result(
3124 data + i * dims.back(), data + (i + 1) * dims.back());
3125 results.push_back(result);
3126 }
3127
3128 VectorMaker maker{context.pool()};
3129 output = maker.arrayVector<float>(results, REAL());
3130 }
3131
3137 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
3138 return {exec::FunctionSignatureBuilder()
3139 .returnType("array(REAL)")
3140 .argumentType("array(REAL)")
3141 .build()};
3142 }
3143
  /// Returns a zero-length allocation to satisfy the MLFunction interface;
  /// this model's parameters are exposed via getWeights()/getBias() instead.
  /// NOTE(review): ownership passes to the caller — leaks unless delete[]'d.
  float* getTensor() const override {
    return new float[0];
  }
3152
  /// Returns the per-layer raw weight pointers (buffers are caller-owned).
  const std::vector<float*>& getWeights() const {
    return weights;
  }
3161
  /// Returns the per-layer raw bias pointers (buffers are caller-owned).
  const std::vector<float*>& getBias() const {
    return bias;
  }
3170
3171private:
3172 std::vector<float*> weights;
3173 std::vector<float*> bias;
3174 std::vector<int> dims;
3175};
3176
3183class Convolute : public MLFunction {
3184 public:
3196 Convolute(float* weights, int* dims_) {
3197 weights_ = weights;
3198 for (int i = 0; i < 6; i++)
3199 dims.push_back(dims_[i]);
3200 }
3201
3210 void apply(
3211 const SelectivityVector& rows,
3212 std::vector<VectorPtr>& args,
3213 const TypePtr& type,
3214 exec::EvalCtx& context,
3215 VectorPtr& output) const override {
3216 BaseVector::ensureWritable(rows, type, context.pool(), output);
3217
3218 auto input_elements = args[0]->as<ArrayVector>()->elements();
3219 float* input_values = input_elements->values()->asMutable<float>();
3220
3221 int input_height = dims[4];
3222 int input_width = dims[5];
3223 int input_channel_size = input_height * input_width;
3224 int input_size = input_channel_size * dims[3];
3225
3226 int filter_channel_size = dims[1] * dims[2];
3227 int filter_size = filter_channel_size * dims[3];
3228
3229 int output_height = input_height - dims[1] + 1;
3230 int output_width = input_width - dims[2] + 1;
3231
3232 std::vector<std::vector<float>> results(
3233 rows.size(),
3234 std::vector<float>(output_height * output_width * dims[0]));
3235
3236 std::chrono::steady_clock::time_point begin =
3237 std::chrono::steady_clock::now();
3238
3239 for (int s = 0; s < rows.size(); s++) {
3240 // for each channel
3241 for (int c = 0; c < dims[3]; c++) {
3242 Eigen::Map<
3243 Eigen::
3244 Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
3245 input(
3246 input_values + s * input_size + c * input_channel_size,
3247 input_height,
3248 input_width);
3249 // for every filter
3250 for (int f = 0; f < dims[0]; f++) {
3251 int filter_offset = f * output_height * output_width;
3252 Eigen::Map<Eigen::Matrix<
3253 float,
3254 Eigen::Dynamic,
3255 Eigen::Dynamic,
3256 Eigen::RowMajor>>
3257 kernel(
3258 weights_ + f * filter_size + c * filter_channel_size,
3259 dims[1],
3260 dims[2]);
3261 for (int i = 0; i < output_height; ++i) {
3262 int offset = filter_offset + i * output_width;
3263 for (int j = 0; j < output_width; ++j) {
3264 results[s][offset + j] +=
3265 (input.block(i, j, dims[1], dims[2]).cwiseProduct(kernel))
3266 .sum();
3267 }
3268 }
3269 }
3270 }
3271 }
3272
3273 std::chrono::steady_clock::time_point end =
3274 std::chrono::steady_clock::now();
3275 std::cout << "Time for conv2d (sec) = "
3276 << (std::chrono::duration_cast<std::chrono::microseconds>(
3277 end - begin)
3278 .count()) /
3279 1000000.0
3280 << std::endl;
3281
3282 VectorMaker maker{context.pool()};
3283 output = maker.arrayVector<float>(results, REAL());
3284 }
3285
3290 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
3291 return {exec::FunctionSignatureBuilder()
3292 .returnType("array(REAL)")
3293 .argumentType("array(REAL)")
3294 .build()};
3295 }
3296
3301 float* getTensor() const override {
3302 return weights_;
3303 }
3304
3309 std::string getFuncName() {
3310 return "conv2d";
3311 };
3312
3317 static std::string getName() {
3318 return "conv2d";
3319 };
3320
3321 private:
3322 float* weights_;
3323 std::vector<int> dims;
3324};
3325
3332public:
3339 TorchConvolute(float* weights, int* dims_) {
3340 weights_ = weights;
3341 for (int i = 0; i < 6; i++)
3342 dims.push_back(dims_[i]);
3343 }
3344
3356 void apply(
3357 const SelectivityVector& rows,
3358 std::vector<VectorPtr>& args,
3359 const TypePtr& type,
3360 exec::EvalCtx& context,
3361 VectorPtr& output) const override {
3362 std::chrono::steady_clock::time_point begin =
3363 std::chrono::steady_clock::now();
3364 BaseVector::ensureWritable(rows, type, context.pool(), output);
3365
3366 auto input_elements = args[0]->as<ArrayVector>()->elements();
3367 float* input_values = input_elements->values()->asMutable<float>();
3368
3369 int input_height = dims[4];
3370 int input_width = dims[5];
3371
3372 int output_height = input_height - dims[1] + 1;
3373 int output_width = input_width - dims[2] + 1;
3374
3375 std::vector<std::vector<float>> results(
3376 rows.size(),
3377 std::vector<float>(output_height * output_width * dims[0]));
3378
3379 torch::nn::Conv2d conv_layer(
3380 torch::nn::Conv2dOptions(dims[3], dims[0], {dims[1], dims[2]}));
3381 // torch::Tensor conv_weights = torch::tensor(weights_).view({dims[3],
3382 // dims[0], dims[1], dims[2]});
3383
3384 // conv_layer->weight = torch::nn::parameter::Parameter (conv_weights);
3385 torch::Tensor input_data = torch::from_blob(
3386 input_values, {rows.size(), dims[3], input_height, input_width});
3387
3388 torch::Tensor output_data = conv_layer(input_data);
3389
3390 float* data = output_data.data_ptr<float>();
3391
3392 int row_size = output_height * output_width * dims[0];
3393
3394 for (int i = 0; i < rows.size(); ++i) {
3395 std::vector<float> result;
3396 for (int j = 0; j < row_size; ++j) {
3397 result.push_back(data[i * row_size + j]);
3398 }
3399 results.push_back(result);
3400 }
3401
3402 VectorMaker maker{context.pool()};
3403 output = maker.arrayVector<float>(results, REAL());
3404 std::chrono::steady_clock::time_point end =
3405 std::chrono::steady_clock::now();
3406 }
3407
3413 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
3414 return {exec::FunctionSignatureBuilder()
3415 .returnType("array(REAL)")
3416 .argumentType("array(REAL)")
3417 .build()};
3418 }
3419
  /// Returns the raw filter weight buffer (caller-owned).
  float* getTensor() const override {
    return weights_;
  }
3428
  /// Returns the registered function name (delegates to the static getName()).
  std::string getFuncName() {
    return getName();
  }
3437
3443 static std::string getName() {
3444 return "torchconv2d";
3445 }
3446
3447private:
3448 float* weights_;
3449 std::vector<int> dims;
3450};
3451
3458class TorchCNN : public MLFunction {
3459public:
3467 TorchCNN(float* weights, float* bias, int* dims_) {
3468 weights_ = weights;
3469 bias_ = bias;
3470 for (int i = 0; i < 7; i++)
3471 dims.push_back(dims_[i]);
3472 }
3473
3485 void apply(
3486 const SelectivityVector& rows,
3487 std::vector<VectorPtr>& args,
3488 const TypePtr& type,
3489 exec::EvalCtx& context,
3490 VectorPtr& output) const override {
3491 std::chrono::steady_clock::time_point begin =
3492 std::chrono::steady_clock::now();
3493 BaseVector::ensureWritable(rows, type, context.pool(), output);
3494
3495 auto input_elements = args[0]->as<ArrayVector>()->elements();
3496 float* input_values = input_elements->values()->asMutable<float>();
3497
3498 int input_height = dims[4];
3499 int input_width = dims[5];
3500
3501 int output_height = input_height - dims[1] + 1;
3502 int output_width = input_width - dims[2] + 1;
3503
3504 int input_size = input_elements->size();
3505 // std::cout << "input_size:" << "," << input_size << std::endl;
3506 // std::cout << "input_values:" << "," << input_values[0] << "," <<
3507 // input_values[1] << "," << input_values[2080] << std::endl; std::cout <<
3508 // "row size" << "," << rows.size() << std::endl;
3509
3510 std::vector<std::vector<float>> results(
3511 rows.size(),
3512 std::vector<float>(output_height * output_width * dims[0]));
3513
3514 torch::nn::Conv2d conv_layer(
3515 torch::nn::Conv2dOptions(dims[0], dims[3], {dims[1], dims[2]})
3516 .bias(false));
3517 // torch::nn::Conv2d conv_layer(torch::nn::Conv2dOptions(dims[3], dims[0],
3518 // {dims[1], dims[2]}));
3519 torch::Tensor conv_weights =
3520 torch::from_blob(weights_, {dims[3], dims[0], dims[1], dims[2]})
3521 .to(torch::kFloat);
3522
3523 auto parameters = conv_layer->named_parameters();
3524
3525 // Find and set the weight parameter
3526 for (auto& named_param : parameters) {
3527 if (named_param.key() == "weight") {
3528 named_param.value().data() = conv_weights;
3529 break;
3530 }
3531 }
3532 torch::Tensor input_data =
3533 torch::from_blob(
3534 input_values, {rows.size(), dims[3], input_height, input_width})
3535 .to(torch::kFloat);
3536
3537 torch::Tensor output_data = conv_layer->forward(input_data);
3538
3539 // Convert bias values to a tensor
3540 torch::Tensor bias_tensor = torch::from_blob(bias_, {dims[0]});
3541 if (conv_layer->bias.defined()) {
3542 output_data += bias_tensor;
3543 }
3544
3545 // output_data = torch::relu(output_data);
3546
3547 // output_data = torch::max_pool2d(output_data, {dims[6], dims[6]});
3548
3549 float* data = output_data.data_ptr<float>();
3550
3551 int row_size = output_height * output_width * dims[0];
3552
3553 for (int i = 0; i < rows.size(); ++i) {
3554 std::vector<float> result;
3555 for (int j = 0; j < row_size; ++j) {
3556 result.push_back(data[i * row_size + j]);
3557 }
3558 results.push_back(result);
3559 }
3560
3561 // for (auto entry: results) {
3562 // for (int i =0; i < 1000; i++){
3563 // std::cout << entry[i] << std::endl;
3564 // }
3565 // }
3566
3567 VectorMaker maker{context.pool()};
3568 output = maker.arrayVector<float>(results, REAL());
3569 std::chrono::steady_clock::time_point end =
3570 std::chrono::steady_clock::now();
3571 }
3572
3578 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
3579 return {exec::FunctionSignatureBuilder()
3580 .returnType("array(REAL)")
3581 .argumentType("array(REAL)")
3582 .build()};
3583 }
3584
3590 float* getTensor() const override {
3591 return weights_;
3592 }
3593
3599 float* getWeights() const {
3600 return weights_;
3601 }
3602
3608 float* getBias() const {
3609 return bias_;
3610 }
3611
3617 std::string getFuncName() {
3618 return getName();
3619 }
3620
3626 static std::string getName() {
3627 return "torchcnn";
3628 }
3629
3630private:
3631 float* weights_;
3632 float* bias_;
3633 std::vector<int> dims;
3634};
3635
3642public:
3649 VectorScalarAddition(float* weights, int size) {
3650 weights_ = weights;
3651 dims.push_back(size);
3652 }
3653
3665 void apply(
3666 const SelectivityVector& rows,
3667 std::vector<VectorPtr>& args,
3668 const TypePtr& type,
3669 exec::EvalCtx& context,
3670 VectorPtr& output) const override {
3671 BaseVector::ensureWritable(rows, type, context.pool(), output);
3672
3673 auto input_elements = args[0]->as<ArrayVector>()->elements();
3674 float* input_values = input_elements->values()->asMutable<float>();
3675 int num_cols = input_elements->size() / rows.size();
3676
3677 Eigen::Map<
3678 Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
3679 input(input_values, rows.size(), num_cols);
3680 // for each filter add bias
3681 for (int i = 0, step = num_cols / dims[0]; i < dims[0]; i++) {
3682 input.block(0, i * step, rows.size(), step).array() += weights_[i];
3683 }
3684
3685 std::vector<std::vector<float>> results(
3686 input.rows(), std::vector<float>(input.cols()));
3687 for (int i = 0; i < input.rows(); ++i) {
3688 for (int j = 0; j < input.cols(); ++j) {
3689 results[i][j] = input(i, j);
3690 }
3691 }
3692 VectorMaker maker{context.pool()};
3693 output = maker.arrayVector<float>(results, REAL());
3694 }
3695
3701 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
3702 return {exec::FunctionSignatureBuilder()
3703 .returnType("array(REAL)")
3704 .argumentType("array(REAL)")
3705 .build()};
3706 }
3707
  /// Returns the raw per-group scalar buffer (caller-owned).
  float* getTensor() const override {
    return weights_;
  }
3716
  /// Returns the registered function name (delegates to the static getName()).
  std::string getFuncName() {
    return getName();
  }
3725
3731 static std::string getName() {
3732 return "vec_scal_add";
3733 }
3734
3735private:
3736 float* weights_;
3737};
3738
3744class MaxPool : public MLFunction {
3745public:
3753 MaxPool(int side, int rows, int cols) {
3754 dims.push_back(side);
3755 dims.push_back(rows);
3756 dims.push_back(cols);
3757 }
3758
3770 void apply(
3771 const SelectivityVector& rows,
3772 std::vector<VectorPtr>& args,
3773 const TypePtr& type,
3774 exec::EvalCtx& context,
3775 VectorPtr& output) const override {
3776 BaseVector::ensureWritable(rows, type, context.pool(), output);
3777
3778 auto input_elements = args[0]->as<ArrayVector>()->elements();
3779 float* input_values = input_elements->values()->asMutable<float>();
3780 int num_cols = input_elements->size() / rows.size();
3781 int num_channels = num_cols / (dims[1] * dims[2]);
3782 int side = dims[0];
3783 int output_size = (dims[1] * dims[2]) / (side * side);
3784 int output_rows = dims[1] / side;
3785 int output_cols = dims[2] / side;
3786 // this can be done by using one big matrix but padding will not be possible
3787 // then this doesn't support padding yet but this makes it possible to add
3788 // it later
3789 std::vector<std::vector<float>> results(
3790 rows.size(), std::vector<float>(num_cols / (side * side)));
3791 // for each sample
3792 for (int s = 0; s < rows.size(); s++) {
3793 for (int c = 0; c < num_channels; c++) {
3794 Eigen::Map<
3795 Eigen::
3796 Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
3797 input(
3798 input_values + s * num_cols + c * dims[1] * dims[2],
3799 dims[1],
3800 dims[2]);
3801 for (int i = 0; i < output_rows; i++) {
3802 for (int j = 0; j < output_cols; j++) {
3803 results[s][c * output_size + i * output_cols + j] =
3804 input.block(i * side, j * side, side, side).maxCoeff();
3805 }
3806 }
3807 }
3808 }
3809
3810 // for (const auto& inner_vector : results) {
3811 // // Iterate over each element in the inner vector
3812 // for (const auto& element : inner_vector) {
3813 // std::cout << element << std::endl;
3814 // }
3815 // }
3816
3817 // for(int i=0; i < 64; i++){
3818
3819 // for(int j=0; j < 144; j++){
3820 // if(j % 12 == 0)
3821 // std::cout << std::endl;
3822 // std::cout << results[0][i*144 + j];
3823 // }
3824
3825 // }
3826
3827 VectorMaker maker{context.pool()};
3828 output = maker.arrayVector<float>(results, REAL());
3829 }
3830
3836 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
3837 return {exec::FunctionSignatureBuilder()
3838 .returnType("array(REAL)")
3839 .argumentType("array(REAL)")
3840 .build()};
3841 }
3842
3848 float* getTensor() const override {
3849 return new float[0];
3850 }
3851
3857 std::string getFuncName() {
3858 return getName();
3859 }
3860
3866 static std::string getName() {
3867 return "max_pool";
3868 }
3869
3870private:
3871 std::vector<int> dims;
3872};
Implementation of a decision tree for machine learning predictions.
Implementation of a dot product function for machine learning.
Implementation of a dropout layer for machine learning.
Implementation of an embedding layer for machine learning.
Implementation of various encoder classes for machine learning.
Implementation of a Hugging Face serverless API integration for machine learning tasks.
Implementation of a Hugging Face tokenizer for machine learning tasks.
Implementation of a position encoding function for machine learning.
Implementation of a Retrieval-Augmented Generation (RAG) function for machine learning.
Implementation of a sequence pooling function for machine learning.
This file contains the implementation of XGBoost-based machine learning functions in Velox.
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:1622
std::string getFuncName()
Returns the name of the function.
Definition functions.h:1643
Argmax()
Default constructor.
Definition functions.h:1529
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:1634
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the Argmax function to the input array.
Definition functions.h:1542
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying the Argmax function.
Definition functions.h:1662
static std::string getName()
Static method to return the name of the function.
Definition functions.h:1652
std::string getFuncName()
Returns the name of the function.
Definition functions.h:3309
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the 2D convolution operation to the input data.
Definition functions.h:3210
float * getTensor() const override
Returns the filter weights tensor.
Definition functions.h:3301
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures for the convolution operation.
Definition functions.h:3290
static std::string getName()
Returns the name of the function.
Definition functions.h:3317
Convolute(float *weights, int *dims_)
Constructs a new Convolute object.
Definition functions.h:3196
A base class for machine learning functions, inheriting from Velox's VectorFunction.
Definition BaseFunction.h:9
double getWeightedCost(std::string name, float cost)
Calculates the weighted cost of the function.
Definition BaseFunction.h:70
std::vector< double > getCoefficientVector(std::string name)
Retrieves the cost coefficients for the function.
Definition BaseFunction.h:83
std::vector< int > dims
Dimensions of the function.
Definition BaseFunction.h:61
static std::string getName()
Get the name of the function.
Definition functions.h:866
std::string getFuncName()
Get the name of the function.
Definition functions.h:858
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Get the function signatures for matrix addition.
Definition functions.h:839
std::string getWeightsFile()
Get the weights file associated with this function.
Definition functions.h:874
void setWeights(float *weights)
Set the weights for this function.
Definition functions.h:882
float * getTensor() const override
Get the tensor data associated with this function.
Definition functions.h:850
MatrixAddition(std::string weightsFile, int num_cols)
Constructor for MatrixAddition.
Definition functions.h:792
MatrixAddition(float *weights, int num_cols)
Constructor for MatrixAddition.
Definition functions.h:782
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Apply the matrix addition operation.
Definition functions.h:805
CostEstimate getCost(std::vector< int > inputDims)
Estimate the computational cost of the function.
Definition functions.h:891
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Get the function signatures for blocked matrix multiplication.
Definition functions.h:733
float * getTensor() const override
Get the tensor data associated with this function.
Definition functions.h:745
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Apply the blocked matrix multiplication operation.
Definition functions.h:683
static std::string getName()
Get the name of the function.
Definition functions.h:761
MatrixMultiply_Block(int num_rows, int num_cols, int num_samples, int blocks)
Constructor for MatrixMultiply_Block.
Definition functions.h:664
std::string getFuncName()
Get the name of the function.
Definition functions.h:753
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Apply the blocked matrix multiplication operation.
Definition functions.h:363
static std::string getName()
Get the name of the function.
Definition functions.h:451
float * getTensor() const override
Get the tensor data associated with this function.
Definition functions.h:435
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Get the function signatures for blocked matrix multiplication.
Definition functions.h:423
std::string getFuncName()
Get the name of the function.
Definition functions.h:443
MatrixMultiply_b(int num_rows, int num_cols, int num_samples, int blocks)
Constructor for MatrixMultiply_b.
Definition functions.h:348
std::string getFuncName()
Get the name of the function.
Definition functions.h:616
CostEstimate getCost(std::vector< int > inputDims)
Estimate the computational cost of the function.
Definition functions.h:633
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Get the function signatures for hierarchical matrix multiplication.
Definition functions.h:596
float * getTensor() const override
Get the tensor data associated with this function.
Definition functions.h:608
static std::string getName()
Get the name of the function.
Definition functions.h:624
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &outputType, exec::EvalCtx &context, VectorPtr &output) const override
Apply the hierarchical matrix multiplication operation.
Definition functions.h:490
MatrixMultiply_h(int num_rows, int num_cols, int block_size)
Constructor for MatrixMultiply_h.
Definition functions.h:476
std::string getWeightsFile()
Get the weights file associated with this function.
Definition functions.h:299
void setWeights(float *weights)
Set the weights for this function.
Definition functions.h:307
std::string getFuncName()
Get the name of the function.
Definition functions.h:283
CostEstimate getCost(std::vector< int > inputDims)
Estimate the computational cost of the function.
Definition functions.h:316
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &outputType, exec::EvalCtx &context, VectorPtr &output) const override
Apply the matrix multiplication operation.
Definition functions.h:150
static std::string getName()
Get the name of the function.
Definition functions.h:291
MatrixMultiply(std::string weightsFile, int num_rows, int num_cols)
Constructor for MatrixMultiply.
Definition functions.h:136
float * getTensor() const override
Get the tensor data associated with this function.
Definition functions.h:275
MatrixMultiply(float *weights, int num_rows, int num_cols)
Constructor for MatrixMultiply.
Definition functions.h:122
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Get the function signatures for matrix multiplication.
Definition functions.h:257
std::string getWeightsFile()
Returns the path to the weights file.
Definition functions.h:1089
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:1041
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the matrix-vector addition operation.
Definition functions.h:946
MatrixVectorAddition(std::string weightsFile, int num_cols)
Constructor that initializes the class with a file containing weights.
Definition functions.h:930
std::string getFuncName()
Returns the name of the function.
Definition functions.h:1071
void setWeights(float *weights)
Sets the weights for the function.
Definition functions.h:1098
MatrixVectorAddition(float *weights, int num_cols)
Constructor that initializes the class with a raw array of weights.
Definition functions.h:917
static std::string getName()
Static method to return the name of the function.
Definition functions.h:1080
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:1050
static std::string getName()
Static method to return the name of the function.
Definition functions.h:3866
std::string getFuncName()
Returns the name of the function.
Definition functions.h:3857
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies max pooling to the input array.
Definition functions.h:3770
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:3848
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:3836
MaxPool(int side, int rows, int cols)
Constructor that initializes the max pooling operation with dimensions.
Definition functions.h:3753
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:1864
std::string getFuncName()
Returns the name of the function.
Definition functions.h:1873
MinMaxScaler(std::string minMaxScalerDataPath)
Constructor that initializes the scaler with a file containing min and max values.
Definition functions.h:1700
MinMaxScaler(float *scalerMinValues, float *scalerMaxValues, int numCols)
Constructor that initializes the scaler with raw arrays of min and max values.
Definition functions.h:1687
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying Min-Max scaling.
Definition functions.h:1892
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies Min-Max scaling to the input array.
Definition functions.h:1755
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:1852
static std::string getName()
Static method to return the name of the function.
Definition functions.h:1882
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:1305
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying the ReLU function.
Definition functions.h:1345
std::string getFuncName()
Returns the name of the function.
Definition functions.h:1326
static float reluFunction(float x)
Computes the ReLU function for a single input value.
Definition functions.h:1249
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the ReLU function to the input array.
Definition functions.h:1265
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:1317
Relu()
Default constructor.
Definition functions.h:1241
static std::string getName()
Static method to return the name of the function.
Definition functions.h:1335
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying the Sigmoid function.
Definition functions.h:1223
static float sigmoidFunction(float x)
Computes the Sigmoid function for a single input value.
Definition functions.h:1127
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:1183
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:1195
static std::string getName()
Static method to return the name of the function.
Definition functions.h:1213
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the Sigmoid function to the input array.
Definition functions.h:1143
std::string getFuncName()
Returns the name of the function.
Definition functions.h:1204
Sigmoid()
Default constructor.
Definition functions.h:1119
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying the Softmax function.
Definition functions.h:1511
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:1483
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:1471
Softmax()
Default constructor.
Definition functions.h:1364
static std::string getName()
Static method to return the name of the function.
Definition functions.h:1501
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the Softmax function to the input array.
Definition functions.h:1377
std::string getFuncName()
Returns the name of the function.
Definition functions.h:1492
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:3578
static std::string getName()
Static method to return the name of the function.
Definition functions.h:3626
std::string getFuncName()
Returns the name of the function.
Definition functions.h:3617
float * getBias() const
Returns the biases of the CNN.
Definition functions.h:3608
float * getWeights() const
Returns the weights of the CNN.
Definition functions.h:3599
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the CNN to the input array using PyTorch.
Definition functions.h:3485
TorchCNN(float *weights, float *bias, int *dims_)
Constructor that initializes the CNN with weights, biases, and dimensions.
Definition functions.h:3467
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:3590
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the 2D convolution operation to the input array using PyTorch.
Definition functions.h:3356
static std::string getName()
Static method to return the name of the function.
Definition functions.h:3443
std::string getFuncName()
Returns the name of the function.
Definition functions.h:3434
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:3413
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:3425
TorchConvolute(float *weights, int *dims_)
Constructor that initializes the convolution operation with weights and dimensions.
Definition functions.h:3339
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the 2-level neural network to the input array.
Definition functions.h:1935
TorchDNN2Level(float **weights, float **bias, std::vector< int > dimensions)
Constructor that initializes the neural network with weights and biases.
Definition functions.h:1918
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:2003
float ** getBias() const
Returns the biases of the neural network.
Definition functions.h:2021
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying the neural network.
Definition functions.h:2049
static std::string getName()
Static method to return the name of the function.
Definition functions.h:2039
float ** getWeights() const
Returns the weights of the neural network.
Definition functions.h:2012
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:1991
std::string getFuncName()
Returns the name of the function.
Definition functions.h:2030
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:2985
TorchDNNKernel(std::string kernel, float *weights, float *bias, std::vector< int > dimensions)
Constructor that initializes the neural network kernel with weights, biases, and dimensions.
Definition functions.h:2909
const float * getWeights() const
Returns the weights of the neural network kernel.
Definition functions.h:3006
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the neural network kernel to the input array.
Definition functions.h:2931
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:2997
const float * getBias() const
Returns the biases of the neural network kernel.
Definition functions.h:3015
static std::string getName()
Static method to return the name of the function.
Definition functions.h:3033
std::string getFuncName()
Returns the name of the function.
Definition functions.h:3024
std::string getFuncName()
Returns the name of the function.
Definition functions.h:2853
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying the neural network.
Definition functions.h:2881
const std::vector< float * > & getWeights() const
Returns the weights of the neural network.
Definition functions.h:2835
const std::vector< float * > & getBias() const
Returns the biases of the neural network.
Definition functions.h:2844
static std::string getName()
Static method to return the name of the function.
Definition functions.h:2862
std::vector< velox::dl::KernelType > getKernelTypes() const
Returns the kernel types used in the neural network.
Definition functions.h:2871
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:2803
TorchDNNV2CUDA(std::vector< velox::dl::KernelType > kernelTypes, std::vector< float * > weights, std::vector< int > dimensions)
Constructor that initializes the neural network with kernel types, weights, and dimensions.
Definition functions.h:2619
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the neural network to the input array using CUDA.
Definition functions.h:2684
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:2826
TorchDNNV2(std::vector< velox::dl::KernelType > kernelTypes, std::vector< float * > weights, std::vector< int > dimensions)
Constructor that initializes the neural network with kernel types, weights, and dimensions.
Definition functions.h:2330
std::string getFuncName()
Returns the name of the function.
Definition functions.h:2565
static std::string getName()
Static method to return the name of the function.
Definition functions.h:2574
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:2515
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the neural network to the input array.
Definition functions.h:2399
std::vector< velox::dl::KernelType > getKernelTypes() const
Returns the kernel types used in the neural network.
Definition functions.h:2583
const std::vector< float * > & getBias() const
Returns the biases of the neural network.
Definition functions.h:2556
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying the neural network.
Definition functions.h:2593
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:2538
const std::vector< float * > & getWeights() const
Returns the weights of the neural network.
Definition functions.h:2547
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:3149
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:3137
const std::vector< float * > & getWeights() const
Returns the weights of the neural network.
Definition functions.h:3158
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the multi-layer neural network to the input array.
Definition functions.h:3079
TorchDNN_Multi(std::vector< float * > weights, std::vector< float * > bias, std::vector< int > dimensions)
Constructor that initializes the neural network with weights, biases, and layer dimensions.
Definition functions.h:3059
const std::vector< float * > & getBias() const
Returns the biases of the neural network.
Definition functions.h:3167
std::string getFuncName()
Returns the name of the function.
Definition functions.h:2192
const std::vector< float * > & getWeights() const
Returns the weights of the neural network.
Definition functions.h:2174
const std::vector< float * > & getBias() const
Returns the biases of the neural network.
Definition functions.h:2183
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:2153
static std::string getName()
Static method to return the name of the function.
Definition functions.h:2201
TorchDNN(std::vector< float * > weights, std::vector< float * > bias, std::vector< int > dimensions)
Constructor that initializes the neural network with weights, biases, and layer dimensions.
Definition functions.h:2075
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the neural network to the input array.
Definition functions.h:2095
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:2165
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying the neural network.
Definition functions.h:2211
VectorScalarAddition(float *weights, int size)
Constructor that initializes the vector-scalar addition with weights and size.
Definition functions.h:3649
static std::string getName()
Static method to return the name of the function.
Definition functions.h:3731
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition functions.h:3701
float * getTensor() const override
Returns the tensor associated with this function.
Definition functions.h:3713
std::string getFuncName()
Returns the name of the function.
Definition functions.h:3722
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies vector-scalar addition to the input array.
Definition functions.h:3665
Namespace for deep learning-related utilities and kernels.
KernelType
Enumeration of kernel types used in deep learning operations.
Definition functions.h:2253
@ Sigmoid
Sigmoid activation kernel.
Definition functions.h:2260
@ Argmax
Argmax operation kernel.
Definition functions.h:2259
@ Softmax
Softmax activation kernel.
Definition functions.h:2257
@ MatMul
Matrix multiplication kernel.
Definition functions.h:2254
@ BatchNorm
Batch normalization kernel.
Definition functions.h:2258
@ ReLU
Rectified Linear Unit activation kernel.
Definition functions.h:2256
@ MatAdd
Matrix addition kernel.
Definition functions.h:2255
std::ostream & operator<<(std::ostream &os, KernelType kernelType)
Overloads the << operator for KernelType.
Definition functions.h:2295
std::string kernelTypeToString(KernelType kernelType)
Converts a KernelType enum value to its string representation.
Definition functions.h:2268