ML functions
 
Loading...
Searching...
No Matches
BatchNorm.h
1/*
2 * Copyright (c) 2025 ASU Cactus Lab.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
24class BatchNorm1D : public MLFunction {
25public:
34 BatchNorm1D(float* weights, float* bias, int numDims, float eps = 1e-05) {
35 weights_ = new float[numDims];
36 bias_ = new float[numDims];
37 std::memcpy(weights_, weights, numDims * sizeof(float));
38 std::memcpy(bias_, bias, numDims * sizeof(float));
39 eps_ = eps;
40 dims.push_back(numDims);
41 }
42
54 void apply(
55 const SelectivityVector& rows,
56 std::vector<VectorPtr>& args,
57 const TypePtr& type,
58 exec::EvalCtx& context,
59 VectorPtr& output) const override {
60 BaseVector::ensureWritable(rows, type, context.pool(), output);
61 output->clearNulls(rows);
62 auto arrayOutput = output->as<ArrayVector>();
63 auto sizes = arrayOutput->mutableSizes(rows.end());
64 auto rawSizes = sizes->asMutable<int32_t>();
65 auto offsets = arrayOutput->mutableOffsets(rows.end());
66 auto rawOffsets = offsets->asMutable<int32_t>();
67
68 // Initialize sizes and offsets to zero.
69 std::fill(rawSizes, rawSizes + rows.end(), 0);
70 std::fill(rawOffsets, rawOffsets + rows.end(), 0);
71
72 auto elementsOutput = arrayOutput->elements();
73 auto elementsPool = context.pool();
74
75 exec::DecodedArgs decodedArgs(rows, args, context);
76 auto decodedInput = decodedArgs.at(0);
77 auto numRows = rows.size();
78
79 auto inputArray = decodedInput->base()->as<ArrayVector>();
80 auto inputElements = inputArray->elements();
81 float* inputValues = inputElements->values()->asMutable<float>();
82 auto inputOffsets = inputArray->rawOffsets();
83 auto inputSizes = inputArray->rawSizes();
84
85 std::map<vector_size_t, vector_size_t> rowMap;
86 std::unordered_set<vector_size_t> uniqueRawIndexeSet;
87 std::vector<vector_size_t> uniqueRawIndexeVector;
88 vector_size_t numUniqueRows = 0;
89 int numCols = dims[0];
90 rows.applyToSelected([&](vector_size_t row) {
91 auto mappedIndexInRowData = decodedInput->index(row);
92 if (uniqueRawIndexeSet.find(mappedIndexInRowData) ==
93 uniqueRawIndexeSet.end()) {
94 // add it
95 rowMap[row] = numUniqueRows;
96 uniqueRawIndexeSet.insert(mappedIndexInRowData);
97 uniqueRawIndexeVector.push_back(mappedIndexInRowData);
98 ++numUniqueRows;
99 } else {
100 // already added
101 rowMap[row] = rowMap[mappedIndexInRowData];
102 }
103 });
104
105 int numInputMatrixRows = numUniqueRows;
106 Eigen::MatrixXf inputMatrix(numInputMatrixRows, numCols);
107 int rowIndex = 0;
108 for (auto rawIndex : uniqueRawIndexeVector) {
109 Eigen::Map<const Eigen::VectorXf> rowVector(
110 inputValues + inputOffsets[rawIndex], numCols);
111 inputMatrix.row(rowIndex++) = rowVector;
112 }
113
114 Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
115 resultMatrix(numInputMatrixRows, numCols);
116 for (int i = 0; i < numCols; i++) {
117 Eigen::VectorXf colData = inputMatrix.col(i);
118 float colMean = colData.mean();
119 float colVariance =
120 (colData.array() - colMean).square().sum() / (numInputMatrixRows - 1);
121
122 resultMatrix.col(i) =
123 (colData.array() - colMean) / sqrt(colVariance + eps_) * weights_[i] +
124 bias_[i];
125 }
126
127 auto baseOffset = elementsOutput->size();
128 elementsOutput->resize(baseOffset + rows.end() * numCols);
129 float* outputValues = elementsOutput->values()->asMutable<float>();
130 vector_size_t outputOffset = 0;
131 rows.applyToSelected([&](vector_size_t row) {
132 if (rowMap.find(row) == rowMap.end()) {
133 throw std::runtime_error(
134 "Mapped index not found for the result matrix.");
135 }
136 auto mappedIndexInResultMatrix = rowMap[row];
137 rawOffsets[row] = outputOffset;
138 rawSizes[row] = numCols;
139 std::memcpy(
140 outputValues + outputOffset,
141 resultMatrix.row(mappedIndexInResultMatrix).data(),
142 numCols * sizeof(float));
143 });
144
145 arrayOutput->setElements(elementsOutput);
146 }
147
153 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
154 return {exec::FunctionSignatureBuilder()
155 .argumentType("array(REAL)")
156 .returnType("array(REAL)")
157 .build()};
158 }
159
165 float* getTensor() const override {
166 return weights_;
167 }
168
174 float* getWeight() {
175 return weights_;
176 }
177
183 float* getBias() {
184 return bias_;
185 }
186
192 static std::string getName() {
193 return "batch_norm_1d";
194 }
195
201 std::string getWeightsFile() {
202 return weightsFile_;
203 }
204
210 void setWeights(float* weights) {
211 weights_ = weights;
212 }
213
220 CostEstimate getCost(std::vector<int> inputDims) {
221 return CostEstimate(0, inputDims[0], inputDims[1]);
222 }
223
224private:
225 float* weights_;
226 float* bias_;
227 float eps_;
228 std::string weightsFile_;
229 std::string biasFile_;
230};
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying batch normalization.
Definition BatchNorm.h:220
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition BatchNorm.h:153
float * getBias()
Returns the biases of the batch normalization.
Definition BatchNorm.h:183
std::string getWeightsFile()
Returns the path to the weights file.
Definition BatchNorm.h:201
void setWeights(float *weights)
Sets the weights for the batch normalization.
Definition BatchNorm.h:210
float * getWeight()
Returns the weights of the batch normalization.
Definition BatchNorm.h:174
float * getTensor() const override
Returns the tensor associated with this function.
Definition BatchNorm.h:165
BatchNorm1D(float *weights, float *bias, int numDims, float eps=1e-05)
Constructor that initializes the batch normalization operation with weights, biases, the number of feature dimensions, and an epsilon for numerical stability.
Definition BatchNorm.h:34
static std::string getName()
Returns the name of the function.
Definition BatchNorm.h:192
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies 1D batch normalization to the input array.
Definition BatchNorm.h:54
A base class for machine learning functions, inheriting from Velox's VectorFunction.
Definition BaseFunction.h:9
std::vector< int > dims
Dimensions of the function.
Definition BaseFunction.h:61