25 Node
forest[MAX_NUM_TREES][MAX_NUM_NODES_PER_TREE];
52 std::string pathToFolder,
53 std::vector<std::string>& pathVector) {
54 if (pathToFolder[pathToFolder.length() - 1] !=
'/') {
55 pathToFolder = pathToFolder + std::string(
"/");
58 DIR* dr = opendir(pathToFolder.c_str());
60 struct dirent* file = NULL;
62 while ((file = readdir(dr)) != NULL) {
63 if ((strcmp(file->d_name,
".") != 0) && (strcmp(file->d_name,
".."))) {
64 std::string path = pathToFolder + std::string(file->d_name);
66 pathVector.push_back(path);
79 std::vector<std::string> treePaths;
92 this->numTrees = treesPathIn.size();
95 Tree::constructTreeFromPath(treesPathIn[n], &(
forest[n][0]));
100 <<
"[Forest-constructForestFromPaths] Number of trees in the forest: "
114 std::vector<float>& resultVector,
117 auto inputFeatures = input->as<ArrayVector>()->elements();
119 float* inputValues = inputFeatures->values()->asMutable<
float>();
121 float* outData = resultVector.data();
123 for (
int rowIndex = 0; rowIndex < numInputs; rowIndex++) {
124 int curBase = rowIndex * numFeatures;
126 float accumulatedResult = 0.0;
128 for (
int treeIndex = 0; treeIndex <
numTrees; treeIndex++) {
131 Node* tree =
forest[treeIndex];
133 while (!tree[curIndex].isLeaf) {
134 const float featureValue =
135 inputValues[curBase + tree[curIndex].indexID];
137 curIndex = featureValue < tree[curIndex].threshold
138 ? tree[curIndex].leftChild
139 : tree[curIndex].rightChild;
142 accumulatedResult += (float)(tree[curIndex].leafValue);
148 accumulatedResult = (accumulatedResult > 0.0) ? 1.0 : 0.0;
151 outData[rowIndex] = accumulatedResult;
171 std::string forestPath,
173 bool isClassification) {
174 if (!std::filesystem::exists(forestPath)) {
175 throw std::runtime_error(
176 "[ForestPrediction] Path not exists: " + forestPath);
179 this->forest = std::make_shared<Forest>(forestPath, isClassification);
181 this->numFeatures = numFeatures;
183 this->forestPath = forestPath;
185 this->isClassification = isClassification;
201 const SelectivityVector& rows,
202 std::vector<VectorPtr>& args,
204 exec::EvalCtx& context,
205 VectorPtr& output)
const override {
206 BaseVector::ensureWritable(rows, type, context.pool(), output);
208 int numInputs = rows.size();
210 std::vector<float> resultVector(numInputs);
212 this->forest->predict(args[0], resultVector, numInputs, this->numFeatures);
214 VectorMaker maker{context.pool()};
216 output = maker.flatVector<
float>(resultVector, REAL());
224 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
225 return {exec::FunctionSignatureBuilder()
226 .argumentType(
"array(REAL)")
236 float* getTensor()
const override {
245 static std::string getName() {
246 return "tree_predict";
254 int getNumFeatures() {
263 std::string& getForestPath() {
264 return this->forestPath;
272 bool getClassification() {
273 return this->isClassification;
279 std::string forestPath;
280 bool isClassification;
A class that implements a random forest prediction function, inheriting from MLFunction.
void predict(VectorPtr &input, std::vector< float > &resultVector, int numInputs, int numFeatures)
Makes predictions using the forest.
Definition DecisionForest.h:112
bool isClassification
Flag indicating whether the forest is used for classification.
Definition DecisionForest.h:27
static void vectorizeForestFolder(std::string pathToFolder, std::vector< std::string > &pathVector)
Scans a folder and collects paths to tree files.
Definition DecisionForest.h:51
void constructForestFromPaths(std::vector< std::string > &treesPathIn)
Constructs the forest from a list of tree file paths.
Definition DecisionForest.h:91
void constructForestFromFolder(std::string pathToFolder)
Constructs the forest from a folder of tree files.
Definition DecisionForest.h:78
Forest(std::string pathToFolder, bool isClassification)
Constructor that initializes the forest from a folder of tree files.
Definition DecisionForest.h:40
int numTrees
Number of trees in the forest.
Definition DecisionForest.h:26
Forest()
Default constructor.
Definition DecisionForest.h:32
Node forest[MAX_NUM_TREES][MAX_NUM_NODES_PER_TREE]
Array of trees in the forest.
Definition DecisionForest.h:25
A base class for machine learning functions, inheriting from Velox's VectorFunction.
Definition BaseFunction.h:9