ML functions
 
Loading...
Searching...
No Matches
ChatGPT.h
1/*
2 * Copyright (c) 2025 ASU Cactus Lab.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
23class ChatGPT : public MLFunction {
24public:
32 apiKey_ = getEnvVar("OPENAI_API_KEY");
33 if (apiKey_ == "") {
34 throw std::runtime_error("[ERROR] OpenAI API key is not set, please set OPENAI_API_KEY");
35 }
36 numThreads_ = getEnvVar("NUM_THREADS") == "" ? 8 : std::stoi(getEnvVar("NUM_THREADS"));
37 url_ = "https://api.openai.com/v1/chat/completions";
38 model_ = "gpt-3.5-turbo";
39 inputTokenNumber_ = 0;
40 outputTokenNumber_ = 0;
41 numFailures_ = 0;
42 }
43
53 ChatGPT(std::string url, std::string model) {
54 apiKey_ = getEnvVar("OPENAI_API_KEY");
55 if (apiKey_ == "") {
56 throw std::runtime_error("[ERROR] OpenAI API key is not set, please set OPENAI_API_KEY");
57 }
58 numThreads_ = getEnvVar("NUM_THREADS") == "" ? 8 : std::stoi(getEnvVar("NUM_THREADS"));
59 url_ = url;
60 model_ = model;
61 inputTokenNumber_ = 0;
62 outputTokenNumber_ = 0;
63 numFailures_ = 0;
64 }
65
72 std::string filename = "chatgpt.log";
73 std::ofstream file(filename, std::ios::app);
74 if (!file) {
75 std::cerr << "Unable to open file: " << filename << std::endl;
76 return;
77 }
78 auto now = std::chrono::system_clock::now();
79 std::time_t now_c = std::chrono::system_clock::to_time_t(now);
80 file << std::put_time(std::localtime(&now_c), "%Y-%m-%d %H:%M:%S") << " ";
81 file << "[ChatGPT] # Input:" << inputTokenNumber_
82 << " # Output: " << outputTokenNumber_
83 << " # NumFailure: " << numFailures_ << std::endl;
84 file.close();
85 }
86
99 void apply(
100 const SelectivityVector& rows,
101 std::vector<VectorPtr>& args,
102 const TypePtr& type,
103 exec::EvalCtx& context,
104 VectorPtr& output) const override {
105 std::string promptPrefix = "";
106 BaseVector::ensureWritable(rows, type, context.pool(), output);
107
108 exec::LocalDecodedVector decodedStringHolder(context, *args[0], rows);
109 auto decodedStringInput = decodedStringHolder.get();
110
111 int numInput = rows.size();
112 int numSelected = rows.countSelected();
113 LOG(INFO) << "[INFO ChatGPT:] countSelected: " << rows.countSelected()
114 << " numInput: " << numInput << std::endl;
115
116 if (args.size() == 2) {
117 exec::LocalDecodedVector decodedStringHolder2(context, *args[1], rows);
118 auto decodedStringInput2 = decodedStringHolder2.get();
119 StringView val = decodedStringInput2->valueAt<StringView>(0);
120 promptPrefix = std::string(val);
121 }
122
123 std::vector<std::string> results;
124
125 cpr::Header headers{
126 {"Content-Type", "application/json"},
127 {"Authorization", "Bearer " + apiKey_}};
128
129 // Thread vector
130 std::vector<std::thread> threads;
131 std::vector<cpr::Response> responses(numInput);
132 std::vector<int> numFailureVector(numInput);
133
134 int numInputsPerThread = int(std::ceil(float(numSelected) / numThreads_));
135 int processedInputCount = 0;
136 std::vector<std::string> payloadsBatchVector;
137 int processedIndex = 0;
138
139 // Version 1
140 // This approach is more efficient by sending requests in batches and
141 // leveraging multiple threads to send requests concurrently, it requires
142 // additional isValid check to skip the rows that are not selected. Note: at
143 // the end of this approach, it is required to invoke
144 // context.moveOrCopyResult to copy the results back to the output vector
145 // since we only compute the results for selected ones
146 for (int i = 0; i < numInput; i++) {
147 // if the row is not selected, skip
148 if (!rows.isValid(i)) {
149 continue;
150 }
151 StringView val = decodedStringInput->valueAt<StringView>(i);
152 std::string valString = promptPrefix + std::string(val);
153 nlohmann::json messageArrays = nlohmann::json::array();
154 // Add message
155 messageArrays.push_back({{"role", "user"}, {"content", valString}});
156
157 nlohmann::json payload = {
158 {"model", model_}, {"messages", messageArrays}, {"max_tokens", 150}};
159
160 payloadsBatchVector.push_back(payload.dump());
161 processedInputCount++;
162
163 if (processedInputCount == numInputsPerThread || i == numInput - 1) {
164 threads.emplace_back(
165 sendRequestViaCprBatch,
166 url_,
167 headers,
168 payloadsBatchVector,
169 std::ref(responses),
170 processedIndex - processedInputCount + 1,
171 std::ref(numFailureVector));
172 processedInputCount = 0;
173 payloadsBatchVector.clear();
174 }
175 processedIndex++;
176 }
177
178 for (auto& thread : threads) {
179 thread.join();
180 }
181
182 for (int i = 0; i < numSelected; i++) {
183 if (responses[i].status_code == 200) {
184 // parse the returned value
185 nlohmann::json response_json = nlohmann::json::parse(responses[i].text);
186 std::string generated_message =
187 response_json["choices"][0]["message"]["content"];
188 results.push_back(generated_message);
189 const_cast<uint64_t&>(inputTokenNumber_) = inputTokenNumber_ +
190 response_json["usage"]["prompt_tokens"].get<int>();
191 const_cast<uint64_t&>(outputTokenNumber_) = outputTokenNumber_ +
192 response_json["usage"]["completion_tokens"].get<int>();
193 const_cast<uint64_t&>(numFailures_) =
194 numFailures_ + numFailureVector[i];
195 if (numFailureVector[i] > 0) {
196 LOG(WARNING)
197 << "[WARNING] Failed to send request to OpenAI API. Number of retries: "
198 << numFailureVector[i] << " numFailures_: " << numFailures_
199 << std::endl;
200 }
201 LOG(INFO) << fmt::format(
202 "[INFO] i: {} / {}, results: {}, numFailures: {}",
203 i + 1,
204 numSelected,
205 generated_message,
206 numFailureVector[i])
207 << std::endl;
208 } else {
209 LOG(ERROR) << "Error: " << responses[i].status_code << " - "
210 << responses[i].text << std::endl;
211 }
212 }
213
214 VectorMaker maker{context.pool()};
215 VectorPtr localResult = maker.flatVector<std::string>(results);
216
217 context.moveOrCopyResult(localResult, rows, output);
218 }
219
225 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
226 return {
227 exec::FunctionSignatureBuilder()
228 .argumentType("VARCHAR")
229 .returnType("VARCHAR")
230 .build(),
231 exec::FunctionSignatureBuilder()
232 .argumentType("VARCHAR")
233 .argumentType("VARCHAR")
234 .returnType("VARCHAR")
235 .build()};
236 }
237
243 float* getTensor() const override {
244 return nullptr;
245 }
246
/// Returns the registered SQL name of this function.
static std::string getName() {
  static const std::string kFunctionName = "chatgpt";
  return kFunctionName;
}
255
262 CostEstimate getCost(std::vector<int> inputDims) {
263 return CostEstimate(0, inputDims[0], inputDims[1]);
264 }
265
266private:
// API key taken from the OPENAI_API_KEY environment variable.
 267 std::string apiKey_;
// Chat-completions endpoint URL that requests are sent to.
 268 std::string url_;
// Model identifier included in each request payload.
 269 std::string model_;
// Running total of prompt tokens reported by the API.
// NOTE(review): mutated via const_cast in apply(); consider `mutable`.
 270 uint64_t inputTokenNumber_;
// Running total of completion tokens reported by the API.
 271 uint64_t outputTokenNumber_;
// Running total of request retries/failures.
 272 uint64_t numFailures_;
// Number of worker threads (NUM_THREADS env var, default 8).
 273 int numThreads_;
 274};
275
283public:
291 apiKey_ = getEnvVar("OPENAI_API_KEY");
292 if (apiKey_ == "") {
293 throw std::runtime_error("[ERROR] OpenAI API key is not set, please set OPENAI_API_KEY");
294 }
295 numThreads_ = getEnvVar("NUM_THREADS") == "" ? 8 : std::stoi(getEnvVar("NUM_THREADS"));
296 url_ = "https://api.openai.com/v1/chat/completions";
297 model_ = "gpt-3.5-turbo";
298 inputTokenNumber_ = 0;
299 outputTokenNumber_ = 0;
300 numFailures_ = 0;
301 }
302
312 ChatGPTRecommender(std::string url, std::string model) {
313 apiKey_ = getEnvVar("OPENAI_API_KEY");
314 if (apiKey_ == "") {
315 throw std::runtime_error("[ERROR] OpenAI API key is not set, please set OPENAI_API_KEY");
316 }
317 numThreads_ = getEnvVar("NUM_THREADS") == "" ? 8 : std::stoi(getEnvVar("NUM_THREADS"));
318 url_ = url;
319 model_ = model;
320 inputTokenNumber_ = 0;
321 outputTokenNumber_ = 0;
322 numFailures_ = 0;
323 }
324
331 std::string filename = "chatgpt.log";
332 std::ofstream file(filename, std::ios::app);
333 if (!file) {
334 std::cerr << "Unable to open file: " << filename << std::endl;
335 return;
336 }
337 auto now = std::chrono::system_clock::now();
338 std::time_t now_c = std::chrono::system_clock::to_time_t(now);
339 file << std::put_time(std::localtime(&now_c), "%Y-%m-%d %H:%M:%S") << " ";
340 file << "[ChatGPT Recommender] # Input:" << inputTokenNumber_
341 << " # Output: " << outputTokenNumber_
342 << " # NumFailure: " << numFailures_ << std::endl;
343 file.close();
344 }
345
358 void apply(
359 const SelectivityVector& rows,
360 std::vector<VectorPtr>& args,
361 const TypePtr& type,
362 exec::EvalCtx& context,
363 VectorPtr& output) const override {
364 std::string promptSuffix = "";
365 BaseVector::ensureWritable(rows, type, context.pool(), output);
366
367 exec::LocalDecodedVector decodedStringHolder1(context, *args[0], rows);
368 auto decodedStringInput1 = decodedStringHolder1.get();
369
370 exec::LocalDecodedVector decodedStringHolder2(context, *args[1], rows);
371 auto decodedStringInput2 = decodedStringHolder2.get();
372
373 int numInput = rows.size();
374 int numSelected = rows.countSelected();
375 LOG(INFO) << "[INFO ChatGPTRecommender:] countSelected: "
376 << rows.countSelected() << " numInput: " << numInput << std::endl;
377
378 if (args.size() == 3) {
379 exec::LocalDecodedVector decodedStringHolder3(context, *args[2], rows);
380 auto decodedStringInput3 = decodedStringHolder3.get();
381 StringView val = decodedStringInput3->valueAt<StringView>(0);
382 promptSuffix = std::string(val);
383 }
384
385 std::vector<std::string> results;
386
387 cpr::Header headers{
388 {"Content-Type", "application/json"},
389 {"Authorization", "Bearer " + apiKey_}};
390
391 // Thread vector
392 std::vector<std::thread> threads;
393 std::vector<cpr::Response> responses(numInput);
394 std::vector<int> numFailureVector(numInput);
395
396 int numInputsPerThread = int(std::ceil(float(numSelected) / numThreads_));
397 int processedInputCount = 0;
398 std::vector<std::string> payloadsBatchVector;
399 int processedIndex = 0;
400
401 for (int i = 0; i < numInput; i++) {
402 // if the row is not selected, skip
403 if (!rows.isValid(i)) {
404 continue;
405 }
406 StringView val1 = decodedStringInput1->valueAt<StringView>(i);
407 StringView val2 = decodedStringInput2->valueAt<StringView>(i);
408 std::string valString =
409 "Summarized user statistics data (preference): " + std::string(val1) +
410 ". \n Summarized user movie metadata: " + std::string(val2) + ".\n" +
411 promptSuffix;
412 nlohmann::json messageArrays = nlohmann::json::array();
413 // Add message
414 messageArrays.push_back({{"role", "user"}, {"content", valString}});
415
416 nlohmann::json payload = {
417 {"model", model_}, {"messages", messageArrays}, {"max_tokens", 500}};
418
419 payloadsBatchVector.push_back(payload.dump());
420 processedInputCount++;
421
422 if (processedInputCount == numInputsPerThread || i == numInput - 1) {
423 threads.emplace_back(
424 sendRequestViaCprBatch,
425 url_,
426 headers,
427 payloadsBatchVector,
428 std::ref(responses),
429 processedIndex - processedInputCount + 1,
430 std::ref(numFailureVector));
431 processedInputCount = 0;
432 payloadsBatchVector.clear();
433 }
434 processedIndex++;
435 }
436
437 for (auto& thread : threads) {
438 thread.join();
439 }
440
441 for (int i = 0; i < numSelected; i++) {
442 if (responses[i].status_code == 200) {
443 // parse the returned value
444 nlohmann::json response_json = nlohmann::json::parse(responses[i].text);
445 std::string generated_message =
446 response_json["choices"][0]["message"]["content"];
447 results.push_back(generated_message);
448 const_cast<uint64_t&>(inputTokenNumber_) = inputTokenNumber_ +
449 response_json["usage"]["prompt_tokens"].get<int>();
450 const_cast<uint64_t&>(outputTokenNumber_) = outputTokenNumber_ +
451 response_json["usage"]["completion_tokens"].get<int>();
452 const_cast<uint64_t&>(numFailures_) =
453 numFailures_ + numFailureVector[i];
454 if (numFailureVector[i] > 0) {
455 LOG(WARNING)
456 << "[WARNING] Failed to send request to OpenAI API. Number of retries: "
457 << numFailureVector[i] << " numFailures_: " << numFailures_
458 << std::endl;
459 }
460 LOG(INFO) << fmt::format(
461 "[INFO] i: {} / {}, results: {}, numFailures: {}",
462 i + 1,
463 numSelected,
464 generated_message,
465 numFailureVector[i])
466 << std::endl;
467 } else {
468 LOG(ERROR) << "Error: " << responses[i].status_code << " - "
469 << responses[i].text << std::endl;
470 }
471 }
472
473 VectorMaker maker{context.pool()};
474 VectorPtr localResult = maker.flatVector<std::string>(results);
475
476 context.moveOrCopyResult(localResult, rows, output);
477 }
478
484 static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
485 return {
486 exec::FunctionSignatureBuilder()
487 .argumentType("VARCHAR")
488 .argumentType("VARCHAR")
489 .returnType("VARCHAR")
490 .build(),
491 exec::FunctionSignatureBuilder()
492 .argumentType("VARCHAR")
493 .argumentType("VARCHAR")
494 .argumentType("VARCHAR")
495 .returnType("VARCHAR")
496 .build()};
497 }
498
504 float* getTensor() const override {
505 return nullptr;
506 }
507
/// Returns the registered SQL name of this function.
static std::string getName() {
  static const std::string kFunctionName = "chatgpt_recommender";
  return kFunctionName;
}
516
523 CostEstimate getCost(std::vector<int> inputDims) {
524 return CostEstimate(0, inputDims[0], inputDims[1]);
525 }
526
527private:
// API key taken from the OPENAI_API_KEY environment variable.
 528 std::string apiKey_;
// Chat-completions endpoint URL that requests are sent to.
 529 std::string url_;
// Model identifier included in each request payload.
 530 std::string model_;
// Running total of prompt tokens reported by the API.
// NOTE(review): mutated via const_cast in apply(); consider `mutable`.
 531 uint64_t inputTokenNumber_;
// Running total of completion tokens reported by the API.
 532 uint64_t outputTokenNumber_;
// Running total of request retries/failures.
 533 uint64_t numFailures_;
// Number of worker threads (NUM_THREADS env var, default 8).
 534 int numThreads_;
 535};
std::string getEnvVar(std::string const &key)
Retrieves the value of an environment variable.
Definition UtilFunction.h:483
float * getTensor() const override
Returns the tensor associated with this function.
Definition ChatGPT.h:504
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition ChatGPT.h:484
static std::string getName()
Returns the name of the function.
Definition ChatGPT.h:513
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the ChatGPTRecommender function to the input array.
Definition ChatGPT.h:358
ChatGPTRecommender()
Default constructor.
Definition ChatGPT.h:290
~ChatGPTRecommender()
Destructor.
Definition ChatGPT.h:330
ChatGPTRecommender(std::string url, std::string model)
Constructor with custom URL and model.
Definition ChatGPT.h:312
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying the ChatGPTRecommender function.
Definition ChatGPT.h:523
float * getTensor() const override
Returns the tensor associated with this function.
Definition ChatGPT.h:243
CostEstimate getCost(std::vector< int > inputDims)
Estimates the computational cost of applying the ChatGPT function.
Definition ChatGPT.h:262
ChatGPT()
Default constructor.
Definition ChatGPT.h:31
static std::vector< std::shared_ptr< exec::FunctionSignature > > signatures()
Returns the function signatures supported by this class.
Definition ChatGPT.h:225
static std::string getName()
Returns the name of the function.
Definition ChatGPT.h:252
ChatGPT(std::string url, std::string model)
Constructor with custom URL and model.
Definition ChatGPT.h:53
void apply(const SelectivityVector &rows, std::vector< VectorPtr > &args, const TypePtr &type, exec::EvalCtx &context, VectorPtr &output) const override
Applies the ChatGPT function to the input array.
Definition ChatGPT.h:99
~ChatGPT()
Destructor.
Definition ChatGPT.h:71
A base class for machine learning functions, inheriting from Velox's VectorFunction.
Definition BaseFunction.h:9