Skip to main content

Quant.h File

Quant Node — CVU kernel that quantizes FP32 to INT8 (scale + zero-point). More...

Included Headers

#include "builder/NodeContractConfigurable.h"
#include "builder/NodeContractProvider.h"
#include "builder/Node.h"
#include <nlohmann/json.hpp>
#include <memory>
#include <optional>
#include <string>
#include <vector>

Namespaces Index

namespace simaai
namespace neat
namespace nodes

Classes Index

struct QuantOptions

Construction options for a Quant Node. More...

class Quant

CVU kernel Node that quantizes FP32 tensors to INT8 using scale + zero-point. More...

Description

Quant Node — CVU kernel that quantizes FP32 to INT8 (scale + zero-point).

Reads FP32 tensors and emits INT8 tensors using the per-tensor scale and zero-point bound to the model. On the INT8 path, this Node is inserted at the MLA boundary, immediately ahead of the MLA, when the upstream stage emits FP32.

See Also

"The dtype contract" page in /concepts/dtype_contract

File Listing

The file content with the documentation metadata removed is:

1
12#pragma once
13
14#include "builder/NodeContractConfigurable.h"
15#include "builder/NodeContractProvider.h"
16#include "builder/Node.h"
17#ifdef SIMA_NEAT_INTERNAL
18#include "model/internal/ModelRouteRetarget.h"
19#endif
20
21#include <nlohmann/json.hpp>
22
23#include <memory>
24#include <optional>
25#include <string>
26#include <vector>
27
28namespace simaai::neat {
29class Model;
30struct CompiledProcessCvuContract;
31
39 QuantOptions() = default;
41 explicit QuantOptions(const simaai::neat::Model& model);
42
43 std::string config_path;
44 std::optional<nlohmann::json>
46 std::string element_name;
47 std::shared_ptr<const CompiledProcessCvuContract>
49 int num_buffers = 0;
51 bool num_buffers_locked = false;
52#ifdef SIMA_NEAT_INTERNAL
53 std::shared_ptr<const simaai::neat::internal::ModelLineageBinding> model_lineage;
54#endif
55};
56
68class Quant final : public Node, public NodeContractProvider, public NodeContractConfigurable {
69public:
71 explicit Quant(QuantOptions opt = {});
72 struct ConfigHolder;
73
75 std::string kind() const override {
76 return "Quant";
77 }
79 NodeCapsBehavior caps_behavior() const override {
80 return NodeCapsBehavior::Static;
81 }
85 bool compile_node_contract(const ContractCompileInput& input, CompiledNodeContract* out,
86 std::string* err) const override;
88 void apply_compiled_contract(const CompiledNodeContract& contract, std::string* err) override;
90 std::string backend_fragment(int node_index) const override;
92 std::vector<std::string> element_names(int node_index) const override;
93
95 const nlohmann::json* config_json() const;
96
98 const QuantOptions& options() const {
99 return opt_;
100 }
102 const std::string& config_path() const {
103 return config_path_;
104 }
105
106#ifdef SIMA_NEAT_INTERNAL
107 const std::optional<CompiledProcessCvuContract>& compiled_contract_internal() const;
108#endif
109
110private:
111 QuantOptions opt_;
112 std::shared_ptr<ConfigHolder> config_holder_;
113 std::string config_path_;
114};
115
116} // namespace simaai::neat
117
118namespace simaai::neat::nodes {
120std::shared_ptr<simaai::neat::Node> Quant(QuantOptions opt = {});
121} // namespace simaai::neat::nodes

Generated via doxygen2docusaurus 2.0.0 by Doxygen 1.9.8.