4 changes: 2 additions & 2 deletions include/infinicore/adaptor/aten_adaptor.hpp
@@ -6,9 +6,9 @@
 #include <ATen/ATen.h>
 
 #if defined(ENABLE_NVIDIA_API) || defined(ENABLE_QY_API)
-#include <c10/cuda/CUDAStream.h>
-#include <c10/cuda/CUDAGuard.h>
 #include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <c10/cuda/CUDAStream.h>
 #endif
 
 namespace infinicore::adaptor {
1 change: 1 addition & 0 deletions include/infinicore/nn.hpp
@@ -1,5 +1,6 @@
 #pragma once
 
 #include "nn/embedding.hpp"
+#include "nn/layernorm.hpp"
 #include "nn/linear.hpp"
 #include "nn/rmsnorm.hpp"
10 changes: 5 additions & 5 deletions include/infinicore/nn/embedding.hpp
@@ -1,7 +1,7 @@
 #pragma once
 
-#include "module.hpp"
 #include "../ops.hpp"
+#include "module.hpp"
 #include <optional>
 
 namespace infinicore::nn {
@@ -78,10 +78,10 @@ class Embedding : public Module {
     INFINICORE_NN_PARAMETER(weight);
 
 private:
-    size_t num_embeddings_; // Vocabulary size
-    size_t embedding_dim_; // Embedding dimension
-    std::optional<int64_t> padding_idx_; // Optional padding index
-    DataType dtype_; // Data type for embedding weights
+    size_t num_embeddings_;              // Vocabulary size
+    size_t embedding_dim_;               // Embedding dimension
+    std::optional<int64_t> padding_idx_; // Optional padding index
+    DataType dtype_;                     // Data type for embedding weights
 };
 
 } // namespace infinicore::nn
60 changes: 60 additions & 0 deletions include/infinicore/nn/layernorm.hpp
@@ -0,0 +1,60 @@
#pragma once

#include "../ops.hpp"
#include "module.hpp"

namespace infinicore::nn {

/**
 * @brief Layer Normalization
 *
 * Applies LayerNorm over the last dimension.
 *
 * Formula: y = (x - mean) / sqrt(var + eps) * weight + bias
 */
class LayerNorm : public Module {
public:
    /**
     * @brief Construct a LayerNorm layer
     *
     * @param normalized_shape Size of the feature dimension to normalize (typically hidden_size)
     * @param eps Small constant for numerical stability (default: 1e-5)
     * @param dtype Data type for the weight/bias (default: DataType::F32)
     * @param device Device to create the parameters on
     */
    LayerNorm(size_t normalized_shape,
              double eps = 1e-5,
              const DataType &dtype = DataType::F32,
              const Device &device = Device());

    /**
     * @brief Forward pass: apply LayerNorm
     *
     * @param x Input tensor of shape (*, normalized_shape)
     * @return Normalized tensor with same shape as input
     */
    Tensor forward(const Tensor &x) const;

    // Module information
    size_t normalized_shape() const { return normalized_shape_; }
    double eps() const { return eps_; }
    DataType dtype() const { return dtype_; }

    // String representation
    std::string extra_repr() const;

    // Accessors for parameters
    Tensor weight() const { return weight_; }
    Tensor bias() const { return bias_; }

protected:
    INFINICORE_NN_PARAMETER(weight);
    INFINICORE_NN_PARAMETER(bias);

private:
    size_t normalized_shape_;
    double eps_;
    DataType dtype_;
};

} // namespace infinicore::nn
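
For reference, the formula in the class comment, y = (x - mean) / sqrt(var + eps) * weight + bias, can be sanity-checked against a plain C++ sketch over a single feature row. This is a minimal illustration of the math under the usual biased-variance convention, independent of the infinicore Tensor API, not the kernel the module dispatches to:

#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

// Minimal sketch: y = (x - mean) / sqrt(var + eps) * weight + bias
// over one row of normalized_shape floats.
std::vector<float> layer_norm_ref(const std::vector<float> &x,
                                  const std::vector<float> &weight,
                                  const std::vector<float> &bias,
                                  float eps = 1e-5f) {
    const std::size_t n = x.size();
    float mean = 0.0f;
    for (float v : x) mean += v;
    mean /= static_cast<float>(n);
    float var = 0.0f;
    for (float v : x) var += (v - mean) * (v - mean);
    var /= static_cast<float>(n); // biased variance, as in standard LayerNorm
    std::vector<float> y(n);
    for (std::size_t i = 0; i < n; ++i) {
        y[i] = (x[i] - mean) / std::sqrt(var + eps) * weight[i] + bias[i];
    }
    return y;
}

int main() {
    std::vector<float> x = {1.0f, 2.0f, 3.0f, 4.0f};
    std::vector<float> w(4, 1.0f), b(4, 0.0f);
    for (float v : layer_norm_ref(x, w, b)) std::printf("%f\n", v); // ~ -1.34, -0.45, 0.45, 1.34
}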
2 changes: 1 addition & 1 deletion include/infinicore/nn/module.hpp
@@ -3,10 +3,10 @@
 #include "../tensor.hpp"
 #include "parameter.hpp"
 
+#include <spdlog/spdlog.h>
 #include <type_traits>
 #include <unordered_map>
 #include <vector>
-#include <spdlog/spdlog.h>
 
 namespace infinicore::nn {
 class Module {
8 changes: 8 additions & 0 deletions include/infinicore/ops.hpp
@@ -14,26 +14,34 @@
#include "ops/binary_cross_entropy_with_logits.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/cdist.hpp"
#include "ops/conv2d.hpp"
#include "ops/cross_entropy.hpp"
#include "ops/embedding.hpp"
#include "ops/flash_attention.hpp"
#include "ops/fmin.hpp"
#include "ops/fmod.hpp"
#include "ops/gelu.hpp"
#include "ops/gelutanh.hpp"
#include "ops/hardswish.hpp"
#include "ops/hardtanh.hpp"
#include "ops/kv_caching.hpp"
#include "ops/layer_norm.hpp"
#include "ops/linear.hpp"
#include "ops/matmul.hpp"
#include "ops/ones.hpp"
#include "ops/paged_attention.hpp"
#include "ops/paged_attention_prefill.hpp"
#include "ops/paged_caching.hpp"
#include "ops/per_tensor_dequant_i8.hpp"
#include "ops/per_tensor_quant_i8.hpp"
#include "ops/quickgelu.hpp"
#include "ops/random_sample.hpp"
#include "ops/rearrange.hpp"
#include "ops/reciprocal.hpp"
#include "ops/relu.hpp"
#include "ops/rms_norm.hpp"
#include "ops/rope.hpp"
#include "ops/silu.hpp"
#include "ops/silu_and_mul.hpp"
#include "ops/softmax.hpp"
#include "ops/swiglu.hpp"
38 changes: 38 additions & 0 deletions include/infinicore/ops/conv2d.hpp
@@ -0,0 +1,38 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

#include <cstddef>
#include <vector>

namespace infinicore::op {
class Conv2d {
public:
    using schema = void (*)(Tensor, Tensor, Tensor, Tensor,
                            const size_t *, const size_t *, const size_t *, size_t);
    static void execute(Tensor output,
                        Tensor input,
                        Tensor weight,
                        Tensor bias,
                        const size_t *pads,
                        const size_t *strides,
                        const size_t *dilations,
                        size_t n);
    static common::OpDispatcher<schema> &dispatcher();
};

Tensor conv2d(Tensor input,
              Tensor weight,
              Tensor bias,
              const std::vector<size_t> &pads,
              const std::vector<size_t> &strides,
              const std::vector<size_t> &dilations);
void conv2d_(Tensor output,
             Tensor input,
             Tensor weight,
             Tensor bias,
             const std::vector<size_t> &pads,
             const std::vector<size_t> &strides,
             const std::vector<size_t> &dilations);
} // namespace infinicore::op
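
Each of pads, strides, and dilations is given per spatial dimension (n in the raw execute schema is presumably the number of such dimensions). A naive single-channel sketch illustrates the intended semantics and the usual output-size arithmetic, out = (in + 2*pad - dilation*(k - 1) - 1) / stride + 1; this is an illustration under standard convolution conventions, not infinicore's implementation:

#include <cstddef>
#include <vector>

// Naive single-channel 2D convolution showing how pads, strides, and
// dilations determine the output size.
std::vector<float> conv2d_ref(const std::vector<float> &input, std::size_t ih, std::size_t iw,
                              const std::vector<float> &kernel, std::size_t kh, std::size_t kw,
                              std::size_t pad, std::size_t stride, std::size_t dilation) {
    const std::size_t oh = (ih + 2 * pad - dilation * (kh - 1) - 1) / stride + 1;
    const std::size_t ow = (iw + 2 * pad - dilation * (kw - 1) - 1) / stride + 1;
    std::vector<float> out(oh * ow, 0.0f);
    for (std::size_t oy = 0; oy < oh; ++oy) {
        for (std::size_t ox = 0; ox < ow; ++ox) {
            float acc = 0.0f;
            for (std::size_t ky = 0; ky < kh; ++ky) {
                for (std::size_t kx = 0; kx < kw; ++kx) {
                    // Input coordinate for this kernel tap, shifted by padding.
                    const long iy = static_cast<long>(oy * stride + ky * dilation) - static_cast<long>(pad);
                    const long ix = static_cast<long>(ox * stride + kx * dilation) - static_cast<long>(pad);
                    if (iy >= 0 && iy < static_cast<long>(ih) && ix >= 0 && ix < static_cast<long>(iw)) {
                        acc += input[iy * iw + ix] * kernel[ky * kw + kx];
                    }
                }
            }
            out[oy * ow + ox] = acc;
        }
    }
    return out;
}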
16 changes: 16 additions & 0 deletions include/infinicore/ops/gelu.hpp
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class Gelu {
public:
    using schema = void (*)(Tensor, Tensor);
    static void execute(Tensor output, Tensor input);
    static common::OpDispatcher<schema> &dispatcher();
};

Tensor gelu(Tensor input);
void gelu_(Tensor output, Tensor input);
} // namespace infinicore::op
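
The header does not spell out the formula. Assuming Gelu is the exact erf-based variant, GELU(x) = x * Phi(x) = 0.5 * x * (1 + erf(x / sqrt(2))), with the tanh approximation split out into gelutanh.hpp below, a scalar reference is:

#include <cmath>

// Exact GELU: 0.5 * x * (1 + erf(x / sqrt(2))).
// Assumes op::gelu implements the erf-based variant.
float gelu_ref(float x) {
    return 0.5f * x * (1.0f + std::erf(x * 0.7071067811865475f)); // 1/sqrt(2)
}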
16 changes: 16 additions & 0 deletions include/infinicore/ops/gelutanh.hpp
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class GeluTanh {
public:
    using schema = void (*)(Tensor, Tensor);
    static void execute(Tensor output, Tensor input);
    static common::OpDispatcher<schema> &dispatcher();
};

Tensor gelu_tanh(Tensor input);
void gelu_tanh_(Tensor output, Tensor input);
} // namespace infinicore::op
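
Assuming GeluTanh is the standard tanh approximation of GELU, a matching scalar sketch:

#include <cmath>

// Tanh-approximated GELU:
// 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))).
float gelu_tanh_ref(float x) {
    const float c = 0.7978845608f; // sqrt(2 / pi)
    return 0.5f * x * (1.0f + std::tanh(c * (x + 0.044715f * x * x * x)));
}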
28 changes: 28 additions & 0 deletions include/infinicore/ops/layer_norm.hpp
@@ -0,0 +1,28 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class LayerNorm {
public:
    using schema = void (*)(Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, float);
    static void execute(Tensor output,
                        Tensor input_standardization,
                        Tensor input_std_deviation,
                        Tensor input,
                        Tensor weight,
                        Tensor bias,
                        float epsilon);
    static common::OpDispatcher<schema> &dispatcher();
};

Tensor layer_norm(Tensor input, Tensor weight, Tensor bias, float epsilon = 1e-5f);
void layer_norm_(Tensor output,
                 Tensor input_standardization,
                 Tensor input_std_deviation,
                 Tensor input,
                 Tensor weight,
                 Tensor bias,
                 float epsilon = 1e-5f);
} // namespace infinicore::op
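
A hedged usage sketch of the out-of-place overload declared above. The tensors here are placeholders, since their construction API is not part of this diff, and it is an assumption (suggested by the narrower signature) that layer_norm allocates the auxiliary input_standardization and input_std_deviation buffers internally:

#include <infinicore/ops.hpp>

using namespace infinicore;

// Placeholders only: how Tensors are created is not shown in this PR.
extern Tensor activations; // shape (*, hidden_size)
extern Tensor gamma;       // shape (hidden_size)
extern Tensor beta;        // shape (hidden_size)

Tensor normalize() {
    // Out-of-place form; epsilon defaults to 1e-5f per the declaration.
    return op::layer_norm(activations, gamma, beta);
}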
16 changes: 8 additions & 8 deletions include/infinicore/ops/mha_kvcache.hpp
@@ -22,14 +22,14 @@ namespace infinicore::op {
 
 INFINICORE_GRAPH_OP_CLASS(
     MhaKVCache,
-    Tensor, // out
-    const Tensor &, // q
-    const Tensor &, // k_cache
-    const Tensor &, // v_cache
-    const Tensor &, // seqlens_k
-    const Tensor &, // block_table
-    std::optional<Tensor>, // alibi_slopes
-    float); // scale
+    Tensor,                // out
+    const Tensor &,        // q
+    const Tensor &,        // k_cache
+    const Tensor &,        // v_cache
+    const Tensor &,        // seqlens_k
+    const Tensor &,        // block_table
+    std::optional<Tensor>, // alibi_slopes
+    float);                // scale
 
 Tensor mha_kvcache(const Tensor &q,
                    const Tensor &k_cache,
16 changes: 16 additions & 0 deletions include/infinicore/ops/quickgelu.hpp
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class QuickGelu {
public:
    using schema = void (*)(Tensor, Tensor);
    static void execute(Tensor output, Tensor input);
    static common::OpDispatcher<schema> &dispatcher();
};

Tensor quick_gelu(Tensor input);
void quick_gelu_(Tensor output, Tensor input);
} // namespace infinicore::op
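
Assuming QuickGelu is the sigmoid shortcut used in CLIP-style models, x * sigmoid(1.702 * x), a scalar sketch:

#include <cmath>

// QuickGELU: x * sigmoid(1.702 * x), a cheap approximation of GELU.
float quick_gelu_ref(float x) {
    return x / (1.0f + std::exp(-1.702f * x));
}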
16 changes: 16 additions & 0 deletions include/infinicore/ops/relu.hpp
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class Relu {
public:
    using schema = void (*)(Tensor, Tensor);
    static void execute(Tensor output, Tensor input);
    static common::OpDispatcher<schema> &dispatcher();
};

Tensor relu(Tensor input);
void relu_(Tensor output, Tensor input);
} // namespace infinicore::op
16 changes: 16 additions & 0 deletions include/infinicore/ops/softmax.hpp
@@ -0,0 +1,16 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
class Softmax {
public:
    using schema = void (*)(Tensor, Tensor, int);
    static void execute(Tensor output, Tensor input, int axis);
    static common::OpDispatcher<schema> &dispatcher();
};

Tensor softmax(Tensor input, int axis = -1);
void softmax_(Tensor output, Tensor input, int axis = -1);
} // namespace infinicore::op
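
A numerically stable reference for a single row (the default axis = -1 case): subtracting the row max before exponentiating keeps exp from overflowing, and the shift cancels in the normalization. A sketch of the semantics only, not the dispatched kernel:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Stable softmax over one contiguous row.
std::vector<float> softmax_ref(const std::vector<float> &x) {
    const float max_v = *std::max_element(x.begin(), x.end());
    std::vector<float> y(x.size());
    float sum = 0.0f;
    for (std::size_t i = 0; i < x.size(); ++i) {
        y[i] = std::exp(x[i] - max_v); // shifted for numerical stability
        sum += y[i];
    }
    for (float &v : y) v /= sum;
    return y;
}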
2 changes: 1 addition & 1 deletion include/infinicore/quantization/compressed_tensors.hpp
@@ -9,7 +9,7 @@ class CompressedTensors : public BaseQuantization {
     // information and support multiple quantization schemes.
 public:
     explicit CompressedTensors(const nlohmann::json &quant_config)
-        : BaseQuantization(quant_config) {};
+        : BaseQuantization(quant_config){};
 
     infinicore::quantization::QuantScheme
     get_quant_scheme() const override {
2 changes: 1 addition & 1 deletion include/infinicore/quantization/none_quantizaiton.hpp
@@ -9,7 +9,7 @@ class NoneQuantization : public BaseQuantization {
     // information and support multiple quantization schemes.
 public:
     explicit NoneQuantization(const nlohmann::json &quant_config)
-        : BaseQuantization(quant_config) {};
+        : BaseQuantization(quant_config){};
 
     infinicore::quantization::QuantScheme
     get_quant_scheme() const override {
2 changes: 2 additions & 0 deletions include/infiniop.h
@@ -38,6 +38,7 @@
 #include "infiniop/ops/fmin.h"
 #include "infiniop/ops/fmod.h"
 #include "infiniop/ops/gelu.h"
+#include "infiniop/ops/gelutanh.h"
 #include "infiniop/ops/gemm.h"
 #include "infiniop/ops/hardswish.h"
 #include "infiniop/ops/hardtanh.h"
@@ -66,6 +67,7 @@
 #include "infiniop/ops/paged_caching.h"
 #include "infiniop/ops/quant/per_channel_quant_int8.h"
 #include "infiniop/ops/quant/per_tensor_quant_int8.h"
+#include "infiniop/ops/quickgelu.h"
 #include "infiniop/ops/random_sample.h"
 #include "infiniop/ops/rearrange.h"
 #include "infiniop/ops/reciprocal.h"
20 changes: 10 additions & 10 deletions include/infiniop/ops/add.h
@@ -6,20 +6,20 @@
 typedef struct InfiniopDescriptor *infiniopAddDescriptor_t;
 
 __INFINI_C __export infiniStatus_t infiniopCreateAddDescriptor(infiniopHandle_t handle,
-    infiniopAddDescriptor_t *desc_ptr,
-    infiniopTensorDescriptor_t c,
-    infiniopTensorDescriptor_t a,
-    infiniopTensorDescriptor_t b);
+                                                               infiniopAddDescriptor_t *desc_ptr,
+                                                               infiniopTensorDescriptor_t c,
+                                                               infiniopTensorDescriptor_t a,
+                                                               infiniopTensorDescriptor_t b);
 
 __INFINI_C __export infiniStatus_t infiniopGetAddWorkspaceSize(infiniopAddDescriptor_t desc, size_t *size);
 
 __INFINI_C __export infiniStatus_t infiniopAdd(infiniopAddDescriptor_t desc,
-    void *workspace,
-    size_t workspace_size,
-    void *c,
-    const void *a,
-    const void *b,
-    void *stream);
+                                               void *workspace,
+                                               size_t workspace_size,
+                                               void *c,
+                                               const void *a,
+                                               const void *b,
+                                               void *stream);
 
 __INFINI_C __export infiniStatus_t infiniopDestroyAddDescriptor(infiniopAddDescriptor_t desc);
 
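
The add.h declarations imply the usual descriptor lifecycle: create, query workspace, execute, destroy. A hedged sketch of that call pattern follows; the handle, tensor descriptors, and buffers are assumed to come from elsewhere, and the INFINI_STATUS_SUCCESS name and host malloc are assumptions (a real caller would use a device-appropriate allocation):

#include <infiniop.h>

#include <cstdlib>

// Sketch of the descriptor lifecycle implied by add.h:
// create -> query workspace -> execute -> destroy.
infiniStatus_t run_add(infiniopHandle_t handle,
                       infiniopTensorDescriptor_t c_desc,
                       infiniopTensorDescriptor_t a_desc,
                       infiniopTensorDescriptor_t b_desc,
                       void *c, const void *a, const void *b,
                       void *stream) {
    infiniopAddDescriptor_t desc;
    infiniStatus_t status = infiniopCreateAddDescriptor(handle, &desc, c_desc, a_desc, b_desc);
    if (status != INFINI_STATUS_SUCCESS) { // assumed success-code name
        return status;
    }
    size_t workspace_size = 0;
    status = infiniopGetAddWorkspaceSize(desc, &workspace_size);
    if (status == INFINI_STATUS_SUCCESS) {
        void *workspace = std::malloc(workspace_size); // device allocation in real code
        status = infiniopAdd(desc, workspace, workspace_size, c, a, b, stream);
        std::free(workspace);
    }
    infiniopDestroyAddDescriptor(desc);
    return status;
}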