Conversation
| // - HyPE (RoPE on linear layers; NoPE on sparse layers) | ||
| class MiniCPMSALAForCausalLM : public InfinilmModel { | ||
| public: | ||
| MiniCPMSALAForCausalLM(std::shared_ptr<infinilm::config::ModelConfig> model_config, |
There was a problem hiding this comment.
https://github.com/pengcheng888/InfiniLM/blob/main/csrc/models/minicpm_sala/minicpm_sala_for_causal_lm.hpp 请参考接口。移除 rank_info 和 attention_backend 参数。
| private: | ||
| INFINICORE_NN_MODULE(MiniCPMSALAModel, model); | ||
| INFINICORE_NN_MODULE(infinilm::layers::linear::ReplicatedLinear, lm_head); | ||
| INFINICORE_NN_MODULE(infinicore::nn::Linear, lm_head); |
There was a problem hiding this comment.
使用 infinilm::layers::linear::ReplicatedLinear;infinicore::nn::Linear 不再使用。
| std::unique_ptr<cache::CacheConfig> cache_config_; | ||
| }; | ||
|
|
||
| std::shared_ptr<infinilm::config::ModelConfig> create_minicpm_sala_model_config(std::shared_ptr<infinilm::config::ModelConfig> model_config); |
There was a problem hiding this comment.
实现这个create_minicpm_sala_model_config函数。
| const cache::CacheConfig *MiniCPMSALAForCausalLM::get_cache_config() const { | ||
| return cache_config_.get(); | ||
| } | ||
|
|
There was a problem hiding this comment.
kvcache 的创建放在 minicpm_sala_allocate_kv_cache_tensors.cpp 文件中。
|
|
||
| } // namespace infinilm::models::minicpm_sala | ||
|
|
||
| namespace { |
|
|
||
| class MiniCPMSALADecoderLayer : public infinicore::nn::Module { | ||
| public: | ||
| MiniCPMSALADecoderLayer(std::shared_ptr<infinilm::config::ModelConfig> model_config, |
There was a problem hiding this comment.
There was a problem hiding this comment.
移除 MiniCPMSALADecoderLayer 的 rank_info 和 attention_backend 参数。
| std::optional<infinicore::Tensor> cu_seqlens, | ||
| std::optional<infinicore::Tensor> block_tables, | ||
| std::optional<infinicore::Tensor> slot_mapping) const; | ||
|
|
There was a problem hiding this comment.
移除多余的参数,forward只需要(const infinicore::Tensor &positions,
infinicore::Tensor &hidden_states,
infinicore::Tensor &residual);
| std::optional<infinicore::Tensor> block_tables, | ||
| std::optional<infinicore::Tensor> slot_mapping) const; | ||
|
|
||
| void set_rotary_emb(const std::shared_ptr<infinicore::nn::RoPE> &rotary_emb); |
There was a problem hiding this comment.
移除set_rotary_emb和reset_cache函数
| #include "../../backends/attention_backends.hpp" | ||
| #include "../../cache/kv_cache.hpp" | ||
| #include "../../config/model_config.hpp" | ||
| #include "../../engine/distributed/distributed.hpp" |
| #include "models_registry.hpp" | ||
| #include "llama/llama.hpp" | ||
| #include "minicpm_sala/minicpm_sala_for_causal_lm.hpp" | ||
|
|
|
|
||
| #include "../global_state/global_state.hpp" | ||
| #include "../models/model_factory.hpp" | ||
| #include "../models/models_registry.hpp" |
There was a problem hiding this comment.
新增模型,不要修改框架层面上的代码。不能修改该文件
| const std::string model_type = model_config->get<std::string>("model_type"); | ||
| const auto &config_map = models::get_model_config_map(); | ||
| auto it = config_map.find(model_type); | ||
| if (it != config_map.end()) { |
There was a problem hiding this comment.
新增模型,不要修改框架层面上的代码。不能修改该文件
|
|
||
| #include <algorithm> | ||
| #include <limits> | ||
| #include <memory> |
|
|
||
| void MiniCPMSALAModel::reset_cache(const cache::CacheConfig *cache_config) { | ||
| if (cache_config == nullptr) { | ||
| kv_cache_minicpm4_ = nullptr; |
There was a problem hiding this comment.
kvcache创建的代码在csrc/models/minicpm_sala/minicpm_sala_allocate_kv_cache_tensors.cpp中
| if (auto static_cfg = dynamic_cast<const cache::StaticKVCacheConfig *>(cache_config)) { | ||
| // Allocate separate caches by KV shape to avoid per-layer padding copies. |
| INFINICORE_NN_MODULE_INIT(o_gate, hidden_size_, num_attention_heads * head_dim_, | ||
| model_config->get_quantization_method(), use_bias_, dtype, device); | ||
| } | ||
| void MiniCPMSALAAttention::set_rotary_emb(const std::shared_ptr<infinicore::nn::RoPE> &rotary_emb) { |
| std::optional<infinicore::Tensor> cu_seqlens, | ||
| std::optional<infinicore::Tensor> block_tables, | ||
| std::optional<infinicore::Tensor> slot_mapping) const; | ||
|
|
| INFINICORE_NN_MODULE_INIT(mlp, model_config, device); | ||
| } | ||
|
|
||
| void MiniCPMSALADecoderLayer::set_rotary_emb(const std::shared_ptr<infinicore::nn::RoPE> &rotary_emb) { |
| void MiniCPMSALADecoderLayer::reset_cache() { | ||
| self_attn_->reset_cache(); |
|
|
||
| auto to_device = [&](const std::optional<infinicore::Tensor> &t) | ||
| -> std::optional<infinicore::Tensor> { | ||
| return t.has_value() ? t.value()->to(device) : t; |
Signed-off-by: Ceng23333 <441651826@qq.com>
Signed-off-by: Ceng23333 <441651826@qq.com>
Signed-off-by: Ceng23333 <441651826@qq.com>
Signed-off-by: Ceng23333 <441651826@qq.com>
Signed-off-by: Ceng23333 <441651826@qq.com>
| void reset_cache(const cache::CacheConfig *cache_config) override; | ||
|
|
||
| protected: | ||
| const cache::CacheConfig *get_cache_config() const override; |
There was a problem hiding this comment.
get_cache_config() 属于 InfinilmModel 抽象基类,移除具体模型中的 get_cache_config 函数。
| INFINICORE_NN_MODULE(MiniCPMSALAModel, model); | ||
| INFINICORE_NN_MODULE(infinilm::layers::linear::ReplicatedLinear, lm_head); | ||
| INFINICORE_NN_MODULE(infinicore::nn::Linear, lm_head); | ||
| std::unique_ptr<cache::CacheConfig> cache_config_; |
| MiniCPMSALAModel(std::shared_ptr<infinilm::config::ModelConfig> model_config, | ||
| const infinicore::Device &device, | ||
| engine::distributed::RankInfo rank_info = engine::distributed::RankInfo(), | ||
| backends::AttentionBackend attention_backend = backends::AttentionBackend::Default); |
There was a problem hiding this comment.
移除MiniCPMSALAModel的rank_info和attention_backend参数
There was a problem hiding this comment.
attention_backend_ = infinilm::global_state::get_infinilm_config().attention_backend;
There was a problem hiding this comment.
const engine::distributed::RankInfo &rank_info = infinilm::global_state::get_tensor_model_parallel_rank_info();
| engine::distributed::RankInfo rank_info = engine::distributed::RankInfo(), | ||
| backends::AttentionBackend attention_backend = backends::AttentionBackend::Default); | ||
|
|
||
| infinicore::Tensor forward(const infinicore::Tensor &input_ids, |
There was a problem hiding this comment.
移除 past_sequence_lengths、total_sequence_lengths、input_offsets、cu_seqlens、block_tables、slot_mapping 这些参数。上面是 attn_metadata 的数据,只在 attn 计算时用到,不再一层一层地传递。
There was a problem hiding this comment.
移除forward的attn_meta参数
| std::optional<infinicore::Tensor> block_tables, | ||
| std::optional<infinicore::Tensor> slot_mapping) const; | ||
|
|
||
| void reset_cache(const cache::CacheConfig *cache_config); |
There was a problem hiding this comment.
reset_cache 属于 CausalLM类,移除。
| INFINICORE_NN_MODULE(infinicore::nn::Embedding, embed_tokens); | ||
| INFINICORE_NN_MODULE_VEC(MiniCPMSALADecoderLayer, layers); | ||
| INFINICORE_NN_MODULE(infinicore::nn::RMSNorm, norm); | ||
| INFINICORE_NN_MODULE(infinicore::nn::RoPE, rotary_emb); |
There was a problem hiding this comment.
移除rotary_emb。 infinicore::nn::RoPE的对象在 minicpm_sala_attention类中,通过get_rope创建
| infinicore::Tensor forward(const infinicore::Tensor &hidden_states, | ||
| const infinicore::Tensor &position_ids, | ||
| std::shared_ptr<infinilm::cache::Cache> kv_cache, | ||
| std::optional<infinicore::Tensor> past_sequence_lengths, |
There was a problem hiding this comment.
移除forward的这些 attn_metadata参数
| std::optional<infinicore::Tensor> slot_mapping) const; | ||
|
|
||
| void set_rotary_emb(const std::shared_ptr<infinicore::nn::RoPE> &rotary_emb); | ||
| void reset_cache(); |
|
|
||
| kv_cache_minicpm4_ = (minicpm4_layer_count > 0) |
There was a problem hiding this comment.
根据minicpm_sala_allocate_kv_cache_tensors.cpp文件创建kvcache。 kv_cache_minicpm4_和kv_cache_lightning_两个变量可以合并成一个
| MiniCPMSALAModel(std::shared_ptr<infinilm::config::ModelConfig> model_config, | ||
| const infinicore::Device &device, | ||
| engine::distributed::RankInfo rank_info = engine::distributed::RankInfo(), | ||
| backends::AttentionBackend attention_backend = backends::AttentionBackend::Default); |
There was a problem hiding this comment.
attention_backend_ = infinilm::global_state::get_infinilm_config().attention_backend;
| MiniCPMSALAModel(std::shared_ptr<infinilm::config::ModelConfig> model_config, | ||
| const infinicore::Device &device, | ||
| engine::distributed::RankInfo rank_info = engine::distributed::RankInfo(), | ||
| backends::AttentionBackend attention_backend = backends::AttentionBackend::Default); |
There was a problem hiding this comment.
const engine::distributed::RankInfo &rank_info = infinilm::global_state::get_tensor_model_parallel_rank_info();
| engine::distributed::RankInfo rank_info = engine::distributed::RankInfo(), | ||
| backends::AttentionBackend attention_backend = backends::AttentionBackend::Default); | ||
|
|
||
| infinicore::Tensor forward(const infinicore::Tensor &input_ids, |
There was a problem hiding this comment.
移除forward的attn_meta参数
| infinicore::Tensor forward(const infinicore::Tensor &position_ids, | ||
| const infinicore::Tensor &hidden_states) const; | ||
|
|
||
| void set_rotary_emb(const std::shared_ptr<infinicore::nn::RoPE> &rotary_emb); |
There was a problem hiding this comment.
通过get_rope()创建 RoPE模块的对象
python/infinilm/modeling_utils.py
Outdated
| for k in f.keys(): | ||
| state_dict[k] = f.get_tensor(k).to(device=device) | ||
| # Explicitly cast dtype: some ops (e.g. embedding) may not support BF16 on all backends. | ||
| state_dict[k] = f.get_tensor(k).to(device=device, dtype=dtype) |
| scale_down = 1.0 | ||
| scale_lm_head = 1.0 | ||
| try: | ||
| with open(os.path.join(model_path, "config.json")) as f: |
There was a problem hiding this comment.
TODO: 后续config_json会从 model变量中读,而不是读取文件
| scale_down = 1.0 | ||
| scale_lm_head = 1.0 | ||
| try: | ||
| with open(os.path.join(model_path, "config.json")) as f: |
There was a problem hiding this comment.
TODO: 后续config_json会从 model变量中读,而不是读取文件
|
|
||
| # Apply MiniCPM scaling to loaded tensors (in torch space). | ||
| if scale_input != 1.0 and "model.embed_tokens.weight" in model_param: | ||
| model_param["model.embed_tokens.weight"] = ( |
| MiniCPMSALAForCausalLM(std::shared_ptr<infinilm::config::ModelConfig> model_config, | ||
| const infinicore::Device &device); | ||
| const infinicore::Device &device, | ||
| engine::distributed::RankInfo rank_info = engine::distributed::RankInfo(), |
| MiniCPMSALAForCausalLM::MiniCPMSALAForCausalLM( | ||
| std::shared_ptr<infinilm::config::ModelConfig> model_config, | ||
| const infinicore::Device &device, | ||
| engine::distributed::RankInfo rank_info, |
There was a problem hiding this comment.
engine::distributed::RankInfo rank_info,
backends::AttentionBackend attention_backend) 移除这两个参数
| const Input &input) const { | ||
| auto input_ids = input.input_ids.value(); | ||
| auto position_ids = input.position_ids.value(); | ||
|
|
| auto block_tables = input.block_tables; | ||
| auto slot_mapping = input.slot_mapping; | ||
|
|
||
| infinilm::global_state::get_forward_context().attn_metadata = |
There was a problem hiding this comment.
删除 infinilm::global_state::get_forward_context().attn_metadata 的赋值. 全局变量的 attn_metadata只能由框架赋值
|
|
||
| private: | ||
| std::shared_ptr<infinilm::config::ModelConfig> model_config_; | ||
| std::shared_ptr<infinicore::nn::RoPE> rotary_emb_; |
| INFINICORE_NN_MODULE_INIT(embed_tokens, vocab_size, hidden_size_, std::nullopt, dtype, device); | ||
| INFINICORE_NN_MODULE_INIT(norm, hidden_size_, model_config_->get<double>("rms_norm_eps"), dtype, device); | ||
|
|
||
| // Shared rotary embedding (used by lightning layers only) — match `get_rope` pattern. |
There was a problem hiding this comment.
Model 类中的 rotary_emb_ 变量没有被用到,删除。
| compute_device_ = device; | ||
| const engine::distributed::RankInfo &rank_info = infinilm::global_state::get_tensor_model_parallel_rank_info(); | ||
| const backends::AttentionBackend attention_backend = infinilm::global_state::get_infinilm_config().attention_backend; | ||
|
|
| const infinicore::Tensor &position_ids) const; | ||
|
|
||
| private: | ||
| friend class MiniCPMSALAModel; |
|
请将范围限定在 minicpm_sala 文件夹中。先让 AI 帮你移除多余的、未使用到的头文件和未使用到的变量,然后根据最新的评论修改。
Signed-off-by: Ceng23333 <441651826@qq.com>
|
infinicore那边的是不是也得改改 |
是, 需要先合并infinicore的pr |
#294