diff --git a/bindings/chatllm.py b/bindings/chatllm.py index 506d210..6aa1c34 100644 --- a/bindings/chatllm.py +++ b/bindings/chatllm.py @@ -562,6 +562,17 @@ def save_session(self, file_name: str) -> str: def load_session(self, file_name: str) -> str: return self._lib.load_session(self._chat, file_name) + def destroy(self) -> int: + if hasattr(self, "_chat") and self._chat: + if self.is_generating: self.abort() + obj_id = LibChatLLM._obj2id.get(self) + if obj_id is not None: + LibChatLLM._obj2id.pop(self, None) + LibChatLLM._id2obj.pop(obj_id, None) + self._lib.destroy(self._chat) + self._chat = None + return 0 + def callback_print_reference(self, s: str) -> None: self.references.append(s) diff --git a/src/chat.cpp b/src/chat.cpp index fd562b5..79d6cd6 100644 --- a/src/chat.cpp +++ b/src/chat.cpp @@ -627,6 +627,15 @@ namespace chatllm qa_encoder->set_tokenizer(this); } + BaseTokenizer::~BaseTokenizer() + { + if (tp) + { + delete tp; + tp = nullptr; + } + } + void BaseTokenizer::set_chat_encoder(BaseHistoryEncoder *encoder) { chat_encoder = encoder; diff --git a/src/chat.h b/src/chat.h index c293b29..0936e8b 100644 --- a/src/chat.h +++ b/src/chat.h @@ -287,7 +287,7 @@ namespace chatllm BaseHistoryEncoder *qa_encoder = nullptr, BaseHistoryEncoder *completion_encoder = nullptr); - virtual ~BaseTokenizer() = default; + virtual ~BaseTokenizer(); virtual size_t load(tokenizer::DataReader *buffer, int n_vocab) = 0; diff --git a/src/layers.h b/src/layers.h index dedc91c..275696a 100644 --- a/src/layers.h +++ b/src/layers.h @@ -311,6 +311,14 @@ namespace chatllm PreludeCacheDisable(void): disabler(new BlockParams::DisableCache()) { } + virtual ~PreludeCacheDisable() + { + if (disabler) + { + delete disabler; + disabler = nullptr; + } + } protected: BlockParams::DisableCache *disabler; }; @@ -1421,7 +1429,7 @@ namespace chatllm sinks(BlockParams::CoreAttentionUseSinks::get() > 0 ? 
ggml::new_tensor_1d(ctx, ggml::type::GGML_TYPE_F32, BlockParams::CoreAttentionUseSinks::get()) : nullptr), - pos_helper(helper ? helper : &def_pos_helper) + pos_helper(helper ? helper : new BaseTensorPosHelper(max_length)) { allocate_pos_tensor(ctx); } diff --git a/src/models.cpp b/src/models.cpp index 7724fa5..a440c1a 100644 --- a/src/models.cpp +++ b/src/models.cpp @@ -925,6 +925,15 @@ namespace chatllm layer_ids.push_back(i); } + BaseModelForConditionalGeneration::~BaseModelForConditionalGeneration() + { + if (transformer) + { + delete transformer; + transformer = nullptr; + } + } + void BaseModelForConditionalGeneration::set_layer_ids(const std::vector<int> &ids) { CHATLLM_CHECK((int)ids.size() == config_.num_hidden_layers) << "length(layer_ids) must be " << config_.num_hidden_layers; diff --git a/src/models_priv.h b/src/models_priv.h index 7177a07..419382b 100644 --- a/src/models_priv.h +++ b/src/models_priv.h @@ -395,7 +395,7 @@ namespace chatllm { public: BaseModelForConditionalGeneration(ModelType model_type, BaseConfig config, const RuntimeConfig &runtime_config, size_t GRAPH_SIZE = 4096); - virtual ~BaseModelForConditionalGeneration() = default; + virtual ~BaseModelForConditionalGeneration(); void set_layer_ids(const std::vector<int> &ids) override; int get_max_length(void) override; diff --git a/src/tokenizer.h b/src/tokenizer.h index f78eb0c..a6d9b51 100644 --- a/src/tokenizer.h +++ b/src/tokenizer.h @@ -106,6 +106,8 @@ class TextPrepAddLeadingSpace : public TextPreprocessor class DataReader { public: + virtual ~DataReader() {} + virtual int64_t tell() = 0; virtual void seek(int64_t offset, int whence) = 0; virtual int64_t size(void) const { return _size; } @@ -136,6 +138,8 @@ class Processor vocab_.byte_fallback_ready = false; } + virtual ~Processor() {} + virtual size_t Load(DataReader *data_reader, int n_vocab) = 0; virtual int PieceToId(std::string_view piece) const;