diff --git a/kleidiai-examples/audiogen/install_requirements.sh b/kleidiai-examples/audiogen/install_requirements.sh index b7986d8..de39725 100644 --- a/kleidiai-examples/audiogen/install_requirements.sh +++ b/kleidiai-examples/audiogen/install_requirements.sh @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates +# SPDX-FileCopyrightText: Copyright 2025-2026 Arm Limited and/or its affiliates # # SPDX-License-Identifier: Apache-2.0 # @@ -9,31 +9,31 @@ # Install individual packages echo "Installing required packages for the Audiogen module..." -# ai-edge-torch -pip install ai-edge-torch==0.4.0 \ - "tf-nightly>=2.19.0.dev20250208" \ - "ai-edge-litert-nightly>=1.1.2.dev20250305" \ - "ai-edge-quantizer-nightly>=0.0.1.dev20250208" +# LiteRT torch +pip install litert-torch==0.8.0 \ + "ai-edge-litert==2.1.2" \ + "ai-edge-quantizer==0.4.2" # Stable audio tools pip install "stable_audio_tools==0.0.19" # Working out dependency issues, this combination of packages has been tested on different systems (Linux and MacOS). -pip install --no-deps "torch==2.6.0" \ - "torchaudio==2.6.0" \ - "torchvision==0.21.0" \ - "protobuf==5.29.4" \ +pip install --no-deps "torch==2.9.0" \ + "torchaudio==2.9.0" \ + "torchvision==0.24.0" \ + "protobuf==5.29.6" \ "numpy==1.26.4" \ -# Packages to convert via onnx +# Packages to convert via onnx pip install --no-deps "onnx==1.18.0" \ "onnxsim==0.4.36" \ + "onnx-ir==0.1.16" \ "onnx2tf==1.27.10" \ + "onnxscript==0.6.2" \ "tensorflow==2.19.0" \ "tf_keras==2.19.0" \ "onnx-graphsurgeon==0.5.8" \ - "ai_edge_litert" \ "sng4onnx==1.0.4" echo "Finished installing required packages for AudioGen submodules conversion." diff --git a/kleidiai-examples/audiogen/scripts/export_dit_autoencoder.py b/kleidiai-examples/audiogen/scripts/export_dit_autoencoder.py index 639363d..4be0e8e 100644 --- a/kleidiai-examples/audiogen/scripts/export_dit_autoencoder.py +++ b/kleidiai-examples/audiogen/scripts/export_dit_autoencoder.py @@ -1,26 +1,28 @@ # -# SPDX-FileCopyrightText: Copyright 2025 Arm Limited and/or its affiliates +# SPDX-FileCopyrightText: Copyright 2025-2026 Arm Limited and/or its affiliates # # SPDX-License-Identifier: Apache-2.0 # +# Disable GPU to avoid any issues during export +import os +os.environ["CUDA_VISIBLE_DEVICES"] = "" + import argparse import json import logging -import os -import ai_edge_torch +import litert_torch import torch from einops import rearrange -from ai_edge_torch.generative.quantize import quant_recipe, quant_recipe_utils -from ai_edge_torch.quantize import quant_config +from litert_torch.generative.quantize import quant_recipe, quant_recipe_utils +from litert_torch.quantize import quant_config from utils_load_model import load_model import stable_audio_tools -os.environ["CUDA_VISIBLE_DEVICES"] = "" torch.manual_seed(0) DEVICE = torch.device("cpu") @@ -157,7 +159,7 @@ def export_audiogen(args) -> None: # Create the dynamic weights int8 quantization config quant_config_audiogen_int8 = quant_config.QuantConfig( generative_recipe=quant_recipe.GenerativeQuantRecipe( - default=quant_recipe_utils.create_layer_quant_int8_dynamic(), + default=quant_recipe_utils.create_layer_quant_dynamic(), ) ) @@ -178,7 +180,7 @@ def rotary_emb_const(_): dit_model.model.transformer.rotary_pos_emb.forward_from_seq_len = rotary_emb_const # Export the DiT to LiteRT format - edge_model = ai_edge_torch.convert( + edge_model = litert_torch.convert( dit_model, sample_args=None, sample_kwargs=dit_model_example_input, quant_config=quant_config_audiogen_int8 ) edge_model.export("./dit_model.tflite") @@ -192,7 +194,7 @@ def rotary_emb_const(_): autoencoder_decoder_example_input = get_autoencoder_decoder_example_input(dtype) # Export the Encoder part of the AutoEncoder to LiteRT format - edge_model = ai_edge_torch.convert( + edge_model = litert_torch.convert( autoencoder_decoder, autoencoder_decoder_example_input, ) @@ -209,7 +211,7 @@ def rotary_emb_const(_): autoencoder_encoder_example_input = get_autoencoder_encoder_example_input(dtype) # Export the AutoEncoder to LiteRT format - edge_model = ai_edge_torch.convert( + edge_model = litert_torch.convert( autoencoder_encoder, autoencoder_encoder_example_input, )