diff --git a/MODEL-MATRIX.md b/MODEL-MATRIX.md index af80b5b..4282685 100644 --- a/MODEL-MATRIX.md +++ b/MODEL-MATRIX.md @@ -54,7 +54,6 @@ | Dependency | Version | Reason | |-----------|---------|--------| | transformers | `>=5.0.0` | VL models require v5; standard models work with v5 too | -| transformers (git fallback) | `3c2517727ce28a30f5044e01663ee204deb1cdbe` | For VL if v5 has issues | | vLLM | `==0.14` | Latest stable with LFM support | | vLLM (VL custom wheel) | commit `72506c98349d6bcd32b4e33eec7b5513453c1502` | VL support not yet upstream | | llama.cpp | Latest via `brew install` or b7075+ binaries | | diff --git a/deployment/gpu-inference/transformers.mdx b/deployment/gpu-inference/transformers.mdx index f5cf185..6af17ec 100644 --- a/deployment/gpu-inference/transformers.mdx +++ b/deployment/gpu-inference/transformers.mdx @@ -27,11 +27,6 @@ Install the required dependencies: uv pip install "transformers>=5.0.0" torch accelerate ``` -> **Note:** Transformers v5 is newly released. If you encounter issues, fall back to the pinned git source: -> ```bash -> uv pip install git+https://github.com/huggingface/transformers.git@0c9a72e4576fe4c84077f066e585129c97bfd4e6 torch accelerate -> ``` - GPU is recommended for faster inference. ## Basic Usage diff --git a/deployment/gpu-inference/vllm.mdx b/deployment/gpu-inference/vllm.mdx index 115cfa7..f18b8f1 100644 --- a/deployment/gpu-inference/vllm.mdx +++ b/deployment/gpu-inference/vllm.mdx @@ -196,13 +196,6 @@ VLLM_PRECOMPILED_WHEEL_COMMIT=72506c98349d6bcd32b4e33eec7b5513453c1502 VLLM_USE_ uv pip install "transformers>=5.0.0" pillow ``` - -Transformers v5 is newly released. If you encounter issues, fall back to the pinned git source: -```bash -uv pip install git+https://github.com/huggingface/transformers.git@3c2517727ce28a30f5044e01663ee204deb1cdbe pillow -``` - - This installs vLLM with the necessary changes for LFM Vision Model support. 
Once these changes are merged upstream, you'll be able to use the standard vLLM installation. ### Basic Usage diff --git a/lfm/models/lfm2-24b-a2b.mdx b/lfm/models/lfm2-24b-a2b.mdx index 9de69c6..93d8364 100644 --- a/lfm/models/lfm2-24b-a2b.mdx +++ b/lfm/models/lfm2-24b-a2b.mdx @@ -4,7 +4,6 @@ description: "24B parameter Mixture-of-Experts model with 2B active parameters --- import { TextTransformers } from "/snippets/quickstart/text-transformers.mdx"; -import { TextVllm } from "/snippets/quickstart/text-vllm.mdx"; import { TextLlamacpp } from "/snippets/quickstart/text-llamacpp.mdx"; ← Back to Text Models @@ -54,6 +53,28 @@ LFM2-24B-A2B is Liquid AI's largest Mixture-of-Experts model, combining 24B tota - + + LFM2-24B-A2B requires vLLM ≥0.15.1 and transformers ≥5.1.0. Install transformers on top of the vLLM image. + + + **Install:** + + ```bash + uv pip install "vllm>=0.15.1" + uv pip install "transformers>=5.1.0" + ``` + + **Run:** + + ```python + from vllm import LLM, SamplingParams + + llm = LLM(model="LiquidAI/LFM2-24B-A2B") + + sampling_params = SamplingParams(temperature=0.1, top_k=50, repetition_penalty=1.05, max_tokens=512) + + output = llm.chat([{"role": "user", "content": "What is machine learning?"}], sampling_params) + print(output[0].outputs[0].text) + ``` diff --git a/notebooks/LFM2_Inference_with_Transformers.ipynb b/notebooks/LFM2_Inference_with_Transformers.ipynb index 636ba48..7bc862b 100644 --- a/notebooks/LFM2_Inference_with_Transformers.ipynb +++ b/notebooks/LFM2_Inference_with_Transformers.ipynb @@ -25,9 +25,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "!uv pip install \"transformers>=5.0.0\" \"torch==2.9.0\" accelerate" - ] + "source": "!uv pip uninstall torchvision -y\n!uv pip install \"transformers>=5.0.0\" torchvision accelerate" }, { "cell_type": "markdown", @@ -43,34 +41,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "from transformers import AutoModelForCausalLM, AutoTokenizer\n", - "\n", - "# Load model 
and tokenizer\n", - "model_id = \"LiquidAI/LFM2.5-1.2B-Instruct\"\n", - "model = AutoModelForCausalLM.from_pretrained(\n", - " model_id,\n", - " device_map=\"auto\",\n", - " dtype=\"bfloat16\",\n", - ")\n", - "tokenizer = AutoTokenizer.from_pretrained(model_id)\n", - "\n", - "# Generate answer\n", - "prompt = \"What is C. elegans?\"\n", - "inputs = tokenizer.apply_chat_template(\n", - " [{\"role\": \"user\", \"content\": prompt}],\n", - " add_generation_prompt=True,\n", - " return_tensors=\"pt\",\n", - " return_dict=True,\n", - ").to(model.device)\n", - "\n", - "output = model.generate(**inputs, max_new_tokens=512)\n", - "\n", - "# Decode only the newly generated tokens\n", - "input_length = inputs[\"input_ids\"].shape[1]\n", - "response = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)\n", - "print(response)" - ] + "source": "from transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Load model and tokenizer\nmodel_id = \"LiquidAI/LFM2.5-1.2B-Instruct\"\nmodel = AutoModelForCausalLM.from_pretrained(\n model_id,\n device_map=\"auto\",\n dtype=\"bfloat16\",\n)\ntokenizer = AutoTokenizer.from_pretrained(model_id)\n\n# Generate answer\nprompt = \"What is C. 
elegans?\"\ninputs = tokenizer.apply_chat_template(\n [{\"role\": \"user\", \"content\": prompt}],\n add_generation_prompt=True,\n return_tensors=\"pt\",\n tokenize=True,\n return_dict=True,\n).to(model.device)\n\noutput = model.generate(**inputs, do_sample=True, temperature=0.1, top_k=50, repetition_penalty=1.05, max_new_tokens=512)\n\n# Decode only the newly generated tokens\ninput_length = inputs[\"input_ids\"].shape[1]\nresponse = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)\nprint(response)" }, { "cell_type": "markdown", @@ -86,7 +57,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "from transformers import GenerationConfig\n\ngeneration_config = GenerationConfig(\n do_sample=True,\n temperature=0.1,\n top_k=50,\n repetition_penalty=1.05,\n max_new_tokens=512,\n)\n\nprompt = \"Explain quantum computing in simple terms.\"\ninputs = tokenizer.apply_chat_template(\n [{\"role\": \"user\", \"content\": prompt}],\n add_generation_prompt=True,\n return_tensors=\"pt\",\n return_dict=True,\n).to(model.device)\n\noutput = model.generate(**inputs, generation_config=generation_config)\ninput_length = inputs[\"input_ids\"].shape[1]\nresponse = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)\nprint(response)" + "source": "from transformers import GenerationConfig\n\ngeneration_config = GenerationConfig(\n do_sample=True,\n temperature=0.1,\n top_k=50,\n repetition_penalty=1.05,\n max_new_tokens=512,\n)\n\nprompt = \"Explain quantum computing in simple terms.\"\ninputs = tokenizer.apply_chat_template(\n [{\"role\": \"user\", \"content\": prompt}],\n add_generation_prompt=True,\n return_tensors=\"pt\",\n tokenize=True,\n return_dict=True,\n).to(model.device)\n\noutput = model.generate(**inputs, generation_config=generation_config)\ninput_length = inputs[\"input_ids\"].shape[1]\nresponse = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)\nprint(response)" }, { "cell_type": "markdown", @@ 
-102,20 +73,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "from transformers import TextStreamer\n", - "\n", - "prompt = \"Tell me a story about space exploration.\"\n", - "inputs = tokenizer.apply_chat_template(\n", - " [{\"role\": \"user\", \"content\": prompt}],\n", - " add_generation_prompt=True,\n", - " return_tensors=\"pt\",\n", - " return_dict=True,\n", - ").to(model.device)\n", - "\n", - "streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n", - "output = model.generate(**inputs, streamer=streamer, max_new_tokens=512)" - ] + "source": "from transformers import TextStreamer\n\nprompt = \"Tell me a story about space exploration.\"\ninputs = tokenizer.apply_chat_template(\n [{\"role\": \"user\", \"content\": prompt}],\n add_generation_prompt=True,\n return_tensors=\"pt\",\n tokenize=True,\n return_dict=True,\n).to(model.device)\n\nstreamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\noutput = model.generate(**inputs, streamer=streamer, do_sample=True, temperature=0.1, top_k=50, repetition_penalty=1.05, max_new_tokens=512)" }, { "cell_type": "markdown", diff --git a/notebooks/quickstart_snippets.ipynb b/notebooks/quickstart_snippets.ipynb index 85e4255..3f58818 100644 --- a/notebooks/quickstart_snippets.ipynb +++ b/notebooks/quickstart_snippets.ipynb @@ -32,7 +32,7 @@ "snippet": "text-transformers" }, "outputs": [], - "source": "from transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_id = \"LiquidAI/LFM2.5-1.2B-Instruct\"\nmodel = AutoModelForCausalLM.from_pretrained(\n model_id,\n device_map=\"auto\",\n dtype=\"bfloat16\",\n)\ntokenizer = AutoTokenizer.from_pretrained(model_id)\n\ninputs = tokenizer.apply_chat_template(\n [{\"role\": \"user\", \"content\": \"What is machine learning?\"}],\n add_generation_prompt=True,\n return_tensors=\"pt\",\n return_dict=True,\n).to(model.device)\n\noutput = model.generate(**inputs, do_sample=True, temperature=0.1, top_k=50, 
repetition_penalty=1.05, max_new_tokens=512)\ninput_length = inputs[\"input_ids\"].shape[1]\nresponse = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)\nprint(response)" + "source": "from transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_id = \"LiquidAI/LFM2.5-1.2B-Instruct\"\nmodel = AutoModelForCausalLM.from_pretrained(\n model_id,\n device_map=\"auto\",\n dtype=\"bfloat16\",\n)\ntokenizer = AutoTokenizer.from_pretrained(model_id)\n\ninputs = tokenizer.apply_chat_template(\n [{\"role\": \"user\", \"content\": \"What is machine learning?\"}],\n add_generation_prompt=True,\n return_tensors=\"pt\",\n tokenize=True,\n return_dict=True,\n).to(model.device)\n\noutput = model.generate(**inputs, do_sample=True, temperature=0.1, top_k=50, repetition_penalty=1.05, max_new_tokens=512)\ninput_length = inputs[\"input_ids\"].shape[1]\nresponse = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)\nprint(response)" }, { "cell_type": "code", diff --git "a/notebooks/\360\237\222\247_LFM2_5_VL_SFT_with_TRL.ipynb" "b/notebooks/\360\237\222\247_LFM2_5_VL_SFT_with_TRL.ipynb" index 08abb1c..4c9c022 100644 --- "a/notebooks/\360\237\222\247_LFM2_5_VL_SFT_with_TRL.ipynb" +++ "b/notebooks/\360\237\222\247_LFM2_5_VL_SFT_with_TRL.ipynb" @@ -44,7 +44,7 @@ "outputId": "01173385-066c-4114-d217-6d6e1d91f12b" }, "outputs": [], - "source": "!uv pip install git+https://github.com/huggingface/transformers.git@3c2517727ce28a30f5044e01663ee204deb1cdbe datasets trl" + "source": "!uv pip install \"transformers>=5.0.0\" datasets trl" }, { "cell_type": "markdown", diff --git a/scripts/generate_snippets.py b/scripts/generate_snippets.py index b131a7f..e7b6a7e 100644 --- a/scripts/generate_snippets.py +++ b/scripts/generate_snippets.py @@ -45,7 +45,7 @@ "sections": [ {"type": "label", "text": "Install:"}, {"type": "code_block", "language": "bash", - "code": 'pip install "transformers>=5.0.0" torch accelerate'}, + "code": 'uv pip install 
"transformers>=5.0.0" torch accelerate'}, {"type": "label", "text": "Download & Run:"}, {"type": "code_block", "language": "python", "code": ( @@ -82,7 +82,7 @@ "sections": [ {"type": "label", "text": "Install:"}, {"type": "code_block", "language": "bash", - "code": "pip install vllm==0.14"}, + "code": "uv pip install vllm==0.14"}, {"type": "label", "text": "Run:"}, {"type": "code_block", "language": "python", "code": ( @@ -121,13 +121,7 @@ "sections": [ {"type": "label", "text": "Install:"}, {"type": "code_block", "language": "bash", - "code": 'pip install "transformers>=5.0.0" pillow torch'}, - {"type": "note", "children": [ - {"type": "text", - "text": "Transformers v5 is newly released. If you encounter issues, fall back to the pinned git source:"}, - {"type": "code_block_margin", "language": "bash", - "code": "pip install git+https://github.com/huggingface/transformers.git@3c2517727ce28a30f5044e01663ee204deb1cdbe pillow torch"}, - ]}, + "code": 'uv pip install "transformers>=5.0.0" pillow torch'}, {"type": "label", "text": "Download & Run:"}, {"type": "notebook_code", "language": "python"}, ], @@ -142,15 +136,9 @@ "text": "vLLM support for LFM Vision Models requires a specific version. Install from the custom source below."}, {"type": "label", "text": "Install:"}, {"type": "code_block", "language": "bash", - "code": "VLLM_PRECOMPILED_WHEEL_COMMIT=72506c98349d6bcd32b4e33eec7b5513453c1502 \\\n VLLM_USE_PRECOMPILED=1 \\\n pip install git+https://github.com/vllm-project/vllm.git"}, + "code": "VLLM_PRECOMPILED_WHEEL_COMMIT=72506c98349d6bcd32b4e33eec7b5513453c1502 \\\n VLLM_USE_PRECOMPILED=1 \\\n uv pip install git+https://github.com/vllm-project/vllm.git"}, {"type": "code_block", "language": "bash", - "code": 'pip install "transformers>=5.0.0" pillow'}, - {"type": "note", "children": [ - {"type": "text", - "text": "Transformers v5 is newly released. 
If you encounter issues, fall back to the pinned git source:"}, - {"type": "code_block_margin", "language": "bash", - "code": "pip install git+https://github.com/huggingface/transformers.git@3c2517727ce28a30f5044e01663ee204deb1cdbe pillow"}, - ]}, + "code": 'uv pip install "transformers>=5.0.0" pillow'}, {"type": "label", "text": "Run:"}, {"type": "notebook_code", "language": "python"}, ], diff --git a/snippets/quickstart/text-transformers.mdx b/snippets/quickstart/text-transformers.mdx index f6c3cbe..1c63446 100644 --- a/snippets/quickstart/text-transformers.mdx +++ b/snippets/quickstart/text-transformers.mdx @@ -3,7 +3,7 @@ export const TextTransformers = ({ modelId, samplingParams }) => (

Install:

 
-{`pip install "transformers>=5.0.0" torch accelerate`.split('\n').map((line, i) => {line}{'\n'})}
+{`uv pip install "transformers>=5.0.0" torch accelerate`.split('\n').map((line, i) => {line}{'\n'})}
 
 

Download & Run:

diff --git a/snippets/quickstart/text-vllm.mdx b/snippets/quickstart/text-vllm.mdx index 5976a58..55b8671 100644 --- a/snippets/quickstart/text-vllm.mdx +++ b/snippets/quickstart/text-vllm.mdx @@ -3,7 +3,7 @@ export const TextVllm = ({ modelId, samplingParams }) => (

Install:

 
-{`pip install vllm==0.14`.split('\n').map((line, i) => {line}{'\n'})}
+{`uv pip install vllm==0.14`.split('\n').map((line, i) => {line}{'\n'})}
 
 

Run:

diff --git a/snippets/quickstart/vl-transformers.mdx b/snippets/quickstart/vl-transformers.mdx index 782c4f0..0c0acba 100644 --- a/snippets/quickstart/vl-transformers.mdx +++ b/snippets/quickstart/vl-transformers.mdx @@ -3,17 +3,9 @@ export const VlTransformers = ({ modelId }) => (

Install:

 
-{`pip install "transformers>=5.0.0" pillow torch`.split('\n').map((line, i) => {line}{'\n'})}
+{`uv pip install "transformers>=5.0.0" pillow torch`.split('\n').map((line, i) => {line}{'\n'})}
 
 
- -Transformers v5 is newly released. If you encounter issues, fall back to the pinned git source: -
-
-{`pip install git+https://github.com/huggingface/transformers.git@3c2517727ce28a30f5044e01663ee204deb1cdbe pillow torch`.split('\n').map((line, i) => {line}{'\n'})}
-
-
-

Download & Run:

 
diff --git a/snippets/quickstart/vl-vllm.mdx b/snippets/quickstart/vl-vllm.mdx
index e1f8bf0..70a9a5f 100644
--- a/snippets/quickstart/vl-vllm.mdx
+++ b/snippets/quickstart/vl-vllm.mdx
@@ -8,22 +8,14 @@ vLLM support for LFM Vision Models requires a specific version. Install from the
 
 {`VLLM_PRECOMPILED_WHEEL_COMMIT=72506c98349d6bcd32b4e33eec7b5513453c1502 \\
   VLLM_USE_PRECOMPILED=1 \\
-  pip install git+https://github.com/vllm-project/vllm.git`.split('\n').map((line, i) => {line}{'\n'})}
+  uv pip install git+https://github.com/vllm-project/vllm.git`.split('\n').map((line, i) => {line}{'\n'})}
 
 
 
-{`pip install "transformers>=5.0.0" pillow`.split('\n').map((line, i) => {line}{'\n'})}
+{`uv pip install "transformers>=5.0.0" pillow`.split('\n').map((line, i) => {line}{'\n'})}
 
 
- -Transformers v5 is newly released. If you encounter issues, fall back to the pinned git source: -
-
-{`pip install git+https://github.com/huggingface/transformers.git@3c2517727ce28a30f5044e01663ee204deb1cdbe pillow`.split('\n').map((line, i) => {line}{'\n'})}
-
-
-

Run: