diff --git a/MODEL-MATRIX.md b/MODEL-MATRIX.md
index af80b5b..4282685 100644
--- a/MODEL-MATRIX.md
+++ b/MODEL-MATRIX.md
@@ -54,7 +54,6 @@
| Dependency | Version | Reason |
|-----------|---------|--------|
| transformers | `>=5.0.0` | VL models require v5; standard models work with v5 too |
-| transformers (git fallback) | `3c2517727ce28a30f5044e01663ee204deb1cdbe` | For VL if v5 has issues |
| vLLM | `==0.14` | Latest stable with LFM support |
| vLLM (VL custom wheel) | commit `72506c98349d6bcd32b4e33eec7b5513453c1502` | VL support not yet upstream |
| llama.cpp | Latest via `brew install` or b7075+ binaries | |
diff --git a/deployment/gpu-inference/transformers.mdx b/deployment/gpu-inference/transformers.mdx
index f5cf185..6af17ec 100644
--- a/deployment/gpu-inference/transformers.mdx
+++ b/deployment/gpu-inference/transformers.mdx
@@ -27,11 +27,6 @@ Install the required dependencies:
uv pip install "transformers>=5.0.0" torch accelerate
```
-> **Note:** Transformers v5 is newly released. If you encounter issues, fall back to the pinned git source:
-> ```bash
-> uv pip install git+https://github.com/huggingface/transformers.git@0c9a72e4576fe4c84077f066e585129c97bfd4e6 torch accelerate
-> ```
-
GPU is recommended for faster inference.
## Basic Usage
diff --git a/deployment/gpu-inference/vllm.mdx b/deployment/gpu-inference/vllm.mdx
index 115cfa7..f18b8f1 100644
--- a/deployment/gpu-inference/vllm.mdx
+++ b/deployment/gpu-inference/vllm.mdx
@@ -196,13 +196,6 @@ VLLM_PRECOMPILED_WHEEL_COMMIT=72506c98349d6bcd32b4e33eec7b5513453c1502 VLLM_USE_
uv pip install "transformers>=5.0.0" pillow
```
-
-Transformers v5 is newly released. If you encounter issues, fall back to the pinned git source:
-```bash
-uv pip install git+https://github.com/huggingface/transformers.git@3c2517727ce28a30f5044e01663ee204deb1cdbe pillow
-```
-
-
This installs vLLM with the necessary changes for LFM Vision Model support. Once these changes are merged upstream, you'll be able to use the standard vLLM installation.
### Basic Usage
diff --git a/lfm/models/lfm2-24b-a2b.mdx b/lfm/models/lfm2-24b-a2b.mdx
index 9de69c6..93d8364 100644
--- a/lfm/models/lfm2-24b-a2b.mdx
+++ b/lfm/models/lfm2-24b-a2b.mdx
@@ -4,7 +4,6 @@ description: "24B parameter Mixture-of-Experts model with 2B active parameters
---
import { TextTransformers } from "/snippets/quickstart/text-transformers.mdx";
-import { TextVllm } from "/snippets/quickstart/text-vllm.mdx";
import { TextLlamacpp } from "/snippets/quickstart/text-llamacpp.mdx";
← Back to Text Models
@@ -54,6 +53,28 @@ LFM2-24B-A2B is Liquid AI's largest Mixture-of-Experts model, combining 24B tota
-
+
+ LFM2-24B-A2B requires vLLM ≥0.15.1 and transformers ≥5.1.0. Install transformers on top of the vLLM image.
+
+
+ **Install:**
+
+ ```bash
+ uv pip install "vllm>=0.15.1"
+ uv pip install "transformers>=5.1.0"
+ ```
+
+ **Run:**
+
+ ```python
+ from vllm import LLM, SamplingParams
+
+ llm = LLM(model="LiquidAI/LFM2-24B-A2B")
+
+ sampling_params = SamplingParams(temperature=0.1, top_k=50, repetition_penalty=1.05, max_tokens=512)
+
+ output = llm.chat([{"role": "user", "content": "What is machine learning?"}], sampling_params)
+ print(output[0].outputs[0].text)
+ ```
diff --git a/notebooks/LFM2_Inference_with_Transformers.ipynb b/notebooks/LFM2_Inference_with_Transformers.ipynb
index 636ba48..7bc862b 100644
--- a/notebooks/LFM2_Inference_with_Transformers.ipynb
+++ b/notebooks/LFM2_Inference_with_Transformers.ipynb
@@ -25,9 +25,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": [
- "!uv pip install \"transformers>=5.0.0\" \"torch==2.9.0\" accelerate"
- ]
+ "source": "!uv pip uninstall torchvision -y\n!uv pip install \"transformers>=5.0.0\" torchvision accelerate"
},
{
"cell_type": "markdown",
@@ -43,34 +41,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": [
- "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
- "\n",
- "# Load model and tokenizer\n",
- "model_id = \"LiquidAI/LFM2.5-1.2B-Instruct\"\n",
- "model = AutoModelForCausalLM.from_pretrained(\n",
- " model_id,\n",
- " device_map=\"auto\",\n",
- " dtype=\"bfloat16\",\n",
- ")\n",
- "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
- "\n",
- "# Generate answer\n",
- "prompt = \"What is C. elegans?\"\n",
- "inputs = tokenizer.apply_chat_template(\n",
- " [{\"role\": \"user\", \"content\": prompt}],\n",
- " add_generation_prompt=True,\n",
- " return_tensors=\"pt\",\n",
- " return_dict=True,\n",
- ").to(model.device)\n",
- "\n",
- "output = model.generate(**inputs, max_new_tokens=512)\n",
- "\n",
- "# Decode only the newly generated tokens\n",
- "input_length = inputs[\"input_ids\"].shape[1]\n",
- "response = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)\n",
- "print(response)"
- ]
+ "source": "from transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Load model and tokenizer\nmodel_id = \"LiquidAI/LFM2.5-1.2B-Instruct\"\nmodel = AutoModelForCausalLM.from_pretrained(\n model_id,\n device_map=\"auto\",\n dtype=\"bfloat16\",\n)\ntokenizer = AutoTokenizer.from_pretrained(model_id)\n\n# Generate answer\nprompt = \"What is C. elegans?\"\ninputs = tokenizer.apply_chat_template(\n [{\"role\": \"user\", \"content\": prompt}],\n add_generation_prompt=True,\n return_tensors=\"pt\",\n tokenize=True,\n return_dict=True,\n).to(model.device)\n\noutput = model.generate(**inputs, do_sample=True, temperature=0.1, top_k=50, repetition_penalty=1.05, max_new_tokens=512)\n\n# Decode only the newly generated tokens\ninput_length = inputs[\"input_ids\"].shape[1]\nresponse = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)\nprint(response)"
},
{
"cell_type": "markdown",
@@ -86,7 +57,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": "from transformers import GenerationConfig\n\ngeneration_config = GenerationConfig(\n do_sample=True,\n temperature=0.1,\n top_k=50,\n repetition_penalty=1.05,\n max_new_tokens=512,\n)\n\nprompt = \"Explain quantum computing in simple terms.\"\ninputs = tokenizer.apply_chat_template(\n [{\"role\": \"user\", \"content\": prompt}],\n add_generation_prompt=True,\n return_tensors=\"pt\",\n return_dict=True,\n).to(model.device)\n\noutput = model.generate(**inputs, generation_config=generation_config)\ninput_length = inputs[\"input_ids\"].shape[1]\nresponse = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)\nprint(response)"
+ "source": "from transformers import GenerationConfig\n\ngeneration_config = GenerationConfig(\n do_sample=True,\n temperature=0.1,\n top_k=50,\n repetition_penalty=1.05,\n max_new_tokens=512,\n)\n\nprompt = \"Explain quantum computing in simple terms.\"\ninputs = tokenizer.apply_chat_template(\n [{\"role\": \"user\", \"content\": prompt}],\n add_generation_prompt=True,\n return_tensors=\"pt\",\n tokenize=True,\n return_dict=True,\n).to(model.device)\n\noutput = model.generate(**inputs, generation_config=generation_config)\ninput_length = inputs[\"input_ids\"].shape[1]\nresponse = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)\nprint(response)"
},
{
"cell_type": "markdown",
@@ -102,20 +73,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": [
- "from transformers import TextStreamer\n",
- "\n",
- "prompt = \"Tell me a story about space exploration.\"\n",
- "inputs = tokenizer.apply_chat_template(\n",
- " [{\"role\": \"user\", \"content\": prompt}],\n",
- " add_generation_prompt=True,\n",
- " return_tensors=\"pt\",\n",
- " return_dict=True,\n",
- ").to(model.device)\n",
- "\n",
- "streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n",
- "output = model.generate(**inputs, streamer=streamer, max_new_tokens=512)"
- ]
+ "source": "from transformers import TextStreamer\n\nprompt = \"Tell me a story about space exploration.\"\ninputs = tokenizer.apply_chat_template(\n [{\"role\": \"user\", \"content\": prompt}],\n add_generation_prompt=True,\n return_tensors=\"pt\",\n tokenize=True,\n return_dict=True,\n).to(model.device)\n\nstreamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\noutput = model.generate(**inputs, streamer=streamer, do_sample=True, temperature=0.1, top_k=50, repetition_penalty=1.05, max_new_tokens=512)"
},
{
"cell_type": "markdown",
diff --git a/notebooks/quickstart_snippets.ipynb b/notebooks/quickstart_snippets.ipynb
index 85e4255..3f58818 100644
--- a/notebooks/quickstart_snippets.ipynb
+++ b/notebooks/quickstart_snippets.ipynb
@@ -32,7 +32,7 @@
"snippet": "text-transformers"
},
"outputs": [],
- "source": "from transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_id = \"LiquidAI/LFM2.5-1.2B-Instruct\"\nmodel = AutoModelForCausalLM.from_pretrained(\n model_id,\n device_map=\"auto\",\n dtype=\"bfloat16\",\n)\ntokenizer = AutoTokenizer.from_pretrained(model_id)\n\ninputs = tokenizer.apply_chat_template(\n [{\"role\": \"user\", \"content\": \"What is machine learning?\"}],\n add_generation_prompt=True,\n return_tensors=\"pt\",\n return_dict=True,\n).to(model.device)\n\noutput = model.generate(**inputs, do_sample=True, temperature=0.1, top_k=50, repetition_penalty=1.05, max_new_tokens=512)\ninput_length = inputs[\"input_ids\"].shape[1]\nresponse = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)\nprint(response)"
+ "source": "from transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_id = \"LiquidAI/LFM2.5-1.2B-Instruct\"\nmodel = AutoModelForCausalLM.from_pretrained(\n model_id,\n device_map=\"auto\",\n dtype=\"bfloat16\",\n)\ntokenizer = AutoTokenizer.from_pretrained(model_id)\n\ninputs = tokenizer.apply_chat_template(\n [{\"role\": \"user\", \"content\": \"What is machine learning?\"}],\n add_generation_prompt=True,\n return_tensors=\"pt\",\n tokenize=True,\n return_dict=True,\n).to(model.device)\n\noutput = model.generate(**inputs, do_sample=True, temperature=0.1, top_k=50, repetition_penalty=1.05, max_new_tokens=512)\ninput_length = inputs[\"input_ids\"].shape[1]\nresponse = tokenizer.decode(output[0][input_length:], skip_special_tokens=True)\nprint(response)"
},
{
"cell_type": "code",
diff --git "a/notebooks/\360\237\222\247_LFM2_5_VL_SFT_with_TRL.ipynb" "b/notebooks/\360\237\222\247_LFM2_5_VL_SFT_with_TRL.ipynb"
index 08abb1c..4c9c022 100644
--- "a/notebooks/\360\237\222\247_LFM2_5_VL_SFT_with_TRL.ipynb"
+++ "b/notebooks/\360\237\222\247_LFM2_5_VL_SFT_with_TRL.ipynb"
@@ -44,7 +44,7 @@
"outputId": "01173385-066c-4114-d217-6d6e1d91f12b"
},
"outputs": [],
- "source": "!uv pip install git+https://github.com/huggingface/transformers.git@3c2517727ce28a30f5044e01663ee204deb1cdbe datasets trl"
+ "source": "!uv pip install \"transformers>=5.0.0\" datasets trl"
},
{
"cell_type": "markdown",
diff --git a/scripts/generate_snippets.py b/scripts/generate_snippets.py
index b131a7f..e7b6a7e 100644
--- a/scripts/generate_snippets.py
+++ b/scripts/generate_snippets.py
@@ -45,7 +45,7 @@
"sections": [
{"type": "label", "text": "Install:"},
{"type": "code_block", "language": "bash",
- "code": 'pip install "transformers>=5.0.0" torch accelerate'},
+ "code": 'uv pip install "transformers>=5.0.0" torch accelerate'},
{"type": "label", "text": "Download & Run:"},
{"type": "code_block", "language": "python",
"code": (
@@ -82,7 +82,7 @@
"sections": [
{"type": "label", "text": "Install:"},
{"type": "code_block", "language": "bash",
- "code": "pip install vllm==0.14"},
+ "code": "uv pip install vllm==0.14"},
{"type": "label", "text": "Run:"},
{"type": "code_block", "language": "python",
"code": (
@@ -121,13 +121,7 @@
"sections": [
{"type": "label", "text": "Install:"},
{"type": "code_block", "language": "bash",
- "code": 'pip install "transformers>=5.0.0" pillow torch'},
- {"type": "note", "children": [
- {"type": "text",
- "text": "Transformers v5 is newly released. If you encounter issues, fall back to the pinned git source:"},
- {"type": "code_block_margin", "language": "bash",
- "code": "pip install git+https://github.com/huggingface/transformers.git@3c2517727ce28a30f5044e01663ee204deb1cdbe pillow torch"},
- ]},
+ "code": 'uv pip install "transformers>=5.0.0" pillow torch'},
{"type": "label", "text": "Download & Run:"},
{"type": "notebook_code", "language": "python"},
],
@@ -142,15 +136,9 @@
"text": "vLLM support for LFM Vision Models requires a specific version. Install from the custom source below."},
{"type": "label", "text": "Install:"},
{"type": "code_block", "language": "bash",
- "code": "VLLM_PRECOMPILED_WHEEL_COMMIT=72506c98349d6bcd32b4e33eec7b5513453c1502 \\\n VLLM_USE_PRECOMPILED=1 \\\n pip install git+https://github.com/vllm-project/vllm.git"},
+ "code": "VLLM_PRECOMPILED_WHEEL_COMMIT=72506c98349d6bcd32b4e33eec7b5513453c1502 \\\n VLLM_USE_PRECOMPILED=1 \\\n uv pip install git+https://github.com/vllm-project/vllm.git"},
{"type": "code_block", "language": "bash",
- "code": 'pip install "transformers>=5.0.0" pillow'},
- {"type": "note", "children": [
- {"type": "text",
- "text": "Transformers v5 is newly released. If you encounter issues, fall back to the pinned git source:"},
- {"type": "code_block_margin", "language": "bash",
- "code": "pip install git+https://github.com/huggingface/transformers.git@3c2517727ce28a30f5044e01663ee204deb1cdbe pillow"},
- ]},
+ "code": 'uv pip install "transformers>=5.0.0" pillow'},
{"type": "label", "text": "Run:"},
{"type": "notebook_code", "language": "python"},
],
diff --git a/snippets/quickstart/text-transformers.mdx b/snippets/quickstart/text-transformers.mdx
index f6c3cbe..1c63446 100644
--- a/snippets/quickstart/text-transformers.mdx
+++ b/snippets/quickstart/text-transformers.mdx
@@ -3,7 +3,7 @@ export const TextTransformers = ({ modelId, samplingParams }) => (
Install:
-{`pip install "transformers>=5.0.0" torch accelerate`.split('\n').map((line, i) => {line}{'\n'})}
+{`uv pip install "transformers>=5.0.0" torch accelerate`.split('\n').map((line, i) => {line}{'\n'})}
Download & Run:
diff --git a/snippets/quickstart/text-vllm.mdx b/snippets/quickstart/text-vllm.mdx
index 5976a58..55b8671 100644
--- a/snippets/quickstart/text-vllm.mdx
+++ b/snippets/quickstart/text-vllm.mdx
@@ -3,7 +3,7 @@ export const TextVllm = ({ modelId, samplingParams }) => (
Install:
-{`pip install vllm==0.14`.split('\n').map((line, i) => {line}{'\n'})}
+{`uv pip install vllm==0.14`.split('\n').map((line, i) => {line}{'\n'})}
Run:
diff --git a/snippets/quickstart/vl-transformers.mdx b/snippets/quickstart/vl-transformers.mdx
index 782c4f0..0c0acba 100644
--- a/snippets/quickstart/vl-transformers.mdx
+++ b/snippets/quickstart/vl-transformers.mdx
@@ -3,17 +3,9 @@ export const VlTransformers = ({ modelId }) => (
Install:
-{`pip install "transformers>=5.0.0" pillow torch`.split('\n').map((line, i) => {line}{'\n'})}
+{`uv pip install "transformers>=5.0.0" pillow torch`.split('\n').map((line, i) => {line}{'\n'})}
-
-Transformers v5 is newly released. If you encounter issues, fall back to the pinned git source:
-
-
-{`pip install git+https://github.com/huggingface/transformers.git@3c2517727ce28a30f5044e01663ee204deb1cdbe pillow torch`.split('\n').map((line, i) => {line}{'\n'})}
-
-
-
Download & Run:
diff --git a/snippets/quickstart/vl-vllm.mdx b/snippets/quickstart/vl-vllm.mdx
index e1f8bf0..70a9a5f 100644
--- a/snippets/quickstart/vl-vllm.mdx
+++ b/snippets/quickstart/vl-vllm.mdx
@@ -8,22 +8,14 @@ vLLM support for LFM Vision Models requires a specific version. Install from the
{`VLLM_PRECOMPILED_WHEEL_COMMIT=72506c98349d6bcd32b4e33eec7b5513453c1502 \\
VLLM_USE_PRECOMPILED=1 \\
- pip install git+https://github.com/vllm-project/vllm.git`.split('\n').map((line, i) => {line}{'\n'})}
+ uv pip install git+https://github.com/vllm-project/vllm.git`.split('\n').map((line, i) => {line}{'\n'})}
-{`pip install "transformers>=5.0.0" pillow`.split('\n').map((line, i) => {line}{'\n'})}
+{`uv pip install "transformers>=5.0.0" pillow`.split('\n').map((line, i) => {line}{'\n'})}
-
-Transformers v5 is newly released. If you encounter issues, fall back to the pinned git source:
-
-
-{`pip install git+https://github.com/huggingface/transformers.git@3c2517727ce28a30f5044e01663ee204deb1cdbe pillow`.split('\n').map((line, i) => {line}{'\n'})}
-
-
-
Run: