diff --git a/apps/llm/app/_layout.tsx b/apps/llm/app/_layout.tsx
index 5ece80f1f..4ab010693 100644
--- a/apps/llm/app/_layout.tsx
+++ b/apps/llm/app/_layout.tsx
@@ -89,6 +89,14 @@ export default function _layout() {
headerTitleStyle: { color: ColorPalette.primary },
}}
/>
+
Voice Chat
+ router.navigate('multimodal_llm/')}
+ >
+ Multimodal LLM (VLM)
+
);
diff --git a/apps/llm/app/multimodal_llm/index.tsx b/apps/llm/app/multimodal_llm/index.tsx
new file mode 100644
index 000000000..1781684a0
--- /dev/null
+++ b/apps/llm/app/multimodal_llm/index.tsx
@@ -0,0 +1,310 @@
+import { useContext, useEffect, useRef, useState } from 'react';
+import {
+ Image,
+ Keyboard,
+ KeyboardAvoidingView,
+ Platform,
+ StyleSheet,
+ Text,
+ TextInput,
+ TouchableOpacity,
+ TouchableWithoutFeedback,
+ View,
+} from 'react-native';
+import { launchImageLibrary } from 'react-native-image-picker';
+import { useIsFocused } from '@react-navigation/native';
+import { useLLM, LFM2_VL_1_6B_QUANTIZED } from 'react-native-executorch';
+import SendIcon from '../../assets/icons/send_icon.svg';
+import PauseIcon from '../../assets/icons/pause_icon.svg';
+import ColorPalette from '../../colors';
+import Messages from '../../components/Messages';
+import Spinner from '../../components/Spinner';
+import { GeneratingContext } from '../../context';
+
+export default function MultimodalLLMScreenWrapper() { // Route entry point: gates what is rendered on navigation focus.
+ const isFocused = useIsFocused(); // focus state of this screen, from @react-navigation/native
+ return isFocused ? : null; // renders nothing while unfocused. NOTE(review): the focused-branch element is not visible here — presumably the MultimodalLLMScreen component; confirm
+}
+
+function MultimodalLLMScreen() { // Chat screen for the vision-language model: attach an image, type a prompt, show the message history.
+ const [imageUri, setImageUri] = useState(null); // URI of the image attached to the next message; null when none is attached
+ const [userInput, setUserInput] = useState(''); // current draft text, updated through the input's onChangeText
+ const [isTextInputFocused, setIsTextInputFocused] = useState(false); // selects the input border colour below
+ const textInputRef = useRef(null); // used by sendMessage to clear the input
+ const { setGlobalGenerating } = useContext(GeneratingContext);
+
+ const vlm = useLLM({
+ model: LFM2_VL_1_6B_QUANTIZED,
+ });
+
+ useEffect(() => { // mirror the model's isGenerating flag into the shared GeneratingContext
+ setGlobalGenerating(vlm.isGenerating);
+ }, [vlm.isGenerating, setGlobalGenerating]);
+
+ useEffect(() => { // log model errors to the console whenever vlm.error changes to a truthy value
+ if (vlm.error) console.error('MultimodalLLM error:', vlm.error);
+ }, [vlm.error]);
+
+ const pickImage = async () => { // open the photo library and remember the first returned asset's URI
+ const result = await launchImageLibrary({ mediaType: 'photo' });
+ if (result.assets && result.assets.length > 0) { // when no asset is returned the current imageUri is kept
+ const uri = result.assets[0]?.uri;
+ if (uri) setImageUri(uri);
+ }
+ };
+
+ const sendMessage = async () => { // send the trimmed draft, plus the attached image if any, to the model
+ if (!userInput.trim() || vlm.isGenerating) return; // ignore blank drafts and sends while a generation is in flight
+ const text = userInput.trim();
+ setUserInput('');
+ textInputRef.current?.clear();
+ Keyboard.dismiss();
+ const currentImageUri = imageUri; // capture before clearing so the request still carries the image
+ setImageUri(null); // the attachment is consumed by this message
+ try {
+ await vlm.sendMessage(
+ text,
+ currentImageUri ? { imagePath: currentImageUri } : undefined
+ );
+ } catch (e) { // failures are logged, not rethrown; the cleared draft and image are not restored
+ console.error('Generation error:', e);
+ }
+ };
+
+ if (!vlm.isReady) { // model not ready yet: take this early return instead of rendering the chat UI
+ return (
+
+ );
+ }
+
+ return (
+
+
+
+ {vlm.messageHistory.length ? (
+
+
+
+ ) : (
+
+ Hello! 👋
+
+ Pick an image and ask me anything about it.
+
+
+ )}
+
+ {/* Image thumbnail strip */}
+ {imageUri && (
+
+
+ Tap to change
+
+ )}
+
+
+ {/* Image picker button */}
+
+ 📷
+
+
+ setIsTextInputFocused(true)}
+ onBlur={() => setIsTextInputFocused(false)}
+ style={[
+ styles.textInput,
+ {
+ borderColor: isTextInputFocused
+ ? ColorPalette.blueDark
+ : ColorPalette.blueLight,
+ },
+ ]}
+ placeholder={imageUri ? 'Ask about the image…' : 'Your message'}
+ placeholderTextColor="#C1C6E5"
+ multiline
+ onChangeText={setUserInput}
+ />
+
+ {userInput.trim() && !vlm.isGenerating && (
+
+
+
+ )}
+ {vlm.isGenerating && (
+
+
+
+ )}
+
+
+
+
+ );
+}
+
+const styles = StyleSheet.create({ // Style table for the multimodal chat screen.
+ // Setup phase — NOTE(review): no setup*/filePicker*/loadButton* entry is referenced in the visible code; confirm these are still used before keeping them
+ setupContainer: {
+ flex: 1,
+ padding: 24,
+ backgroundColor: '#fff',
+ justifyContent: 'center',
+ },
+ setupTitle: {
+ fontSize: 20,
+ fontFamily: 'medium',
+ color: ColorPalette.primary,
+ marginBottom: 8,
+ },
+ setupHint: {
+ fontSize: 13,
+ fontFamily: 'regular',
+ color: ColorPalette.blueDark,
+ marginBottom: 32,
+ lineHeight: 18,
+ },
+ filePickerRow: {
+ flexDirection: 'row',
+ alignItems: 'center',
+ borderWidth: 1,
+ borderColor: ColorPalette.blueLight,
+ borderRadius: 10,
+ padding: 14,
+ marginBottom: 12,
+ backgroundColor: '#fafbff',
+ },
+ filePickerInfo: { flex: 1 },
+ filePickerLabel: {
+ fontSize: 12,
+ fontFamily: 'medium',
+ color: ColorPalette.blueDark,
+ marginBottom: 2,
+ },
+ filePickerValue: { fontSize: 14, fontFamily: 'regular' },
+ filePickerValueSet: { color: ColorPalette.primary },
+ filePickerValueEmpty: { color: ColorPalette.blueLight },
+ filePickerChevron: {
+ fontSize: 24,
+ color: ColorPalette.blueLight,
+ marginLeft: 8,
+ },
+ loadButton: {
+ marginTop: 16,
+ backgroundColor: ColorPalette.strongPrimary,
+ borderRadius: 10,
+ padding: 14,
+ alignItems: 'center',
+ },
+ loadButtonDisabled: { backgroundColor: ColorPalette.blueLight },
+ loadButtonText: { color: '#fff', fontFamily: 'medium', fontSize: 15 },
+
+ // Chat phase — layout for the message list, the attachment thumbnail strip and the bottom input bar
+ container: { flex: 1 },
+ chatContainer: { flex: 10, width: '100%' },
+ helloMessageContainer: {
+ flex: 10,
+ width: '100%',
+ alignItems: 'center',
+ justifyContent: 'center',
+ },
+ helloText: {
+ fontFamily: 'medium',
+ fontSize: 30,
+ color: ColorPalette.primary,
+ },
+ bottomHelloText: {
+ fontFamily: 'regular',
+ fontSize: 20,
+ lineHeight: 28,
+ textAlign: 'center',
+ color: ColorPalette.primary,
+ paddingHorizontal: 24,
+ },
+ imageThumbnailContainer: {
+ flexDirection: 'row',
+ alignItems: 'center',
+ paddingHorizontal: 16,
+ paddingVertical: 6,
+ gap: 8,
+ },
+ imageThumbnail: {
+ width: 48,
+ height: 48,
+ borderRadius: 8,
+ borderWidth: 1,
+ borderColor: ColorPalette.blueLight,
+ },
+ imageThumbnailHint: {
+ fontSize: 12,
+ fontFamily: 'regular',
+ color: ColorPalette.blueDark,
+ },
+ bottomContainer: {
+ height: 100,
+ width: '100%',
+ flexDirection: 'row',
+ justifyContent: 'space-between',
+ alignItems: 'center',
+ paddingHorizontal: 16,
+ },
+ imageButton: {
+ width: 40,
+ height: 40,
+ justifyContent: 'center',
+ alignItems: 'center',
+ marginRight: 4,
+ },
+ imageButtonText: { fontSize: 22 },
+ textInput: { // base input style; the screen adds a focus-dependent borderColor on top of this
+ flex: 1,
+ borderWidth: 1,
+ borderRadius: 8,
+ lineHeight: 19.6,
+ fontFamily: 'regular',
+ fontSize: 14,
+ color: ColorPalette.primary,
+ padding: 16,
+ },
+ sendChatTouchable: {
+ height: '100%',
+ width: 48,
+ justifyContent: 'center',
+ alignItems: 'flex-end',
+ },
+});
diff --git a/apps/llm/components/MessageItem.tsx b/apps/llm/components/MessageItem.tsx
index c4d7d549e..58da5074c 100644
--- a/apps/llm/components/MessageItem.tsx
+++ b/apps/llm/components/MessageItem.tsx
@@ -4,6 +4,7 @@ import {
StyleSheet,
TouchableOpacity,
Text,
+ Image,
Platform,
} from 'react-native';
import MarkdownComponent from './MarkdownComponent';
@@ -17,19 +18,31 @@ interface MessageItemProps {
}
const MessageItem = memo(({ message, deleteMessage }: MessageItemProps) => {
- return (
-
- {message.role === 'assistant' && (
+ if (message.role === 'assistant') {
+ return (
+
- )}
-
+
+
+
+ );
+ }
+
+ return (
+
+
+ {message.mediaPath && (
+
+ )}
+
+
);
});
@@ -64,17 +77,26 @@ const styles = StyleSheet.create({
marginVertical: 8,
alignItems: 'center',
},
- userMessage: {
+ userMessageWrapper: {
flexDirection: 'row-reverse',
- paddingHorizontal: 12,
- paddingVertical: 8,
marginRight: 8,
marginVertical: 8,
maxWidth: '75%',
+ alignSelf: 'flex-end',
+ alignItems: 'flex-start',
+ },
+ userMessageBubble: {
+ flexDirection: 'column',
+ paddingHorizontal: 12,
+ paddingVertical: 8,
borderRadius: 8,
backgroundColor: ColorPalette.seaBlueLight,
- alignSelf: 'flex-end',
- alignItems: 'center',
+ },
+ userMessageImage: {
+ width: 200,
+ height: 200,
+ borderRadius: 6,
+ marginBottom: 6,
},
aiMessageIconContainer: {
backgroundColor: ColorPalette.seaBlueLight,
diff --git a/apps/llm/package.json b/apps/llm/package.json
index f58bc8127..d0fbb6401 100644
--- a/apps/llm/package.json
+++ b/apps/llm/package.json
@@ -19,6 +19,7 @@
"expo-brightness": "~14.0.8",
"expo-calendar": "~15.0.8",
"expo-constants": "~18.0.11",
+ "expo-document-picker": "~13.0.3",
"expo-font": "~14.0.10",
"expo-linking": "~8.0.10",
"expo-router": "~6.0.17",
@@ -30,6 +31,7 @@
"react-native-device-info": "^15.0.2",
"react-native-executorch": "workspace:*",
"react-native-gesture-handler": "~2.28.0",
+ "react-native-image-picker": "^7.2.2",
"react-native-loading-spinner-overlay": "^3.0.1",
"react-native-markdown-display": "^7.0.2",
"react-native-reanimated": "~4.1.1",
diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h
index 7712b2b9d..e6e21e278 100644
--- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h
+++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h
@@ -45,7 +45,9 @@ template class ModelHostObject : public JsiHostObject {
"getInputShape"));
}
- if constexpr (meta::HasGenerate) {
+ // LLM::generate and LLM::generateMultimodal registered explicitly below
+ if constexpr (meta::HasGenerate &&
+ !meta::SameAs) {
addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject,
promiseHostFunction<&Model::generate>,
"generate"));
@@ -98,6 +100,10 @@ template class ModelHostObject : public JsiHostObject {
}
if constexpr (meta::SameAs) {
+ addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject,
+ promiseHostFunction<&Model::generate>,
+ "generate"));
+
addFunctions(JSI_EXPORT_FUNCTION(
ModelHostObject, synchronousHostFunction<&Model::interrupt>,
"interrupt"));
@@ -144,6 +150,16 @@ template class ModelHostObject : public JsiHostObject {
addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject,
synchronousHostFunction<&Model::reset>,
"reset"));
+
+ addFunctions(
+ JSI_EXPORT_FUNCTION(ModelHostObject,
+ promiseHostFunction<&Model::generateMultimodal>,
+ "generateMultimodal"));
+
+ addFunctions(JSI_EXPORT_FUNCTION(
+ ModelHostObject,
+ synchronousHostFunction<&Model::getVisualTokenCount>,
+ "getVisualTokenCount"));
}
if constexpr (meta::SameAs) {
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp
index 4a9d40033..03afd4ed0 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp
@@ -2,23 +2,42 @@
#include
#include
+#include