createModel method
- required ModelType modelType,
- ModelFileType fileType = ModelFileType.task,
- int maxTokens = 1024,
- PreferredBackend? preferredBackend,
- List<int>? loraRanks,
- int? maxNumImages,
- bool supportImage = false,
- bool supportAudio = false,
- bool? enableSpeculativeDecoding,
- int? maxConcurrentSessions,
Creates and returns a new InferenceModel instance.
modelType — model type to create.
maxTokens — maximum context length for the model.
preferredBackend — backend preference (e.g., CPU, GPU).
loraRanks — optional supported LoRA ranks.
maxNumImages — maximum number of images (for multimodal models).
supportImage — whether the model supports images.
supportAudio — whether the model supports audio (Gemma 3n E4B only).
enableSpeculativeDecoding — Multi-Token Prediction toggle for Gemma 4
E2B/E4B (LiteRT-LM v0.11.0+). null honors the model's default;
true/false forces on/off. Older .litertlm files without an MTP
drafter ignore this flag at the SDK level.
maxConcurrentSessions — optional cap on the number of sessions open
at once via InferenceModel.openSession. null (default) = no cap,
backward-compatible. When set, the (cap+1)-th InferenceModel.openSession
throws StateError. Use this on mobile with large models to guard
against OOM from multiple concurrent KV caches.
Implementation
/// Creates the [InferenceModel] for the currently active inference model, or
/// returns the singleton that already exists (or is still being created).
///
/// The model file is resolved from the unified manager's active inference
/// model. [modelType] and [fileType] are part of the interface but are not
/// read by this implementation; the engine is chosen by [EngineRegistry]
/// from the active model's spec. All other arguments are forwarded to the
/// selected engine through [RuntimeConfig].
///
/// If an instance for an active model with the same name already exists, it
/// is returned as-is and the arguments of this call are not applied. If the
/// active model's name differs from the one the current instance was built
/// for, the current instance is closed before a new one is created.
///
/// Throws a [StateError] when no active inference model is set or when no
/// registered engine can handle the active model, and an [Exception] when
/// the active model is not installed or its file cannot be found. Any
/// failure during creation clears the cached initialization state, so a
/// later call starts a fresh attempt instead of replaying the old error.
@override
Future<InferenceModel> createModel({
  required ModelType modelType,
  ModelFileType fileType = ModelFileType.task,
  int maxTokens = 1024,
  PreferredBackend? preferredBackend,
  List<int>? loraRanks,
  int? maxNumImages,
  bool supportImage = false,
  bool supportAudio = false, // Enabling audio support (Gemma 3n E4B)
  bool? enableSpeculativeDecoding,
  int? maxConcurrentSessions,
}) async {
  // Check if model is ready through unified system
  final manager = _unifiedManager;
  final activeModel = manager.activeInferenceModel;
  // No active inference model - user must set one first
  if (activeModel == null) {
    throw StateError(
        'No active inference model set. Use `FlutterGemma.installModel()` or `modelManager.setActiveModel()` to set a model first');
  }
  // Check if singleton exists and matches the active model
  if (_initCompleter != null &&
      _initializedModel != null &&
      _lastActiveInferenceSpec != null) {
    final currentSpec = _lastActiveInferenceSpec!;
    final requestedSpec = activeModel as InferenceModelSpec;
    if (currentSpec.name != requestedSpec.name) {
      // Active model changed - close old model and create new one
      gemmaLog(
          '⚠️ Active model changed: ${currentSpec.name} → ${requestedSpec.name}');
      gemmaLog('🔄 Closing old model and creating new one...');
      await _initializedModel?.close();
      // close-listener will reset _initializedModel and _initCompleter
      _lastActiveInferenceSpec = null;
    } else {
      // Same model - return existing singleton
      gemmaLog(
          'ℹ️ Reusing existing model instance for ${requestedSpec.name}');
      return _initCompleter!.future;
    }
  }
  // An initialization is already in flight (or finished): share its result.
  if (_initCompleter case Completer<InferenceModel> completer) {
    return completer.future;
  }
  final completer = _initCompleter = Completer<InferenceModel>();
  // Everything after the completer is published runs inside the try block so
  // that EVERY failure (including the preamble checks and any exception
  // thrown by the manager or file system) goes through the same reset path.
  // Otherwise a failed or never-completed completer would stay cached and be
  // handed to every subsequent caller.
  try {
    // Verify the active model is still installed
    final isModelInstalled = await manager.isModelInstalled(activeModel);
    if (!isModelInstalled) {
      throw Exception(
          'Active model is no longer installed. Use the `modelManager` to load the model first');
    }
    // Get the actual model file path through unified system
    final modelFilePaths = await manager.getModelFilePaths(activeModel);
    if (modelFilePaths == null || modelFilePaths.isEmpty) {
      throw Exception(
          'Model file paths not found. Use the `modelManager` to load the model first');
    }
    final modelPath = modelFilePaths.values.first;
    final modelFile = File(modelPath);
    if (!await modelFile.exists()) {
      throw Exception('Model file not found at path: ${modelFile.path}');
    }
    gemmaLog('Using unified model file: $modelPath');
    // Engine selection routes ENTIRELY through [EngineRegistry] (probe-chain).
    // Core registers NO default engine: both MediaPipe (.task/.bin, from
    // flutter_gemma_mediapipe) and LiteRT-LM (.litertlm, from
    // flutter_gemma_litertlm) are fully opt-in via
    // FlutterGemma.initialize(inferenceEngines: [...]). Core only resolves the
    // model path (preamble above) + owns the singleton lifecycle centrally
    // (track + reset on close); the selected engine builds the model.
    final spec = activeModel as InferenceModelSpec;
    final config = RuntimeConfig(
      maxTokens: maxTokens,
      modelPath: modelPath,
      preferredBackend: preferredBackend,
      supportImage: supportImage,
      supportAudio: supportAudio,
      maxNumImages: maxNumImages,
      enableSpeculativeDecoding: enableSpeculativeDecoding,
      maxConcurrentSessions: maxConcurrentSessions,
      loraRanks: loraRanks,
    );
    final engine = EngineRegistry.instance.findFor(spec);
    if (engine == null) {
      throw StateError(
        'No inference engine can handle this model (ModelFileType.${spec.fileType.name}). '
        'Add the engine package to pubspec.yaml and pass it in inferenceEngines: '
        'of FlutterGemma.initialize(...). Registered engines: '
        '${EngineRegistry.instance.registered.map((e) => e.name).join(", ")}.',
      );
    }
    final model = await engine.createModel(spec, config);
    // Core owns the singleton lifecycle: track it + reset on close. The
    // package-built model fires this via CloseNotifier (addCloseListener).
    _initializedModel = model;
    model.addCloseListener(() {
      _initializedModel = null;
      _initCompleter = null;
      _lastActiveInferenceSpec = null;
    });
    _lastActiveInferenceSpec = spec;
    completer.complete(model);
    return model;
  } catch (e, st) {
    // FIX #170: Reset state to allow retry with different model
    _initCompleter = null;
    _initializedModel = null;
    _lastActiveInferenceSpec = null;
    // This caller receives the error via the throw below. Concurrent callers
    // awaiting the shared future still receive it through their own
    // listeners; ignore() only stops the completer's future from being
    // reported as an uncaught async error when nobody else is listening.
    completer.future.ignore();
    completer.completeError(e, st);
    Error.throwWithStackTrace(e, st);
  }
}