createModel method
- required ModelType modelType,
- ModelFileType fileType = ModelFileType.task,
- int maxTokens = 1024,
- PreferredBackend? preferredBackend,
- List<int>? loraRanks,
- int? maxNumImages,
- bool supportImage = false,
- bool supportAudio = false,
- bool? enableSpeculativeDecoding,
- int? maxConcurrentSessions,
Creates and returns a new InferenceModel instance.
modelType — model type to create.
maxTokens — maximum context length for the model.
preferredBackend — backend preference (e.g., CPU, GPU).
loraRanks — optional supported LoRA ranks.
maxNumImages — maximum number of images (for multimodal models).
supportImage — whether the model supports images.
supportAudio — whether the model supports audio (Gemma 3n E4B only).
enableSpeculativeDecoding — Multi-Token Prediction toggle for Gemma 4
E2B/E4B (LiteRT-LM v0.11.0+). null honors the model's default;
true/false forces on/off. Older .litertlm files without an MTP
drafter ignore this flag at the SDK level.
maxConcurrentSessions — optional cap on the number of sessions open
at once via InferenceModel.openSession. null (default) = no cap,
backward-compatible. When set, the (cap+1)-th InferenceModel.openSession
throws StateError. Use this on mobile with large models to guard
against OOM from multiple concurrent KV caches.
Implementation
/// Creates the shared [InferenceModel] for the active inference model, or
/// returns the one that already exists.
///
/// The active model comes from `_modelManager.activeInferenceModel`. When an
/// instance already exists for a spec with the same name and the same
/// [maxTokens], [supportImage] and [supportAudio], that instance is returned.
/// When any of those differ, the existing instance is closed and a new one is
/// built. [preferredBackend], [maxNumImages], [enableSpeculativeDecoding] and
/// [maxConcurrentSessions] are forwarded to the engine but do not take part in
/// the reuse check, so changing only those returns the existing instance.
///
/// While an initialization is in flight, every caller receives that
/// initialization's future regardless of the arguments it passed.
///
/// [modelType], [fileType] and [loraRanks] are part of the overridden
/// signature but are not read by this implementation; the engine is chosen
/// from the active spec via `EngineRegistry.instance.findFor`.
///
/// Throws a [StateError] when no active inference model is set or when no
/// registered engine can handle the active spec. Throws an [Exception] when
/// the active model is not installed or its file paths cannot be resolved.
/// Errors raised by the engine's `createModel` are propagated unchanged.
@override
Future<InferenceModel> createModel({
  required ModelType modelType,
  ModelFileType fileType = ModelFileType.task,
  int maxTokens = 1024,
  PreferredBackend? preferredBackend,
  List<int>? loraRanks,
  int? maxNumImages,
  bool supportImage = false,
  bool supportAudio = false,
  bool? enableSpeculativeDecoding,
  int? maxConcurrentSessions,
}) async {
  // Check active model
  final activeModel = _modelManager.activeInferenceModel;
  if (activeModel == null) {
    throw StateError(
      'No active inference model set. Use `FlutterGemma.installModel()` or `modelManager.setActiveModel()` first',
    );
  }

  // Check if singleton exists and matches active model + runtime params.
  // Fields are copied to locals so they promote and stay stable across the
  // await below.
  final ready = _initCompleter;
  final existingModel = _initializedModel;
  final currentSpec = _lastActiveInferenceSpec;
  if (ready != null && existingModel != null && currentSpec != null) {
    final requestedSpec = activeModel as InferenceModelSpec;
    final modelChanged = currentSpec.name != requestedSpec.name;
    final p = _lastInferenceParams;
    final paramsChanged = p != null &&
        (p.supportImage != supportImage ||
            p.supportAudio != supportAudio ||
            p.maxTokens != maxTokens);

    if (!modelChanged && !paramsChanged) {
      gemmaLog('Reusing existing model instance for ${requestedSpec.name}');
      return ready.future;
    }

    gemmaLog(
        'Model recreation: modelChanged=$modelChanged, paramsChanged=$paramsChanged');
    // Detach the stale singleton BEFORE awaiting close(). Otherwise a caller
    // that runs during the await could be handed the model that is being
    // closed, or start its own initialization only to have the bookkeeping
    // wiped when this call resumes.
    _initCompleter = null;
    _initializedModel = null;
    _lastActiveInferenceSpec = null;
    _lastInferenceParams = null;
    await existingModel.close();
  }

  // Return existing completer if initialization in progress (this also
  // covers an initialization started by another caller while the stale
  // model above was closing).
  if (_initCompleter case final pending?) {
    return pending.future;
  }

  final completer = _initCompleter = Completer<InferenceModel>();
  try {
    // Verify model is installed
    final isInstalled = await _modelManager.isModelInstalled(activeModel);
    if (!isInstalled) {
      throw Exception('Active model is no longer installed');
    }

    // Get model file path
    final modelFilePaths = await _modelManager.getModelFilePaths(activeModel);
    if (modelFilePaths == null || modelFilePaths.isEmpty) {
      throw Exception('Model file paths not found');
    }
    final modelPath = modelFilePaths.values.first;
    gemmaLog('[FlutterGemmaDesktop] Using model: $modelPath');

    // Core resolves the model path + owns the singleton lifecycle, then
    // dispatches construction polymorphically through the EngineRegistry.
    // Desktop registers NO default engine — the LiteRtLmEngine is supplied
    // via FlutterGemma.initialize(inferenceEngines:). If the registry is
    // empty (or no engine canHandle the spec), the findFor==null StateError
    // below fires. Desktop is litertlm-only; a `.task` request would simply
    // find no matching engine.
    final spec = activeModel as InferenceModelSpec;
    final config = RuntimeConfig(
      maxTokens: maxTokens,
      modelPath: modelPath,
      preferredBackend: preferredBackend,
      supportImage: supportImage,
      supportAudio: supportAudio,
      maxNumImages: maxNumImages,
      enableSpeculativeDecoding: enableSpeculativeDecoding,
      maxConcurrentSessions: maxConcurrentSessions,
    );

    final engine = EngineRegistry.instance.findFor(spec);
    if (engine == null) {
      throw StateError(
        'No inference engine can handle this model (ModelFileType.${spec.fileType.name}). '
        'Add the engine package to pubspec.yaml and pass it in inferenceEngines: '
        'of FlutterGemma.initialize(...). Registered engines: '
        '${EngineRegistry.instance.registered.map((e) => e.name).join(", ")}.',
      );
    }

    final model = await engine.createModel(spec, config);

    // Core owns the singleton lifecycle: track it + reset on close. The
    // package-built model fires this via CloseNotifier (addCloseListener).
    _initializedModel = model;
    _lastInferenceParams = (
      supportImage: supportImage,
      supportAudio: supportAudio,
      maxTokens: maxTokens,
    );
    model.addCloseListener(() {
      // A model that was already detached (replaced by a newer instance or a
      // newer in-flight initialization) must not wipe the newer bookkeeping.
      if (!identical(_initializedModel, model)) return;
      _initializedModel = null;
      _initCompleter = null;
      _lastActiveInferenceSpec = null;
      _lastInferenceParams = null;
    });
    _lastActiveInferenceSpec = spec;

    completer.complete(model);
    return model;
  } catch (e, st) {
    // This caller gets the error through `rethrow`. If nobody else is
    // awaiting the completer, its errored future would otherwise be reported
    // as an unhandled asynchronous error; ignore() suppresses only that
    // report — concurrent waiters still receive the error.
    completer.future.ignore();
    completer.completeError(e, st);
    _initCompleter = null;
    _initializedModel = null;
    _lastActiveInferenceSpec = null;
    _lastInferenceParams = null;
    rethrow;
  }
}