createModel method
- required ModelType modelType,
- ModelFileType fileType = ModelFileType.task,
- int maxTokens = 1024,
- PreferredBackend? preferredBackend,
- List<int>? loraRanks,
- int? maxNumImages,
- bool supportImage = false,
- bool supportAudio = false,
- bool? enableSpeculativeDecoding,
- int? maxConcurrentSessions,
Creates and returns a new InferenceModel instance.
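modelType — model type to create.
fileType — model file format (defaults to ModelFileType.task); used to
select the inference engine (.task → MediaPipe, .litertlm → LiteRT-LM).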
maxTokens — maximum context length for the model.
preferredBackend — backend preference (e.g., CPU, GPU).
loraRanks — optional supported LoRA ranks.
maxNumImages — maximum number of images (for multimodal models).
supportImage — whether the model supports images.
supportAudio — whether the model supports audio (Gemma 3n E4B only).
enableSpeculativeDecoding — Multi-Token Prediction toggle for Gemma 4
E2B/E4B (LiteRT-LM v0.11.0+). null honors the model's default;
true/false forces on/off. Older .litertlm files without an MTP
drafter ignore this flag at the SDK level.
maxConcurrentSessions — optional cap on the number of sessions open
at once via InferenceModel.openSession. null (default) = no cap,
backward-compatible. When set, the (cap+1)-th InferenceModel.openSession
throws StateError. Use this on mobile with large models to guard
against OOM from multiple concurrent KV caches.
Implementation
/// Creates and returns a new [InferenceModel], replacing any model that an
/// earlier call created and that is still tracked.
///
/// A previously created model is always closed first; its parameters are not
/// compared with the new request. The engine is then looked up in
/// [EngineRegistry] using a minimal [InferenceModelSpec] built from
/// [modelType] and [fileType], and the remaining arguments are forwarded to
/// that engine through a [RuntimeConfig] whose `modelPath` is empty.
///
/// * [modelType] — model type to create.
/// * [fileType] — model file format; the only input used for engine
///   selection.
/// * [maxTokens] — maximum context length for the model.
/// * [preferredBackend] — backend preference (e.g., CPU, GPU).
/// * [loraRanks] — optional supported LoRA ranks.
/// * [maxNumImages] / [supportImage] — image-related settings. In debug
///   builds a warning is logged when either is set, because image support
///   is flagged as not yet implemented here; the values are still forwarded
///   to the engine.
/// * [supportAudio] — whether the model supports audio.
/// * [enableSpeculativeDecoding] — forwarded to the engine unchanged; `null`
///   leaves the decision to the model's default.
/// * [maxConcurrentSessions] — optional cap on concurrently open sessions,
///   forwarded to the engine; `null` means no cap.
///
/// Throws a [StateError] if no registered engine can handle [fileType].
@override
Future<InferenceModel> createModel({
  required ModelType modelType,
  ModelFileType fileType = ModelFileType.task,
  int maxTokens = 1024,
  PreferredBackend? preferredBackend,
  List<int>? loraRanks,
  int? maxNumImages,
  bool supportImage = false, // Enabling image support
  bool supportAudio = false, // Enabling audio support (Gemma 3n E4B)
  bool? enableSpeculativeDecoding, // Ignored on web (MediaPipe path).
  int? maxConcurrentSessions,
}) async {
  // TODO: Implement multimodal support for web
  if ((supportImage || maxNumImages != null) && kDebugMode) {
    gemmaLog('Warning: Image support is not yet implemented for web platform');
  }

  // A cached singleton may exist from a prior createModel call. Core no longer
  // imports any concrete web inference-model type (the MediaPipe web model
  // moved to flutter_gemma_mediapipe; LiteRT-LM's lives in
  // flutter_gemma_litertlm), so it can't introspect the model's params —
  // any existing model is always closed + replaced.
  //
  // The field is copied to a local so it can be used without `!`, and it is
  // cleared only if it still refers to the model that was just closed: the
  // await below is a suspension point, and an overlapping call may have
  // installed a different model in the meantime.
  final existing = _initializedModel;
  if (existing != null) {
    if (kDebugMode) {
      gemmaLog('[FlutterGemmaWeb] Replacing existing model, closing it first');
    }
    await existing.close();
    if (identical(_initializedModel, existing)) {
      _initializedModel = null;
    }
  }

  // Engine selection routes through [EngineRegistry] (probe-chain), mirroring
  // the mobile/desktop refactor: .task → MediaPipe, .litertlm → LiteRT-LM.
  // Both web engines now live in their own packages (flutter_gemma_mediapipe /
  // flutter_gemma_litertlm), supplied via
  // FlutterGemma.initialize(inferenceEngines: ...). Web registers NO default;
  // each engine builds its own WebModelSourceResolver internally. Web has no
  // resolved file path/cache dir (paths are lazy via the resolver), so the
  // RuntimeConfig's modelPath is empty.
  // Web selection has always been by `fileType` alone; build a minimal spec
  // carrying it for the probe (web does not require an active model to be set
  // before createModel, so we don't depend on webManager.activeInferenceModel).
  final spec = InferenceModelSpec(
    name: 'web-active',
    modelSource: AssetSource('models/active.bin'),
    modelType: modelType,
    fileType: fileType,
  );
  final config = RuntimeConfig(
    maxTokens: maxTokens,
    modelPath: '',
    preferredBackend: preferredBackend,
    supportImage: supportImage,
    supportAudio: supportAudio,
    maxNumImages: maxNumImages,
    enableSpeculativeDecoding: enableSpeculativeDecoding,
    maxConcurrentSessions: maxConcurrentSessions,
    loraRanks: loraRanks,
  );

  final engine = EngineRegistry.instance.findFor(spec);
  if (engine == null) {
    throw StateError(
      'No inference engine can handle this model (ModelFileType.${spec.fileType.name}). '
      'Add the engine package to pubspec.yaml and pass it in inferenceEngines: '
      'of FlutterGemma.initialize(...). Registered engines: '
      '${EngineRegistry.instance.registered.map((e) => e.name).join(", ")}.',
    );
  }

  final model = await engine.createModel(spec, config);

  // Core owns the singleton lifecycle: track it + reset on close. The
  // package-built model fires this via CloseNotifier (addCloseListener).
  _initializedModel = model;
  model.addCloseListener(() {
    // Only drop the reference if it still points at this model, so a late or
    // repeated close of an older model cannot untrack a newer one.
    if (identical(_initializedModel, model)) {
      _initializedModel = null;
    }
  });
  return model;
}