createSession method
Future<InferenceModelSession>
createSession({
- double temperature = .8,
- int randomSeed = 1,
- int topK = 1,
- double? topP,
- String? loraPath,
- bool? enableVisionModality,
- bool? enableAudioModality,
- String? systemInstruction,
- bool enableThinking = false,
- List<Tool> tools = const [],
})
override
Creates a new InferenceModelSession for generation.
temperature, randomSeed, topK, topP — parameters for sampling.
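loraPath — optional path to LoRA model. This implementation does not support LoRA weights: passing a non-null loraPath throws an UnsupportedError.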
enableVisionModality — enable vision modality for multimodal models.
enableAudioModality — enable audio modality for Gemma 3n E4B models.
Implementation
/// Creates a new [InferenceModelSession] backed by a fresh FFI conversation
/// handle.
///
/// Any previously created session is closed before the new conversation is
/// opened. Calls that arrive while a creation is still in flight do not
/// start a second one; they receive the future of the in-flight creation.
///
/// * [temperature], [topK], [topP] and [randomSeed] are forwarded to
///   `ffiClient.createConversationHandle` as sampling parameters.
/// * [systemInstruction] is forwarded as the conversation's system message.
/// * [tools] are serialized into the conversation config only when
///   `modelType` is [ModelType.gemma4] and the list is non-empty; otherwise
///   no tools JSON is passed.
/// * [enableVisionModality] / [enableAudioModality] override the model's
///   `supportImage` / `supportAudio` values when non-null.
/// * [enableThinking] is passed through to the created session.
/// * [loraPath] must be `null`; LoRA weights are not supported here.
///
/// Throws a [StateError] if the model has been closed, and an
/// [UnsupportedError] if [loraPath] is non-null. Errors raised while
/// creating the session are delivered through the returned future.
@override
Future<InferenceModelSession> createSession({
  double temperature = .8,
  int randomSeed = 1,
  int topK = 1,
  double? topP,
  String? loraPath,
  bool? enableVisionModality,
  bool? enableAudioModality,
  String? systemInstruction,
  bool enableThinking = false,
  List<Tool> tools = const [],
}) async {
  if (_isClosed) {
    throw StateError(
        'Model is closed. Create a new instance to use it again');
  }
  if (loraPath != null) {
    throw UnsupportedError(
      'LoRA weights are not supported on the .litertlm FFI path '
      '(loraPath=$loraPath). Track upstream LiteRT-LM C API support; '
      'remove loraPath or use a MediaPipe .task model on Android/iOS.',
    );
  }
  // Single-flight guard for genuinely *concurrent* callers only. The
  // completer is cleared in the `finally` below once creation settles,
  // so a *sequential* second call falls through and opens a fresh
  // conversation handle (closing the prior session first) instead of
  // returning the cached session. Without this, the cached completer
  // made every later createChat reuse the first session, so the
  // previous conversation's KV cache bled into the next chat. This is
  // the litert/FFI sibling of the MediaPipe fix in #309 (issue #308).
  if (_createCompleter case Completer<InferenceModelSession> completer) {
    return completer.future;
  }
  final completer = _createCompleter = Completer<InferenceModelSession>();
  final sessionSw = Stopwatch()..start();
  try {
    // Legacy singleton lane: close the previous conversation BEFORE
    // opening a fresh one. The engine holds at most one live
    // conversation (upstream litert-lm #966), so closing first never
    // leaves two alive — matching the delete-before-create order the
    // virtual-session multiplexer already uses. It also means a
    // teardown error can't leak a handle that was created first and
    // then left unwrapped: each session owns its own conversation
    // pointer, so closing the old one can't touch the not-yet-created
    // new one. See PR #310 review.
    await _session?.close();
    // For Gemma 4, push tools into the SDK conversation config so it can
    // render native `<|tool>declaration:...<tool|>` tokens via minja. Other
    // model types still use Dart-side prompt injection in chat.dart.
    final toolsJson = (modelType == ModelType.gemma4 && tools.isNotEmpty)
        ? SdkResponseParser.serializeToolsForSdk(tools)
        : null;
    final beforeConv = sessionSw.elapsedMilliseconds;
    final handle = ffiClient.createConversationHandle(
      systemMessage: systemInstruction,
      toolsJson: toolsJson,
      temperature: temperature,
      topK: topK,
      topP: topP,
      seed: randomSeed,
    );
    gemmaLog(
        '[FfiInferenceModel/perf] createConversation (FFI): ${sessionSw.elapsedMilliseconds - beforeConv}ms');
    // Declared `late final` and assigned separately so the `onClose`
    // closure below can refer to the session being constructed.
    late final FfiInferenceModelSession session;
    session = FfiInferenceModelSession(
      handle: handle,
      modelType: modelType,
      fileType: fileType,
      supportImage: enableVisionModality ?? supportImage,
      supportAudio: enableAudioModality ?? supportAudio,
      enableThinking: enableThinking,
      // Identity-guarded so a late close of a superseded session can't
      // null a newer `_session`. Does NOT touch `_createCompleter` —
      // that is owned by the `finally` below, not by session teardown.
      onClose: () {
        if (identical(_session, session)) _session = null;
      },
    );
    _session = session;
    completer.complete(session);
    gemmaLog(
        '[FfiInferenceModel/perf] createSession total: ${sessionSw.elapsedMilliseconds}ms');
  } catch (e, st) {
    // Only report the failure if the completer is still pending. If the
    // error was raised after `complete(session)` (the trailing perf log),
    // a second completion would throw `StateError` out of this handler
    // and fail the caller even though the session was created, stored in
    // `_session`, and already handed to any concurrent waiters.
    if (!completer.isCompleted) {
      completer.completeError(e, st);
    }
  } finally {
    // Pure in-flight guard: clear once creation settles (success OR
    // failure) so the next call isn't blocked by a cached completer
    // (the issue #308 KV-cache bleed on success; a permanently cached
    // rejected future on failure).
    _createCompleter = null;
  }
  // Return `completer.future` (rather than the session / a rethrow) so
  // the caller stays the error listener after the completer field is
  // cleared above, and concurrent callers share the same result.
  return completer.future;
}