initialize method

Future<void> initialize({
  required String modelPath,
  String backend = 'gpu',
  int maxTokens = 2048,
  String? cacheDir,
  bool enableVision = false,
  int maxNumImages = 0,
  bool enableAudio = false,
  bool? enableSpeculativeDecoding,
})

Initialize the engine with model path and settings.

Implementation

/// Initializes the native LiteRT-LM engine for the model at [modelPath].
///
/// Builds a native engine-settings object, applies the options below, then
/// creates the engine on a background isolate (via [Isolate.run]) so the
/// calling isolate is not blocked while the native call runs. On success the
/// engine pointer is stored in `_engine` and `_isInitialized` is set.
///
/// * [backend] is passed to the native settings as the main backend and is
///   also used as the vision backend when [enableVision] is true.
/// * [maxTokens] is forwarded as the maximum number of tokens.
/// * [cacheDir], when non-null, is forwarded as the cache directory.
/// * [maxNumImages] is only forwarded when greater than zero.
/// * [enableAudio] selects a `'cpu'` audio backend; otherwise no audio
///   backend is passed.
/// * [enableSpeculativeDecoding] is only forwarded when non-null, so `null`
///   leaves the native default untouched.
///
/// The native settings object and every native string allocated here are
/// released on all exit paths, including when an exception is thrown part
/// way through configuration or engine creation.
///
/// Throws an [Exception] when the native side returns a null settings or
/// engine pointer, and a [StateError] on Android with the `'npu'` backend
/// when the platform channel does not return a native library directory.
Future<void> initialize({
  required String modelPath,
  String backend = 'gpu',
  int maxTokens = 2048,
  String? cacheDir,
  bool enableVision = false,
  int maxNumImages = 0,
  bool enableAudio = false,
  bool? enableSpeculativeDecoding,
}) async {
  final initSw = Stopwatch()..start();
  _ensureBindings();
  _backend = backend;
  final bindingsMs = initSw.elapsedMilliseconds;
  debugPrint('[LiteRtLmFfi/perf] _ensureBindings: ${bindingsMs}ms');
  final b = _bindings!;

  // Native copies of the string arguments. They are allocated with `calloc`
  // so that the `calloc.free` calls below release them through the same
  // allocator that created them.
  final modelPathPtr = modelPath.toNativeUtf8(allocator: calloc);
  final backendPtr = backend.toNativeUtf8(allocator: calloc);
  final visionBackendPtr =
      enableVision ? backend.toNativeUtf8(allocator: calloc) : nullptr;
  final audioBackendPtr =
      enableAudio ? 'cpu'.toNativeUtf8(allocator: calloc) : nullptr;

  try {
    final settingsCreateStart = initSw.elapsedMilliseconds;
    final settings = b.litert_lm_engine_settings_create(
      modelPathPtr.cast(),
      backendPtr.cast(),
      visionBackendPtr == nullptr ? nullptr : visionBackendPtr.cast(),
      audioBackendPtr == nullptr ? nullptr : audioBackendPtr.cast(),
    );
    debugPrint(
        '[LiteRtLmFfi/perf] settings_create: ${initSw.elapsedMilliseconds - settingsCreateStart}ms');

    if (settings == nullptr) {
      throw Exception('Failed to create engine settings');
    }

    // From here on the settings object is owned by this method. The inner
    // `finally` deletes it on every exit path; previously an exception
    // thrown during configuration or engine creation leaked it.
    try {
      // Configure settings
      b.litert_lm_engine_settings_set_max_num_tokens(settings, maxTokens);

      // Enable benchmarking for session metrics (token counts, timing)
      b.litert_lm_engine_settings_enable_benchmark(settings);

      if (cacheDir != null) {
        final cacheDirPtr = cacheDir.toNativeUtf8(allocator: calloc);
        try {
          // Sets cache dir on main, vision, and audio executors (C API
          // patched)
          b.litert_lm_engine_settings_set_cache_dir(
              settings, cacheDirPtr.cast());
        } finally {
          // NOTE(review): freeing right after the call assumes the native
          // setter copies the string — confirm against the C API.
          calloc.free(cacheDirPtr);
        }
      }

      if (maxNumImages > 0) {
        b.litert_lm_engine_settings_set_max_num_images(settings, maxNumImages);
      }

      // MTP / speculative decoding (LiteRT-LM v0.11.0+). Skip when null so
      // the SDK uses the model's default; only call when caller explicitly
      // forces on/off.
      if (enableSpeculativeDecoding != null) {
        b.litert_lm_engine_settings_set_enable_speculative_decoding(
            settings, enableSpeculativeDecoding);
      }

      // Windows NPU: point LiteRT at the directory containing
      // `LiteRtDispatch.dll` and disable HW mask update path. Native Assets
      // bundles both DLLs next to the executable, so resolvedExecutable.parent
      // is the right path. Without `dispatch_lib_dir` LiteRT reads
      // uninitialized env-option memory and engine_create crashes; without
      // `use_hw_masking_for_npu(false)` LiteRT sets up the kWH HW mask method
      // which Intel preview NPU (LunarLake/PantherLake) doesn't fully support
      // → CFG check failure 0xc0000409 (per Matt Kreileder's Intel NPU
      // pipeline instructions).
      if (Platform.isWindows && backend == 'npu') {
        final exeDir = File(Platform.resolvedExecutable).parent.path;
        final dirPtr = exeDir.toNativeUtf8(allocator: calloc);
        try {
          b.litert_lm_engine_settings_set_litert_dispatch_lib_dir(
              settings, dirPtr.cast());
        } finally {
          calloc.free(dirPtr);
        }
        b.litert_lm_engine_settings_set_use_hw_masking_for_npu(settings, false);
        debugPrint(
            '[LiteRtLmFfi] NPU Windows: dispatch_lib_dir=$exeDir, use_hw_masking_for_npu=false');
      }

      // Android NPU: point LiteRT at the app's nativeLibraryDir so it can
      // dlopen libLiteRtDispatch_Qualcomm.so from there. On Android, Native
      // Assets unpacks all bundled .so files into nativeLibraryDir at install
      // time; without this setting LiteRT searches system paths and fails.
      if (Platform.isAndroid && backend == 'npu') {
        const bundledChannel = MethodChannel('flutter_gemma_bundled');
        final nativeLibDir =
            await bundledChannel.invokeMethod<String>('getNativeLibraryDir');
        if (nativeLibDir == null) {
          throw StateError(
              '[LiteRtLmFfi] NPU Android: getNativeLibraryDir returned null — '
              'plugin channel not registered; cannot locate '
              'libLiteRtDispatch_Qualcomm.so.');
        }
        final dirPtr = nativeLibDir.toNativeUtf8(allocator: calloc);
        try {
          b.litert_lm_engine_settings_set_litert_dispatch_lib_dir(
              settings, dirPtr.cast());
        } finally {
          calloc.free(dirPtr);
        }
        debugPrint('[LiteRtLmFfi] NPU Android: dispatch_lib_dir=$nativeLibDir');
      }

      // Create engine in a background isolate to avoid blocking UI.
      // Pass settings pointer as int address (Pointer can't cross isolates).
      debugPrint(
          '[LiteRtLmFfi] Creating engine from $modelPath (backend=$backend, maxTokens=$maxTokens) ...');
      debugPrint(
          '[LiteRtLmFfi/perf] === START litert_lm_engine_create (native — model load + accelerator init + KV cache prefill) ===');
      final settingsAddr = settings.address;
      final sw = Stopwatch()..start();
      final engineAddr = await Isolate.run(() {
        final isolateSw = Stopwatch()..start();
        // The worker isolate opens the library itself and looks up the
        // symbol by name rather than reusing the bindings object.
        final lib = Platform.isIOS
            ? DynamicLibrary.open(
                '@executable_path/Frameworks/LiteRtLm.framework/LiteRtLm')
            : Platform.isMacOS
                ? DynamicLibrary.open('LiteRtLm.framework/LiteRtLm')
                : (Platform.isLinux || Platform.isAndroid)
                    ? DynamicLibrary.open('libLiteRtLm.so')
                    : DynamicLibrary.open('LiteRtLm.dll');
        // ignore: avoid_print
        print(
            '[LiteRtLmFfi/perf]   isolate: DynamicLibrary.open: ${isolateSw.elapsedMilliseconds}ms');
        final lookupStart = isolateSw.elapsedMilliseconds;
        final create = lib.lookupFunction<Pointer Function(Pointer),
            Pointer Function(Pointer)>('litert_lm_engine_create');
        // ignore: avoid_print
        print(
            '[LiteRtLmFfi/perf]   isolate: lookupFunction: ${isolateSw.elapsedMilliseconds - lookupStart}ms');
        final createStart = isolateSw.elapsedMilliseconds;
        // Return the raw address; it is rebuilt into a typed pointer on the
        // calling isolate.
        final ptr = create(Pointer.fromAddress(settingsAddr)).address;
        // ignore: avoid_print
        print(
            '[LiteRtLmFfi/perf]   isolate: native litert_lm_engine_create: ${isolateSw.elapsedMilliseconds - createStart}ms');
        return ptr;
      });
      _engine = Pointer<LiteRtLmEngine>.fromAddress(engineAddr);
      sw.stop();
      debugPrint(
          '[LiteRtLmFfi/perf] === END litert_lm_engine_create: ${sw.elapsedMilliseconds}ms (includes isolate spawn ~50-200ms) ===');
      debugPrint(
          '[LiteRtLmFfi] litert_lm_engine_create took ${sw.elapsedMilliseconds}ms');
    } finally {
      // Runs after engine creation on success (same point as before) and
      // also when anything above throws.
      b.litert_lm_engine_settings_delete(settings);
    }

    if (_engine == null || _engine == nullptr) {
      _dumpNativeLog();
      throw Exception(
          'Failed to create engine. Model may be invalid: $modelPath');
    }

    _isInitialized = true;
    debugPrint(
        '[LiteRtLmFfi/perf] initialize() total: ${initSw.elapsedMilliseconds}ms');
    debugPrint('[LiteRtLmFfi] Engine initialized successfully');

    // Auto-dump the SDK's stderr log after successful engine_create so
    // users can see what happens inside the native call (model load time,
    // accelerator init, sampler dlopen attempts, KV cache prefill, etc.).
    // No-op when stderr redirection isn't wired (release / Android /
    // Windows). Safe to call before _isInitialized was true since the
    // dump only reads a file, doesn't touch native state.
    _dumpNativeLog();
  } finally {
    calloc.free(modelPathPtr);
    calloc.free(backendPtr);
    if (visionBackendPtr != nullptr) calloc.free(visionBackendPtr);
    if (audioBackendPtr != nullptr) calloc.free(audioBackendPtr);
  }
}