Skip to content

Commit 70102a0

Browse files
bkonyilucaantonelli
authored andcommitted
[ Tool ] Fix crash from possible DDS startup race (flutter#173362)
It's possible (but unlikely) that two DDS instances can try and start at the same time (e.g., a "flutter run" is initiated while an existing "flutter attach" is waiting for a target to attach to). This can lead to DDS failing to initialize for one of the processes when the VM service disconnects it after the other process successfully invokes the "_yieldControlToDDS" RPC. This change adds some retry logic to avoid crashing in this scenario. Fixes flutter#169265
1 parent 491b8fa commit 70102a0

File tree

15 files changed

+159
-94
lines changed

15 files changed

+159
-94
lines changed

packages/flutter_tools/lib/src/commands/attach.dart

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,6 @@ known, it can be explicitly provided to attach via the command-line, e.g.
370370
return runner.attach(
371371
connectionInfoCompleter: connectionInfoCompleter,
372372
appStartedCompleter: appStartedCompleter,
373-
allowExistingDdsInstance: true,
374373
);
375374
},
376375
device,
@@ -411,10 +410,7 @@ known, it can be explicitly provided to attach via the command-line, e.g.
411410
..setupTerminal();
412411
}),
413412
);
414-
result = await runner.attach(
415-
appStartedCompleter: onAppStart,
416-
allowExistingDdsInstance: true,
417-
);
413+
result = await runner.attach(appStartedCompleter: onAppStart);
418414
if (result != 0) {
419415
throwToolExit(null, exitCode: result);
420416
}

packages/flutter_tools/lib/src/isolated/resident_web_runner.dart

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -765,7 +765,6 @@ class ResidentWebRunner extends ResidentRunner {
765765
Completer<DebugConnectionInfo>? connectionInfoCompleter,
766766
Completer<void>? appStartedCompleter,
767767
Future<ConnectionResult?>? connectDebug,
768-
bool allowExistingDdsInstance = false,
769768
bool needsFullRestart = true,
770769
}) async {
771770
if (_chromiumLauncher != null) {

packages/flutter_tools/lib/src/resident_runner.dart

Lines changed: 87 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,6 @@ class FlutterDevice {
235235
PrintStructuredErrorLogMethod? printStructuredErrorLogMethod,
236236
required DebuggingOptions debuggingOptions,
237237
int? hostVmServicePort,
238-
required bool allowExistingDdsInstance,
239238
}) {
240239
final completer = Completer<void>();
241240
late StreamSubscription<void> subscription;
@@ -256,41 +255,99 @@ class FlutterDevice {
256255
}
257256
}
258257

259-
// First check if the VM service is actually listening on vmServiceUri as
260-
// this may not be the case when scraping logcat for URIs. If this URI is
261-
// from an old application instance, we shouldn't try and start DDS.
262-
try {
263-
service = await connectToVmService(vmServiceUri!, logger: globals.logger);
264-
await service.dispose();
265-
} on Exception catch (exception) {
266-
globals.printTrace('Fail to connect to service protocol: $vmServiceUri: $exception');
267-
if (!completer.isCompleted && !_isListeningForVmServiceUri!) {
268-
completer.completeError('failed to connect to $vmServiceUri $exception');
258+
const kMaxAttempts = 3;
259+
for (var attempts = 1; attempts <= kMaxAttempts; ++attempts) {
260+
void handleVmServiceCheckException(Exception e) {
261+
globals.printTrace('Fail to connect to service protocol: $vmServiceUri: $e');
262+
if (!completer.isCompleted && !_isListeningForVmServiceUri!) {
263+
completer.completeError('failed to connect to $vmServiceUri $e');
264+
}
265+
}
266+
267+
// First check if the VM service is actually listening on vmServiceUri as
268+
// this may not be the case when scraping logcat for URIs. If this URI is
269+
// from an old application instance, we shouldn't try and start DDS.
270+
try {
271+
service = await connectToVmService(vmServiceUri!, logger: globals.logger);
272+
await service.dispose();
273+
} on vm_service.RPCError catch (e, st) {
274+
if (!e.isConnectionDisposedException) {
275+
handleVmServiceCheckException(e);
276+
return;
277+
}
278+
// It's possible (but unlikely) that two DDS instances can try and start at the same
279+
// time (e.g., a "flutter run" is initiated while an existing "flutter attach" is
280+
// waiting for a target to attach to). This can lead to the initial VM service connection
281+
// failing for one of the processes when the VM service disconnects it after the other
282+
// instance successfully invoked the "_yieldControlToDDS" RPC.
283+
//
284+
// To handle this, we retry connecting to the VM service, which should successfully
285+
// be redirected to the DDS instance.
286+
//
287+
// See https://github.com/flutter/flutter/issues/169265 for details.
288+
if (attempts == kMaxAttempts) {
289+
globals.printTrace(
290+
'Failed to make initial connection to VM Service (attempt $attempts of $kMaxAttempts).',
291+
);
292+
handleError(e, st);
293+
return;
294+
}
295+
// Exponential backoff.
296+
final int backoffPeriod = (1 << (attempts - 1)) * 100;
297+
globals.printTrace(
298+
'Failed to make initial connection to VM Service (attempt $attempts of $kMaxAttempts). '
299+
'Retrying in ${backoffPeriod}ms...',
300+
);
301+
await Future<void>.delayed(Duration(milliseconds: backoffPeriod));
302+
} on Exception catch (e) {
303+
handleVmServiceCheckException(e);
304+
return;
269305
}
270-
return;
271306
}
272307

273-
// This first try block is meant to catch errors that occur during DDS startup
274-
// (e.g., failure to bind to a port, failure to connect to the VM service,
275-
// attaching to a VM service with existing clients, etc.).
276-
try {
277-
await device!.dds.startDartDevelopmentServiceFromDebuggingOptions(
278-
vmServiceUri,
279-
debuggingOptions: debuggingOptions,
280-
);
281-
} on DartDevelopmentServiceException catch (e, st) {
282-
if (!allowExistingDdsInstance ||
283-
(e.errorCode != DartDevelopmentServiceException.existingDdsInstanceError)) {
308+
for (var attempts = 1; attempts <= kMaxAttempts; ++attempts) {
309+
// This try block is meant to catch errors that occur during DDS startup
310+
// (e.g., failure to bind to a port, failure to connect to the VM service,
311+
// attaching to a VM service with existing clients, etc.).
312+
try {
313+
await device!.dds.startDartDevelopmentServiceFromDebuggingOptions(
314+
vmServiceUri!,
315+
debuggingOptions: debuggingOptions,
316+
);
317+
break;
318+
} on DartDevelopmentServiceException catch (e, st) {
319+
if (e.errorCode == DartDevelopmentServiceException.existingDdsInstanceError) {
320+
existingDds = true;
321+
break;
322+
}
323+
// It's possible (but unlikely) that two DDS instances can try and start at the same
324+
// time (e.g., a "flutter run" is initiated while an existing "flutter attach" is
325+
// waiting for a target to attach to). This leads to DDS failing to initialize for
326+
// one of the processes when the VM service disconnects it after the other instance
327+
// successfully invoked the "_yieldControlToDDS" RPC.
328+
//
329+
// To handle this, we retry to start DDS after a short delay, which should result in
330+
// an existingDdsInstanceError if the failure to start was due to a startup race.
331+
//
332+
// See https://github.com/flutter/flutter/issues/169265 for details.
333+
if (attempts == kMaxAttempts) {
334+
globals.printTrace('Failed to start DDS (attempt $attempts of $kMaxAttempts).');
335+
handleError(e, st);
336+
return;
337+
}
338+
// Exponential backoff.
339+
final int backoffPeriod = (1 << (attempts - 1)) * 100;
340+
globals.printTrace(
341+
'Failed to start DDS (attempt $attempts of $kMaxAttempts). '
342+
'Retrying in ${backoffPeriod}ms...',
343+
);
344+
await Future<void>.delayed(Duration(milliseconds: backoffPeriod));
345+
} on ToolExit {
346+
rethrow;
347+
} on Exception catch (e, st) {
284348
handleError(e, st);
285349
return;
286-
} else {
287-
existingDds = true;
288350
}
289-
} on ToolExit {
290-
rethrow;
291-
} on Exception catch (e, st) {
292-
handleError(e, st);
293-
return;
294351
}
295352
}
296353
// This second try block handles cases where the VM service connection goes down
@@ -1144,7 +1201,6 @@ abstract class ResidentRunner extends ResidentHandlers {
11441201
Future<int?> attach({
11451202
Completer<DebugConnectionInfo>? connectionInfoCompleter,
11461203
Completer<void>? appStartedCompleter,
1147-
bool allowExistingDdsInstance = false,
11481204
bool needsFullRestart = true,
11491205
});
11501206

@@ -1227,7 +1283,6 @@ abstract class ResidentRunner extends ResidentHandlers {
12271283
await residentDevtoolsHandler!.shutdown();
12281284
await stopEchoingDeviceLog();
12291285
await preExit();
1230-
shutdownDartDevelopmentService();
12311286
appFinished();
12321287
}
12331288

@@ -1298,7 +1353,6 @@ abstract class ResidentRunner extends ResidentHandlers {
12981353
ReloadSources? reloadSources,
12991354
Restart? restart,
13001355
CompileExpression? compileExpression,
1301-
required bool allowExistingDdsInstance,
13021356
}) async {
13031357
if (!debuggingOptions.debuggingEnabled) {
13041358
throw Exception('The service protocol is not enabled.');
@@ -1311,7 +1365,6 @@ abstract class ResidentRunner extends ResidentHandlers {
13111365
reloadSources: reloadSources,
13121366
restart: restart,
13131367
compileExpression: compileExpression,
1314-
allowExistingDdsInstance: allowExistingDdsInstance,
13151368
hostVmServicePort: debuggingOptions.hostVmServicePort,
13161369
printStructuredErrorLogMethod: printStructuredErrorLog,
13171370
);

packages/flutter_tools/lib/src/run_cold.dart

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ class ColdRunner extends ResidentRunner {
6767
// Connect to the VM Service.
6868
if (debuggingEnabled) {
6969
try {
70-
await connectToServiceProtocol(allowExistingDdsInstance: false);
70+
await connectToServiceProtocol();
7171
} on Exception catch (exception) {
7272
globals.printError(exception.toString());
7373
appFailedToStart();
@@ -138,12 +138,11 @@ class ColdRunner extends ResidentRunner {
138138
Future<int> attach({
139139
Completer<DebugConnectionInfo>? connectionInfoCompleter,
140140
Completer<void>? appStartedCompleter,
141-
bool allowExistingDdsInstance = false,
142141
bool needsFullRestart = true,
143142
}) async {
144143
_didAttach = true;
145144
try {
146-
await connectToServiceProtocol(allowExistingDdsInstance: allowExistingDdsInstance);
145+
await connectToServiceProtocol();
147146
} on Exception catch (error) {
148147
globals.printError('Error connecting to the service protocol: $error');
149148
return 2;

packages/flutter_tools/lib/src/run_hot.dart

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -233,30 +233,26 @@ class HotRunner extends ResidentRunner {
233233
Future<int> attach({
234234
Completer<DebugConnectionInfo>? connectionInfoCompleter,
235235
Completer<void>? appStartedCompleter,
236-
bool allowExistingDdsInstance = false,
237236
bool needsFullRestart = true,
238237
}) async {
239238
stopAppDuringCleanup = false;
240239
return _attach(
241240
connectionInfoCompleter: connectionInfoCompleter,
242241
appStartedCompleter: appStartedCompleter,
243-
allowExistingDdsInstance: allowExistingDdsInstance,
244242
needsFullRestart: needsFullRestart,
245243
);
246244
}
247245

248246
Future<int> _attach({
249247
Completer<DebugConnectionInfo>? connectionInfoCompleter,
250248
Completer<void>? appStartedCompleter,
251-
bool allowExistingDdsInstance = false,
252249
bool needsFullRestart = true,
253250
}) async {
254251
try {
255252
await connectToServiceProtocol(
256253
reloadSources: _reloadSourcesService,
257254
restart: _restartService,
258255
compileExpression: _compileExpressionService,
259-
allowExistingDdsInstance: allowExistingDdsInstance,
260256
);
261257
// Catches all exceptions, non-Exception objects are rethrown.
262258
} catch (error) {

packages/flutter_tools/lib/src/vmservice.dart

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,9 @@ Future<vm_service.VmService> setUpVmService({
326326
try {
327327
await Future.wait(registrationRequests);
328328
} on vm_service.RPCError catch (e) {
329+
if (e.isConnectionDisposedException) {
330+
rethrow;
331+
}
329332
throwToolExit('Failed to register service methods on attached VM Service: $e');
330333
}
331334
return vmService;
@@ -483,9 +486,7 @@ class FlutterVmService {
483486
try {
484487
return await service.getVM();
485488
} on vm_service.RPCError catch (err) {
486-
if (err.code == vm_service.RPCErrorKind.kServiceDisappeared.code ||
487-
err.code == vm_service.RPCErrorKind.kConnectionDisposed.code ||
488-
err.message.contains('Service connection disposed')) {
489+
if (err.isConnectionDisposedException) {
489490
globals.printTrace('VmService.getVm call failed: $err');
490491
return null;
491492
}
@@ -505,9 +506,7 @@ class FlutterVmService {
505506
// and should begin to shutdown due to the service connection closing.
506507
// Swallow the exception here and let the shutdown logic elsewhere deal
507508
// with cleaning up.
508-
if (e.code == vm_service.RPCErrorKind.kServiceDisappeared.code ||
509-
e.code == vm_service.RPCErrorKind.kConnectionDisposed.code ||
510-
e.message.contains('Service connection disposed')) {
509+
if (e.isConnectionDisposedException) {
511510
return null;
512511
}
513512
rethrow;
@@ -773,10 +772,8 @@ class FlutterVmService {
773772
} on vm_service.RPCError catch (err) {
774773
// If an application is not using the framework or the VM service
775774
// disappears while handling a request, return null.
776-
if ((err.code == vm_service.RPCErrorKind.kMethodNotFound.code) ||
777-
(err.code == vm_service.RPCErrorKind.kServiceDisappeared.code) ||
778-
(err.code == vm_service.RPCErrorKind.kConnectionDisposed.code) ||
779-
(err.message.contains('Service connection disposed'))) {
775+
if (err.code == vm_service.RPCErrorKind.kMethodNotFound.code ||
776+
err.isConnectionDisposedException) {
780777
return null;
781778
}
782779
rethrow;
@@ -1010,3 +1007,10 @@ String processVmServiceMessage(vm_service.Event event) {
10101007
}
10111008
return message;
10121009
}
1010+
1011+
extension RPCErrorExtension on vm_service.RPCError {
1012+
bool get isConnectionDisposedException =>
1013+
code == vm_service.RPCErrorKind.kServiceDisappeared.code ||
1014+
code == vm_service.RPCErrorKind.kConnectionDisposed.code ||
1015+
message.contains('Service connection disposed');
1016+
}

0 commit comments

Comments
 (0)