@@ -244,7 +244,17 @@ static esp_err_t esp_websocket_client_dispatch_event(esp_websocket_client_handle
 static esp_err_t esp_websocket_client_abort_connection(esp_websocket_client_handle_t client, esp_websocket_error_type_t error_type)
 {
     ESP_WS_CLIENT_STATE_CHECK(TAG, client, return ESP_FAIL);
+
+    // CRITICAL: Check if already closing/closed to prevent double-close
+    if (client->state == WEBSOCKET_STATE_CLOSING || client->state == WEBSOCKET_STATE_UNKNOW) {
+        ESP_LOGW(TAG, "Connection already closing/closed, skipping abort");
+        return ESP_OK;
+    }
+
     esp_transport_close(client->transport);
+    // Note: We do NOT set client->transport = NULL here because:
+    // 1. Transport is reused on reconnect
+    // 2. We use state instead to track if transport is valid
 
     if (!client->config->auto_reconnect) {
         client->run = false;
@@ -256,6 +266,7 @@ static esp_err_t esp_websocket_client_abort_connection(esp_websocket_client_hand
     }
     client->error_handle.error_type = error_type;
     esp_websocket_client_dispatch_event(client, WEBSOCKET_EVENT_DISCONNECTED, NULL, 0);
+
     return ESP_OK;
 }
 
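The guard added in this hunk makes the abort path idempotent: a second caller that arrives mid-teardown returns early instead of closing the transport twice. Note that this is only race-free because every caller already holds client->lock around the call. A minimal standalone sketch of the same pattern, using a hypothetical `conn_t` type and state names rather than the esp_websocket_client API:

```c
#include <stdbool.h>
#include <stdio.h>

// Hypothetical states mirroring the patch's CLOSING/UNKNOW check.
typedef enum { CONN_CONNECTED, CONN_CLOSING, CONN_CLOSED } conn_state_t;

typedef struct {
    conn_state_t state;
} conn_t;

// Idempotent abort: safe to reach from multiple error paths, provided
// the caller serializes access (the real client holds client->lock here).
static void conn_abort(conn_t *c)
{
    if (c->state == CONN_CLOSING || c->state == CONN_CLOSED) {
        printf("already closing/closed, skipping abort\n");
        return;                  // nothing to do, and not an error
    }
    c->state = CONN_CLOSING;     // claim the teardown before performing it
    // ... close the transport exactly once here ...
    c->state = CONN_CLOSED;
}

int main(void)
{
    conn_t c = { .state = CONN_CONNECTED };
    conn_abort(&c);   // performs the close
    conn_abort(&c);   // second call is a harmless no-op
    return 0;
}
```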
@@ -679,8 +690,18 @@ static int esp_websocket_client_send_with_exact_opcode(esp_websocket_client_hand
             } else {
                 esp_websocket_client_error(client, "esp_transport_write() returned %d, errno=%d", ret, errno);
             }
+            ESP_LOGD(TAG, "Calling abort_connection due to send error");
+#ifdef CONFIG_ESP_WS_CLIENT_SEPARATE_TX_LOCK
+            xSemaphoreGiveRecursive(client->tx_lock);
+            xSemaphoreTakeRecursive(client->lock, portMAX_DELAY);
+            esp_websocket_client_abort_connection(client, WEBSOCKET_ERROR_TYPE_TCP_TRANSPORT);
+            xSemaphoreGiveRecursive(client->lock);
+            return ret;
+#else
+            // Already holding client->lock, safe to call
             esp_websocket_client_abort_connection(client, WEBSOCKET_ERROR_TYPE_TCP_TRANSPORT);
             goto unlock_and_return;
+#endif
         }
         opcode = 0;
         widx += wlen;
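The #ifdef branch above encodes one global rule: never wait on client->lock while still holding tx_lock, so the send-error path gives tx_lock back before taking the state lock. The same rule in a self-contained sketch, with plain POSIX mutexes standing in for the FreeRTOS recursive semaphores (`tx_lock`/`state_lock` are illustrative names, not the library's):

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t tx_lock    = PTHREAD_MUTEX_INITIALIZER; // stand-in for client->tx_lock
static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER; // stand-in for client->lock

static void abort_connection(void)
{
    printf("aborting under state_lock only\n");
}

// Send-error path following the patch's ordering rule: drop tx_lock
// BEFORE taking the state lock, so no thread ever holds tx_lock while
// waiting on state_lock (which would complete a circular wait).
static void on_send_error(void)
{
    pthread_mutex_unlock(&tx_lock);    // step 1: give tx_lock back

    pthread_mutex_lock(&state_lock);   // step 2: now safe to take the state lock
    abort_connection();
    pthread_mutex_unlock(&state_lock);
}

int main(void)
{
    pthread_mutex_lock(&tx_lock);      // simulate arriving from the write path
    on_send_error();
    return 0;
}
```

The #ifdef branch returns directly rather than using `goto unlock_and_return`, presumably because tx_lock has already been released at that point and must not be given a second time.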
@@ -1019,7 +1040,6 @@ static esp_err_t esp_websocket_client_recv(esp_websocket_client_handle_t client)
         esp_websocket_free_buf(client, false);
         return ESP_OK;
     }
-
     esp_websocket_client_dispatch_event(client, WEBSOCKET_EVENT_DATA, client->rx_buffer, rlen);
 
     client->payload_offset += rlen;
@@ -1030,9 +1050,36 @@ static esp_err_t esp_websocket_client_recv(esp_websocket_client_handle_t client)
         const char *data = (client->payload_len == 0) ? NULL : client->rx_buffer;
         ESP_LOGD(TAG, "Sending PONG with payload len=%d", client->payload_len);
 #ifdef CONFIG_ESP_WS_CLIENT_SEPARATE_TX_LOCK
-        if (xSemaphoreTakeRecursive(client->tx_lock, WEBSOCKET_TX_LOCK_TIMEOUT_MS) != pdPASS) {
-            ESP_LOGE(TAG, "Could not lock ws-client within %d timeout", WEBSOCKET_TX_LOCK_TIMEOUT_MS);
-            return ESP_FAIL;
+        // CRITICAL: To avoid deadlock, we must follow lock ordering: tx_lock BEFORE client->lock
+        // Current state: We hold client->lock (acquired by caller at line 1228)
+        // We need: tx_lock
+        // Deadlock risk: Send error path holds tx_lock and waits for client->lock = circular wait!
+        //
+        // Solution: Temporarily release client->lock, acquire both locks in correct order
+        // 1. Release client->lock
+        // 2. Acquire tx_lock (now safe - no deadlock)
+        // 3. Re-acquire client->lock (for consistency with caller expectations)
+        // 4. Send PONG
+        // 5. Release both locks in reverse order
+
+        xSemaphoreGiveRecursive(client->lock);  // Release client->lock
+
+        // Now acquire tx_lock (safe - no circular wait possible)
+        if (xSemaphoreTakeRecursive(client->tx_lock, portMAX_DELAY) != pdPASS) {
+            ESP_LOGE(TAG, "Failed to acquire tx_lock for PONG");
+            xSemaphoreTakeRecursive(client->lock, portMAX_DELAY);  // Re-acquire client->lock before returning
+            return ESP_OK;  // Return gracefully, caller expects client->lock to be held
+        }
+
+        // Re-acquire client->lock to maintain consistency
+        xSemaphoreTakeRecursive(client->lock, portMAX_DELAY);
+
+        // CRITICAL: Check if transport is still valid after re-acquiring lock
+        // Another thread may have closed it while we didn't hold client->lock
+        if (client->state == WEBSOCKET_STATE_CLOSING || client->state == WEBSOCKET_STATE_UNKNOW || client->transport == NULL) {
+            ESP_LOGW(TAG, "Transport closed while preparing PONG, skipping send");
+            xSemaphoreGiveRecursive(client->tx_lock);
+            return ESP_OK;  // Caller expects client->lock to be held, which it is
         }
 #endif
         esp_transport_ws_send_raw(client->transport, WS_TRANSPORT_OPCODES_PONG | WS_TRANSPORT_OPCODES_FIN, data, client->payload_len,
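This hunk is the other half of the ordering fix: the PONG path arrives holding client->lock but needs tx_lock, so it releases, acquires in the global order, re-acquires, and then revalidates the connection, since another thread may have torn it down during the unlocked window. A self-contained sketch of the release/reacquire/revalidate sequence, again with POSIX mutexes and a `transport_open` flag as stand-ins:

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t tx_lock    = PTHREAD_MUTEX_INITIALIZER; // stand-in for client->tx_lock
static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER; // stand-in for client->lock
static bool transport_open = true;   // stand-in for the state/transport validity check

// Called with state_lock held; also needs tx_lock. Enforces the global
// order "tx_lock before state_lock" by releasing and re-acquiring.
static void send_pong(void)
{
    pthread_mutex_unlock(&state_lock);   // 1. release the lock we hold

    pthread_mutex_lock(&tx_lock);        // 2. take tx_lock first (global order)
    pthread_mutex_lock(&state_lock);     // 3. re-take state_lock

    // 4. REVALIDATE: the connection may have been closed during the
    //    window in which this thread held neither lock.
    if (!transport_open) {
        printf("transport closed in the gap, skipping PONG\n");
        pthread_mutex_unlock(&tx_lock);
        return;                          // caller still holds state_lock, as it expects
    }

    printf("sending PONG under both locks\n");
    pthread_mutex_unlock(&tx_lock);      // 5. release in reverse order
    // state_lock is deliberately kept: the caller expects it held on return
}

int main(void)
{
    pthread_mutex_lock(&state_lock);     // simulate the caller in _recv
    send_pong();
    pthread_mutex_unlock(&state_lock);
    return 0;
}
```

The revalidation step is what pays for the unlocked window; without it, this path could hand a torn-down transport straight to the send call.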
@@ -1221,12 +1268,14 @@ static void esp_websocket_client_task(void *pv)
                 esp_websocket_client_abort_connection(client, WEBSOCKET_ERROR_TYPE_TCP_TRANSPORT);
                 xSemaphoreGiveRecursive(client->lock);
             } else if (read_select > 0) {
+                // CRITICAL: Protect entire recv operation with client->lock
+                // This prevents transport from being closed while recv is in progress
+                xSemaphoreTakeRecursive(client->lock, lock_timeout);
                 if (esp_websocket_client_recv(client) == ESP_FAIL) {
                     ESP_LOGE(TAG, "Error receive data");
-                    xSemaphoreTakeRecursive(client->lock, lock_timeout);
                     esp_websocket_client_abort_connection(client, WEBSOCKET_ERROR_TYPE_TCP_TRANSPORT);
-                    xSemaphoreGiveRecursive(client->lock);
                 }
+                xSemaphoreGiveRecursive(client->lock);
             }
         } else if (WEBSOCKET_STATE_WAIT_TIMEOUT == client->state) {
             // waiting for reconnection or a request to stop the client...
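The last hunk widens the critical section from just the error branch to the whole recv call, so a concurrent abort can no longer close the transport between the read and its error handling. A minimal sketch of the widened-lock idea (`do_recv`/`close_transport` are hypothetical helpers, not the library's API):

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t client_lock = PTHREAD_MUTEX_INITIALIZER; // stand-in for client->lock
static bool transport_open = true;

static bool do_recv(void)
{
    // Touches the transport; must not race with a concurrent close.
    return transport_open;
}

static void close_transport(void)   // e.g. the abort path on a send error
{
    pthread_mutex_lock(&client_lock);
    transport_open = false;
    pthread_mutex_unlock(&client_lock);
}

// After the patch: the lock covers the whole recv, so close_transport()
// cannot run between "use transport" and "handle its failure".
static void recv_loop_iteration(void)
{
    pthread_mutex_lock(&client_lock);
    if (!do_recv()) {
        printf("recv failed, aborting under the same lock\n");
    }
    pthread_mutex_unlock(&client_lock);
}

int main(void)
{
    recv_loop_iteration();   // succeeds
    close_transport();
    recv_loop_iteration();   // observes the closed transport safely
    return 0;
}
```

Taken together with the PONG hunk, this also explains why _recv may assume client->lock is held on entry: the task now takes it before every call, not only on failure.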