Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 79 additions & 7 deletions components/esp_websocket_client/esp_websocket_client.c
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,16 @@
static esp_err_t esp_websocket_client_abort_connection(esp_websocket_client_handle_t client, esp_websocket_error_type_t error_type)
{
ESP_WS_CLIENT_STATE_CHECK(TAG, client, return ESP_FAIL);

// Note: This function must be called with client->lock already held
// The caller is responsible for acquiring the lock before calling

// CRITICAL: Check if already closing/closed to prevent double-close
if (client->state == WEBSOCKET_STATE_CLOSING || client->state == WEBSOCKET_STATE_UNKNOW) {
ESP_LOGW(TAG, "Connection already closing/closed, skipping abort");
return ESP_OK;
}

esp_transport_close(client->transport);

if (!client->config->auto_reconnect) {
Expand All @@ -256,6 +266,17 @@
}
client->error_handle.error_type = error_type;
esp_websocket_client_dispatch_event(client, WEBSOCKET_EVENT_DISCONNECTED, NULL, 0);

if (client->errormsg_buffer) {
ESP_LOGI(TAG, "Freeing error buffer (%d bytes) - Free heap: %" PRIu32 " bytes",
client->errormsg_size, esp_get_free_heap_size());
free(client->errormsg_buffer);
client->errormsg_buffer = NULL;
client->errormsg_size = 0;
} else {
ESP_LOGI(TAG, "Disconnect - Free heap: %" PRIu32 " bytes", esp_get_free_heap_size());
}

return ESP_OK;
}

Expand Down Expand Up @@ -453,6 +474,8 @@
esp_websocket_client_destroy_config(client);
if (client->transport_list) {
esp_transport_list_destroy(client->transport_list);
client->transport_list = NULL;
client->transport = NULL;
}
vSemaphoreDelete(client->lock);
#ifdef CONFIG_ESP_WS_CLIENT_SEPARATE_TX_LOCK
Expand Down Expand Up @@ -679,8 +702,18 @@
} else {
esp_websocket_client_error(client, "esp_transport_write() returned %d, errno=%d", ret, errno);
}
ESP_LOGD(TAG, "Calling abort_connection due to send error");
#ifdef CONFIG_ESP_WS_CLIENT_SEPARATE_TX_LOCK
xSemaphoreGiveRecursive(client->tx_lock);
xSemaphoreTakeRecursive(client->lock, portMAX_DELAY);
esp_websocket_client_abort_connection(client, WEBSOCKET_ERROR_TYPE_TCP_TRANSPORT);
xSemaphoreGiveRecursive(client->lock);
return ret;
#else
// Already holding client->lock, safe to call
esp_websocket_client_abort_connection(client, WEBSOCKET_ERROR_TYPE_TCP_TRANSPORT);
goto unlock_and_return;
#endif
}
opcode = 0;
widx += wlen;
Expand Down Expand Up @@ -1019,7 +1052,6 @@
esp_websocket_free_buf(client, false);
return ESP_OK;
}

esp_websocket_client_dispatch_event(client, WEBSOCKET_EVENT_DATA, client->rx_buffer, rlen);

client->payload_offset += rlen;
Expand All @@ -1030,15 +1062,47 @@
const char *data = (client->payload_len == 0) ? NULL : client->rx_buffer;
ESP_LOGD(TAG, "Sending PONG with payload len=%d", client->payload_len);
#ifdef CONFIG_ESP_WS_CLIENT_SEPARATE_TX_LOCK
// CRITICAL: To avoid deadlock, we must follow lock ordering: tx_lock BEFORE client->lock
// Current state: We hold client->lock (acquired by caller at line 1228)
// We need: tx_lock
// Deadlock risk: Send error path holds tx_lock and waits for client->lock = circular wait!
//
// Solution: Temporarily release client->lock, acquire both locks in correct order
// 1. Release client->lock
// 2. Acquire tx_lock (now safe - no deadlock)
// 3. Re-acquire client->lock (for consistency with caller expectations)
// 4. Send PONG
// 5. Release both locks in reverse order

xSemaphoreGiveRecursive(client->lock); // Release client->lock

// Now acquire tx_lock with timeout (consistent with PING/CLOSE handling)
if (xSemaphoreTakeRecursive(client->tx_lock, WEBSOCKET_TX_LOCK_TIMEOUT_MS) != pdPASS) {
ESP_LOGE(TAG, "Could not lock ws-client within %d timeout", WEBSOCKET_TX_LOCK_TIMEOUT_MS);
return ESP_FAIL;
ESP_LOGE(TAG, "Could not lock ws-client within %d timeout for PONG", WEBSOCKET_TX_LOCK_TIMEOUT_MS);
xSemaphoreTakeRecursive(client->lock, portMAX_DELAY); // Re-acquire client->lock before returning
return ESP_OK; // Return gracefully, caller expects client->lock to be held
}
#endif

// Re-acquire client->lock to maintain consistency
xSemaphoreTakeRecursive(client->lock, portMAX_DELAY);

// CRITICAL: Check if transport is still valid after re-acquiring lock
// Another thread may have closed it while we didn't hold client->lock
if (client->state == WEBSOCKET_STATE_CLOSING || client->state == WEBSOCKET_STATE_UNKNOW ||
client->state == WEBSOCKET_STATE_WAIT_TIMEOUT || client->transport == NULL) {
ESP_LOGW(TAG, "Transport closed while preparing PONG, skipping send");
xSemaphoreGiveRecursive(client->tx_lock);
return ESP_OK; // Caller expects client->lock to be held, which it is
}

esp_transport_ws_send_raw(client->transport, WS_TRANSPORT_OPCODES_PONG | WS_TRANSPORT_OPCODES_FIN, data, client->payload_len,
client->config->network_timeout_ms);
#ifdef CONFIG_ESP_WS_CLIENT_SEPARATE_TX_LOCK
xSemaphoreGiveRecursive(client->tx_lock);
#else
// When separate TX lock is not configured, we already hold client->lock
// which protects the transport, so we can send PONG directly
esp_transport_ws_send_raw(client->transport, WS_TRANSPORT_OPCODES_PONG | WS_TRANSPORT_OPCODES_FIN, data, client->payload_len,

Check warning

Code scanning / clang-tidy

The value '138' provided to the cast expression is not in the valid range of values for 'ws_transport_opcodes' [clang-analyzer-optin.core.EnumCastOutOfRange] Warning

The value '138' provided to the cast expression is not in the valid range of values for 'ws_transport_opcodes' [clang-analyzer-optin.core.EnumCastOutOfRange]
client->config->network_timeout_ms);
#endif
} else if (client->last_opcode == WS_TRANSPORT_OPCODES_PONG) {
client->wait_for_pong_resp = false;
Expand Down Expand Up @@ -1136,6 +1200,11 @@
client->state = WEBSOCKET_STATE_CONNECTED;
client->wait_for_pong_resp = false;
client->error_handle.error_type = WEBSOCKET_ERROR_TYPE_NONE;
client->payload_len = 0;
client->payload_offset = 0;
client->last_fin = false;
client->last_opcode = WS_TRANSPORT_OPCODES_NONE;

esp_websocket_client_dispatch_event(client, WEBSOCKET_EVENT_CONNECTED, NULL, 0);
break;
case WEBSOCKET_STATE_CONNECTED:
Expand Down Expand Up @@ -1221,12 +1290,15 @@
esp_websocket_client_abort_connection(client, WEBSOCKET_ERROR_TYPE_TCP_TRANSPORT);
xSemaphoreGiveRecursive(client->lock);
} else if (read_select > 0) {
// CRITICAL: Protect entire recv operation with client->lock
// This prevents transport from being closed while recv is in progress
xSemaphoreTakeRecursive(client->lock, lock_timeout);
if (esp_websocket_client_recv(client) == ESP_FAIL) {
ESP_LOGE(TAG, "Error receive data");
xSemaphoreTakeRecursive(client->lock, lock_timeout);
// Note: Already holding client->lock from line above
esp_websocket_client_abort_connection(client, WEBSOCKET_ERROR_TYPE_TCP_TRANSPORT);
xSemaphoreGiveRecursive(client->lock);
}
xSemaphoreGiveRecursive(client->lock);
}
} else if (WEBSOCKET_STATE_WAIT_TIMEOUT == client->state) {
// waiting for reconnection or a request to stop the client...
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
CONFIG_IDF_TARGET="esp32"
CONFIG_IDF_TARGET_LINUX=n
CONFIG_WEBSOCKET_URI_FROM_STDIN=n
CONFIG_WEBSOCKET_URI_FROM_STRING=y
CONFIG_EXAMPLE_CONNECT_ETHERNET=y
CONFIG_EXAMPLE_CONNECT_WIFI=n
CONFIG_EXAMPLE_USE_INTERNAL_ETHERNET=y
CONFIG_EXAMPLE_ETH_PHY_IP101=y
CONFIG_EXAMPLE_ETH_MDC_GPIO=23
CONFIG_EXAMPLE_ETH_MDIO_GPIO=18
CONFIG_EXAMPLE_ETH_PHY_RST_GPIO=5
CONFIG_EXAMPLE_ETH_PHY_ADDR=1
CONFIG_EXAMPLE_CONNECT_IPV6=y
CONFIG_ESP_WS_CLIENT_SEPARATE_TX_LOCK=y
CONFIG_ESP_WS_CLIENT_TX_LOCK_TIMEOUT_MS=2000