Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove hardcoded timeout from proxy readiness probe #2373

Merged
3 commits merged on
Jul 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions cmd/proxy/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package main
import (
"context"
"flag"
"fmt"
"net"
"net/http"
"os"
Expand Down Expand Up @@ -203,10 +204,11 @@ func exit(log *zap.SugaredLogger, err error, wrapStrs ...string) {

func readinessTCPHandler(port int, logger *zap.SugaredLogger) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
timeout := time.Duration(1) * time.Second
address := net.JoinHostPort("localhost", strconv.FormatInt(int64(port), 10))
ctx := r.Context()
RobertLucian marked this conversation as resolved.
Show resolved Hide resolved
address := net.JoinHostPort("localhost", fmt.Sprintf("%d", port))

conn, err := net.DialTimeout("tcp", address, timeout)
var d net.Dialer
conn, err := d.DialContext(ctx, "tcp", address)
if err != nil {
logger.Warn(errors.Wrap(err, "TCP probe to user-provided container port failed"))
w.WriteHeader(http.StatusInternalServerError)
Expand Down
7 changes: 0 additions & 7 deletions test/e2e/e2e/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
from e2e.utils import (
apis_ready,
api_updated,
api_requests,
wait_on_event,
wait_on_futures,
endpoint_ready,
Expand Down Expand Up @@ -845,7 +844,6 @@ def test_long_running_realtime(
assert len(api_specs) == 1

time_to_run = long_running_config["time_to_run"]
status_code_timeout = long_running_config["status_code_timeout"]

if len(node_groups) > 0:
api_specs[0]["node_groups"] = node_groups
Expand Down Expand Up @@ -881,11 +879,6 @@ def test_long_running_realtime(

counter += 1

printer("verifying number of processed requests using the client")
assert api_requests(
client, api_name, counter, timeout=status_code_timeout
), f"the number of 2xx response codes for api {api_name} doesn't match the expected number {counter}"

except:
# best effort
try:
Expand Down
18 changes: 5 additions & 13 deletions test/e2e/e2e/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,27 +38,19 @@ def wait_for(fn: Callable[[], bool], timeout=None) -> bool:


def apis_ready(client: cx.Client, api_names: List[str], timeout: Optional[int] = None) -> bool:
def _check_liveness(status):
return status["requested"] == status["ready"] == status["up_to_date"]

def _is_ready():
return all(
[client.get_api(name)["status"]["status_code"] == "status_live" for name in api_names]
)
return all([_check_liveness(client.get_api(name)["status"]) for name in api_names])

return wait_for(_is_ready, timeout=timeout)


def api_updated(client: cx.Client, api_name: str, timeout: Optional[int] = None) -> bool:
def _is_ready():
status = client.get_api(api_name)["status"]
return status["replica_counts"]["requested"] == status["replica_counts"]["updated"]["ready"]

return wait_for(_is_ready, timeout=timeout)


def api_requests(
client: cx.Client, api_name: str, target_requests: int, timeout: Optional[int] = None
) -> bool:
def _is_ready():
return client.get_api(api_name)["metrics"]["network_stats"]["code_2xx"] == target_requests
return status["requested"] == status["ready"]

return wait_for(_is_ready, timeout=timeout)

Expand Down