Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: issue where canary overwrites default backend #10072

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions rootfs/etc/nginx/lua/balancer.lua
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ local function get_implementation(backend)
return implementation
end

-- used to get the IP address of the upstream to set as the
-- backends endpoint to route to
local function resolve_external_names(original_backend)
local backend = util.deepcopy(original_backend)
local endpoints = {}
Expand Down Expand Up @@ -181,6 +183,7 @@ local function sync_backends()
backends_last_synced_at = raw_backends_last_synced_at
end

-- logic used to pick up if request should be routed to an alternative backend
local function route_to_alternative_balancer(balancer)
if balancer.is_affinitized(balancer) then
-- If request is already affinitized to a primary balancer, keep the primary balancer.
Expand Down Expand Up @@ -218,7 +221,6 @@ local function route_to_alternative_balancer(balancer)
"of backend: ", tostring(backend_name))
return false
end

local target_header = util.replace_special_char(traffic_shaping_policy.header,
"-", "_")
local header = ngx.var["http_" .. target_header]
Expand Down Expand Up @@ -278,14 +280,15 @@ local function get_balancer()
local backend_name = ngx.var.proxy_upstream_name

local balancer = balancers[backend_name]

if not balancer then
return nil
end

if route_to_alternative_balancer(balancer) then
--we should not overwrite balancer when it is the default backend
if route_to_alternative_balancer(balancer) and not balancer.is_default_backend then
local alternative_backend_name = balancer.alternative_backends[1]
ngx.var.proxy_alternative_upstream_name = alternative_backend_name

balancer = balancers[alternative_backend_name]
end

Expand Down Expand Up @@ -318,6 +321,7 @@ end

function _M.rewrite()
local balancer = get_balancer()

if not balancer then
ngx.status = ngx.HTTP_SERVICE_UNAVAILABLE
return ngx.exit(ngx.status)
Expand All @@ -344,6 +348,7 @@ function _M.balance()
ngx_balancer.set_more_tries(1)

local ok, err = ngx_balancer.set_current_peer(peer)

if not ok then
ngx.log(ngx.ERR, "error while setting current upstream peer ", peer,
": ", err)
Expand All @@ -363,6 +368,16 @@ function _M.log()
balancer:after_balance()
end

--this is used to check if we are routing to the
--default backend for sepcific error codes so that we do not overwrite it with
--alternative routes
--https://github.com/kubernetes/ingress-nginx/issues/9944
function _M.is_default_backend()
if ngx.ctx.balancer then
ngx.ctx.balancer.is_default_backend = true
end
end

setmetatable(_M, {__index = {
get_implementation = get_implementation,
sync_backend = sync_backend,
Expand Down
4 changes: 4 additions & 0 deletions rootfs/etc/nginx/template/nginx.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -931,6 +931,10 @@ stream {
rewrite (.*) / break;

proxy_pass http://upstream_balancer;
access_by_lua_block {
balancer.is_default_backend()
}

log_by_lua_block {
{{ if $enableMetrics }}
monitor.call()
Expand Down
1 change: 1 addition & 0 deletions test/e2e/DEFAULT_BACKEND_IMAGE
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
registry.k8s.io/ingress-nginx/nginx-errors:v20230505@sha256:3600dcd1bbd0d05959bb01af4b272714e94d22d24a64e91838e7183c80e53f7f
48 changes: 48 additions & 0 deletions test/e2e/annotations/canary.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,54 @@ var _ = framework.DescribeAnnotation("canary-*", func() {
NotContains(canaryService)
})

// issue: https://github.com/kubernetes/ingress-nginx/issues/9944
// canary routing should not overwrite custom errors
ginkgo.It("should respond with a 401 status from the custom errors backend when canary responds with a 401",
func() {
host := "foo"
annotations := map[string]string{
"nginx.ingress.kubernetes.io/custom-http-errors": "401",
"nginx.ingress.kubernetes.io/default-backend": framework.DefaultBackendService,
}

f.NewDefaultBackendDeployment()

f.EnsureIngress(framework.NewSingleIngress(
host,
"/",
host,
f.Namespace,
framework.HTTPBunService,
80,
annotations,
))

f.WaitForNginxServer(host, func(server string) bool {
return strings.Contains(server, "server_name foo")
})

f.EnsureIngress(framework.NewSingleIngress(
canaryService,
"/",
host,
f.Namespace,
canaryService,
80,
map[string]string{
"nginx.ingress.kubernetes.io/canary": "true",
"nginx.ingress.kubernetes.io/canary-by-header": "CanaryByHeader",
},
))

f.HTTPTestClient().
GET("/status/401").
WithHeader("Host", host).
Expect().
Status(http.StatusUnauthorized).
Body().
Contains("401")
Comment on lines +128 to +134
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Due to the passage of time, I need to reload some memories.

Can this case cover the actual scenarios in that issue?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the test case was written specifically to reproduce the issue, before fixing it

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Means we are good to go? ;)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have been trying to find time to verify what would happen with this test case before the Lua part modification.

This is the only issue that I am currently worried about.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dfl-aeb @Spazzy757 Sorry for the delay, very busy with daily work.

Can you help me verify the result of this test case when there are no modifications to the Lua part? Other than that, I have no other concerns about this PR.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry to bother you again @Spazzy757
Do you think you can find some time in the next few weeks?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can try do this next week and paste the results, I wrote the test before making changes to the Lua to verify the issue.

I also tested this, manually using the same method I used in #9944

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have any updates @Spazzy757 ?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Spazzy757 @tao12345666333 friendly reminder :)

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Spazzy757 could you please share your test results?

})

ginkgo.It("should return 404 status for requests to the canary if no matching ingress is found", func() {
host := fooHost

Expand Down
Loading