Skip to content

Commit f1f7d2f

Browse files
authored
refactor(component,generic,http): replace env-based URL validation with constructor injection (#1121)
To address the PR [comment](https://linear.app/instill-ai/review/testcomponentgenerichttp-replace-external-httpbinorg-dependency-with-575c6f0d7670#comment-d77feedd): Because - The HTTP component relied on implicit environment variables (`GO_TESTING`) to control URL validation behavior, making the component's security posture unclear and hard to test - URL validation logic was scattered and difficult to maintain, with security-critical code mixed with component logic - The component allowed dangerous localhost access in test mode without explicit configuration - There was no comprehensive test coverage for the URL validation logic, especially for internal service whitelisting **This commit** - Introduces a `URLValidator` interface with constructor-based dependency injection, making validation behavior explicit and testable - Consolidates URL validation logic into a dedicated `validator.go` file with a unified `urlValidator` implementation - Replaces environment variable checks with configurable validator instances (`NewURLValidator()` for production, `NewTestURLValidator()` for testing) - Fixes a critical security vulnerability where test mode implicitly allowed all localhost/127.x.x.x addresses without explicit opt-in - Adds comprehensive test coverage including production whitelist testing for internal services (`pipeline-backend:8081`, `model-backend:8083`) - Preserves existing security guarantees: production mode only allows publicly available endpoints, blocking private/internal IP addresses - Maintains backward compatibility while making the component's behavior more explicit and secure - Consolidates multiple test initialization functions into a single flexible `InitForTest()` function - Restores important future work comments about internal service access patterns
1 parent cd1bd55 commit f1f7d2f

File tree

4 files changed

+448
-80
lines changed

4 files changed

+448
-80
lines changed

pkg/component/generic/http/v0/main.go

Lines changed: 28 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,7 @@ import (
77
"errors"
88
"fmt"
99
"mime"
10-
"net"
1110
"net/http"
12-
"net/url"
13-
"os"
14-
"slices"
1511
"strings"
1612
"sync"
1713

@@ -20,7 +16,6 @@ import (
2016
"google.golang.org/protobuf/proto"
2117
"google.golang.org/protobuf/types/known/structpb"
2218

23-
"github.com/instill-ai/pipeline-backend/config"
2419
"github.com/instill-ai/pipeline-backend/pkg/component/base"
2520
"github.com/instill-ai/pipeline-backend/pkg/component/internal/util/httpclient"
2621
"github.com/instill-ai/pipeline-backend/pkg/data"
@@ -63,6 +58,7 @@ var (
6358

6459
type component struct {
6560
base.Component
61+
urlValidator URLValidator
6662
}
6763

6864
type execution struct {
@@ -72,21 +68,13 @@ type execution struct {
7268
execute func(context.Context, *base.Job) error
7369
}
7470

75-
// isTestEnvironment detects if we're running in a test environment
76-
func isTestEnvironment() bool {
77-
// Check if we're running under go test
78-
for _, arg := range os.Args {
79-
if strings.Contains(arg, "test") {
80-
return true
81-
}
82-
}
83-
// Also check for test environment variable
84-
return os.Getenv("GO_TESTING") == "true"
85-
}
86-
71+
// Init creates a component instance for production use
8772
func Init(bc base.Component) *component {
8873
once.Do(func() {
89-
comp = &component{Component: bc}
74+
comp = &component{
75+
Component: bc,
76+
urlValidator: NewURLValidator(),
77+
}
9078
err := comp.LoadDefinition(definitionYAML, setupYAML, tasksYAML, nil, nil)
9179
if err != nil {
9280
panic(err)
@@ -95,6 +83,21 @@ func Init(bc base.Component) *component {
9583
return comp
9684
}
9785

86+
// InitForTest creates a component instance for testing with configurable validation
87+
// whitelist: URLs to allow (nil/empty = allow all external URLs)
88+
// allowLocalhost: whether to allow localhost/127.x.x.x URLs
89+
func InitForTest(bc base.Component, whitelist []string, allowLocalhost bool) *component {
90+
c := &component{
91+
Component: bc,
92+
urlValidator: NewTestURLValidator(whitelist, allowLocalhost),
93+
}
94+
err := c.LoadDefinition(definitionYAML, setupYAML, tasksYAML, nil, nil)
95+
if err != nil {
96+
panic(err)
97+
}
98+
return c
99+
}
100+
98101
func (c *component) CreateExecution(x base.ComponentExecution) (base.IExecution, error) {
99102

100103
// We may have different url in batch.
@@ -159,60 +162,12 @@ func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error {
159162
return base.ConcurrentExecutor(ctx, jobs, e.execute)
160163
}
161164

162-
// validateURL checks the component's input is a valid URL. This component only
165+
// validateInput checks the component's input is a valid URL. In production mode, this component only
163166
// accepts requests to *publicly available* endpoints. Any call to the internal
164-
// network will produce an error.
165-
func (e *execution) validateURL(endpointURL string) error {
166-
// Skip validation in test environment
167-
if isTestEnvironment() {
168-
return nil
169-
}
170-
parsedURL, err := url.Parse(endpointURL)
171-
if err != nil {
172-
return errorsx.AddMessage(
173-
fmt.Errorf("parsing endpoint URL: %w", err),
174-
"Couldn't parse the endpoint URL as a valid URI reference",
175-
)
176-
}
177-
178-
host := parsedURL.Hostname()
179-
if host == "" {
180-
err := fmt.Errorf("missing hostname")
181-
return errorsx.AddMessage(err, "Endpoint URL must have a hostname")
182-
}
183-
184-
var whitelistedHosts = []string{
185-
// Pipeline's public port is exposed to call pipelines from pipelines.
186-
// When a `pipeline` component is implemented, this won't be necessary.
187-
fmt.Sprintf("%s:%d", config.Config.Server.InstanceID, config.Config.Server.PublicPort),
188-
// Model's public port is exposed until the model component allows
189-
// triggering models in the custom mode.
190-
fmt.Sprintf("%s:%d", config.Config.ModelBackend.Host, config.Config.ModelBackend.PublicPort),
191-
}
192-
// Certain pipelines used by artifact-backend need to trigger pipelines and
193-
// models via this component.
194-
// TODO jvallesm: Remove this after INS-8119 is completed.
195-
if slices.Contains(whitelistedHosts, parsedURL.Host) {
196-
return nil
197-
}
198-
199-
// Get IP addresses for the host
200-
ips, err := net.LookupIP(host)
201-
if err != nil {
202-
return fmt.Errorf("looking up IP: %w", err)
203-
}
204-
205-
// Check if any resolved IP is in private ranges
206-
for _, ip := range ips {
207-
if ip.IsPrivate() || ip.IsLoopback() {
208-
return errorsx.AddMessage(
209-
fmt.Errorf("endpoint URL resolves to private/internal IP address"),
210-
"URL must point to a publicly available endpoint (no private/internal addresses)",
211-
)
212-
}
213-
}
214-
215-
return nil
167+
// network will produce an error. In test mode, behavior is controlled by whitelist and localhost settings.
168+
func (e *execution) validateInput(input *httpInput) error {
169+
comp := e.Component.(*component)
170+
return comp.urlValidator.ValidateInput(input)
216171
}
217172

218173
func (e *execution) executeHTTP(ctx context.Context, job *base.Job) error {
@@ -221,8 +176,8 @@ func (e *execution) executeHTTP(ctx context.Context, job *base.Job) error {
221176
return fmt.Errorf("reading input data: %w", err)
222177
}
223178

224-
if err := e.validateURL(in.EndpointURL); err != nil {
225-
return fmt.Errorf("validating URL: %w", err)
179+
if err := e.validateInput(&in); err != nil {
180+
return fmt.Errorf("validating input: %w", err)
226181
}
227182

228183
// An API error is a valid output in this component.

pkg/component/generic/http/v0/component_test.go renamed to pkg/component/generic/http/v0/main_test.go

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@ import (
66
"io"
77
"net/http"
88
"net/http/httptest"
9-
"os"
109
"strings"
1110
"testing"
1211

13-
qt "github.com/frankban/quicktest"
1412
"google.golang.org/protobuf/types/known/structpb"
1513

14+
qt "github.com/frankban/quicktest"
15+
1616
"github.com/instill-ai/pipeline-backend/pkg/component/base"
1717
"github.com/instill-ai/pipeline-backend/pkg/component/internal/mock"
1818
"github.com/instill-ai/pipeline-backend/pkg/data"
@@ -228,10 +228,6 @@ func TestComponent(t *testing.T) {
228228
c := qt.New(t)
229229
c.Parallel()
230230

231-
// Set test environment to bypass URL validation
232-
os.Setenv("GO_TESTING", "true")
233-
defer os.Unsetenv("GO_TESTING")
234-
235231
// respEquals returns a checker for equality between the received response
236232
// and the expected one.
237233
respEquals := func(want httpOutput) func(*qt.C, httpOutput) {
@@ -524,7 +520,8 @@ func TestComponent(t *testing.T) {
524520
actualInput := tc.input
525521
actualInput.EndpointURL = strings.Replace(actualInput.EndpointURL, "PLACEHOLDER_URL", server.URL, 1)
526522

527-
component := Init(base.Component{})
523+
// Use InitForTest with localhost enabled to create component that allows localhost URLs
524+
component := InitForTest(base.Component{}, nil, true)
528525
c.Assert(component, qt.IsNotNil)
529526

530527
execution, err := component.CreateExecution(base.ComponentExecution{
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
package http
2+
3+
import (
4+
"fmt"
5+
"net"
6+
"net/url"
7+
"slices"
8+
"strings"
9+
10+
"github.com/instill-ai/pipeline-backend/config"
11+
errorsx "github.com/instill-ai/x/errors"
12+
)
13+
14+
// URLValidator defines the interface for validating HTTP input
15+
type URLValidator interface {
16+
ValidateInput(input *httpInput) error
17+
}
18+
19+
// urlValidator provides common validation logic
20+
type urlValidator struct {
21+
whitelistedEndpoints []string
22+
allowLocalhost bool
23+
allowPrivateIPs bool
24+
}
25+
26+
// NewURLValidator creates a validator for production use
27+
func NewURLValidator() URLValidator {
28+
return &urlValidator{allowPrivateIPs: false}
29+
}
30+
31+
// NewTestURLValidator creates a validator for testing
32+
func NewTestURLValidator(whitelistedEndpoints []string, allowLocalhost bool) URLValidator {
33+
return &urlValidator{
34+
whitelistedEndpoints: whitelistedEndpoints,
35+
allowLocalhost: allowLocalhost,
36+
allowPrivateIPs: true, // Test mode allows external URLs by default
37+
}
38+
}
39+
40+
// ValidateInput implements the consolidated validation logic
41+
func (v *urlValidator) ValidateInput(input *httpInput) error {
42+
if input == nil {
43+
return fmt.Errorf("input cannot be nil")
44+
}
45+
46+
endpointURL := input.EndpointURL
47+
if endpointURL == "" {
48+
return errorsx.AddMessage(
49+
fmt.Errorf("endpoint URL is required"),
50+
"Endpoint URL must be provided",
51+
)
52+
}
53+
54+
parsedURL, err := url.Parse(endpointURL)
55+
if err != nil {
56+
return errorsx.AddMessage(
57+
fmt.Errorf("parsing endpoint URL: %w", err),
58+
"Couldn't parse the endpoint URL as a valid URI reference",
59+
)
60+
}
61+
62+
host := parsedURL.Hostname()
63+
if host == "" {
64+
return errorsx.AddMessage(
65+
fmt.Errorf("missing hostname"),
66+
"Endpoint URL must have a hostname",
67+
)
68+
}
69+
70+
// Check explicit whitelist first
71+
for _, allowedEndpoint := range v.whitelistedEndpoints {
72+
if strings.HasPrefix(endpointURL, allowedEndpoint) {
73+
return nil
74+
}
75+
}
76+
77+
// Production-specific whitelisted hosts
78+
if !v.allowPrivateIPs {
79+
prodWhitelist := []string{
80+
// Pipeline's public port is exposed to call pipelines from pipelines.
81+
// When a `pipeline` component is implemented, this won't be necessary.
82+
fmt.Sprintf("%s:%d", config.Config.Server.InstanceID, config.Config.Server.PublicPort),
83+
// Model's public port is exposed until the model component allows
84+
// triggering models in the custom mode.
85+
fmt.Sprintf("%s:%d", config.Config.ModelBackend.Host, config.Config.ModelBackend.PublicPort),
86+
}
87+
// Certain pipelines used by artifact-backend need to trigger pipelines and
88+
// models via this component.
89+
// TODO jvallesm: Remove this after INS-8119 is completed.
90+
if slices.Contains(prodWhitelist, parsedURL.Host) {
91+
return nil
92+
}
93+
}
94+
95+
// Check localhost allowance
96+
if v.allowLocalhost && (host == "localhost" || host == "127.0.0.1" || strings.HasPrefix(host, "127.")) {
97+
return nil
98+
}
99+
100+
// Production mode: check if IP is private and block private IPs
101+
if !v.allowPrivateIPs {
102+
ips, err := net.LookupIP(host)
103+
if err != nil {
104+
return fmt.Errorf("looking up IP: %w", err)
105+
}
106+
107+
for _, ip := range ips {
108+
if ip.IsPrivate() || ip.IsLoopback() {
109+
return errorsx.AddMessage(
110+
fmt.Errorf("endpoint URL resolves to private/internal IP address"),
111+
"URL must point to a publicly available endpoint (no private/internal addresses)",
112+
)
113+
}
114+
}
115+
// Production mode: allow public IPs
116+
return nil
117+
}
118+
119+
// Test mode: apply whitelist and localhost restrictions
120+
if len(v.whitelistedEndpoints) > 0 {
121+
return errorsx.AddMessage(
122+
fmt.Errorf("endpoint URL not in test whitelist"),
123+
fmt.Sprintf("URL %s is not in the whitelisted endpoints for testing", endpointURL),
124+
)
125+
}
126+
127+
if !v.allowLocalhost {
128+
return errorsx.AddMessage(
129+
fmt.Errorf("endpoint URL not allowed"),
130+
"No endpoints are whitelisted for testing and localhost access is disabled",
131+
)
132+
}
133+
134+
// Test mode: allow if localhost is enabled
135+
return nil
136+
}

0 commit comments

Comments
 (0)