Skip to content

Commit c6637be

Browse files
Remove explicit dependencies on the decommissioned annotation-service (#1091)
* Add deprecated legacy versions of GeolocationIP and ASData records
* Eliminate dependency on annotation-service
* Remove unused annotation functions
1 parent cc1f5f4 commit c6637be

File tree

11 files changed

+85
-160
lines changed

11 files changed

+85
-160
lines changed

cmd/etl_worker/etl_worker.go

-4
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,6 @@ import (
2727
"github.com/m-lab/etl/task"
2828
"github.com/m-lab/etl/worker"
2929

30-
"github.com/m-lab/annotation-service/site"
31-
3230
// Enable profiling. For more background and usage information, see:
3331
// https://blog.golang.org/profiling-go-programs
3432
_ "net/http/pprof"
@@ -276,8 +274,6 @@ func main() {
276274
log.Println("To resolve oauth problems, run 'gcloud auth application-default login'")
277275
}
278276

279-
go site.MustReload(mainCtx)
280-
281277
// Enable block profiling
282278
runtime.SetBlockProfileRate(1000000) // One event per msec.
283279

factory/factory.go

-24
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,7 @@ package factory
44
import (
55
"context"
66

7-
v2 "github.com/m-lab/annotation-service/api/v2"
8-
97
"github.com/m-lab/etl/etl"
10-
"github.com/m-lab/etl/parser"
118
"github.com/m-lab/etl/row"
129
)
1310

@@ -35,11 +32,6 @@ func NewError(dt, detail string, code int, err error) etl.ProcessingError {
3532
return processingError{dt, detail, code, err}
3633
}
3734

38-
// AnnotatorFactory provides Get() which always returns a new or existing Annotator.
39-
type AnnotatorFactory interface {
40-
Get(context.Context, etl.DataPath) (v2.Annotator, etl.ProcessingError)
41-
}
42-
4335
// SinkFactory provides Get() which may return a new or existing Sink.
4436
// If existing Sink, the Commit method must support concurrent calls.
4537
// Existing Sink may or may not respect the context.
@@ -51,19 +43,3 @@ type SinkFactory interface {
5143
type SourceFactory interface {
5244
Get(context.Context, etl.DataPath) (etl.TestSource, etl.ProcessingError)
5345
}
54-
55-
//=======================================================================
56-
// Implementations
57-
//=======================================================================
58-
59-
type defaultAnnotatorFactory struct{}
60-
61-
// Get implements AnnotatorFactory.Get
62-
func (ann *defaultAnnotatorFactory) Get(ctx context.Context, dp etl.DataPath) (v2.Annotator, etl.ProcessingError) {
63-
return &parser.NullAnnotator{}, nil
64-
}
65-
66-
// DefaultAnnotatorFactory returns the annotation service annotator.
67-
func DefaultAnnotatorFactory() AnnotatorFactory {
68-
return &defaultAnnotatorFactory{}
69-
}

go.mod

+1 -1
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ require (
1616
github.com/gorilla/websocket v1.5.0 // indirect
1717
github.com/iancoleman/strcase v0.2.0
1818
github.com/kr/pretty v0.2.1
19-
github.com/m-lab/annotation-service v0.0.0-20210713124633-fa227b3d5b2f
19+
github.com/m-lab/annotation-service v0.0.0-20210713124633-fa227b3d5b2f // indirect
2020
github.com/m-lab/etl-gardener v0.0.0-20210910143655-d4bda5bfc75d
2121
github.com/m-lab/go v0.1.47
2222
github.com/m-lab/ndt-server v0.20.9

parser/annotation.go

-12
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
package parser
22

33
import (
4-
"context"
54
"encoding/json"
65
"log"
76
"strings"
@@ -10,8 +9,6 @@ import (
109
"cloud.google.com/go/bigquery"
1110

1211
"cloud.google.com/go/civil"
13-
"github.com/m-lab/annotation-service/api"
14-
v2as "github.com/m-lab/annotation-service/api/v2"
1512
"github.com/m-lab/etl/etl"
1613
"github.com/m-lab/etl/metrics"
1714
"github.com/m-lab/etl/row"
@@ -30,15 +27,6 @@ type AnnotationParser struct {
3027
suffix string
3128
}
3229

33-
// NullAnnotator mimics the annotation-service API, and always returns an empty
34-
// result without any network connections.
35-
type NullAnnotator struct{}
36-
37-
// GetAnnotations always returns an empty annotation result.
38-
func (ann *NullAnnotator) GetAnnotations(ctx context.Context, date time.Time, ips []string, info ...string) (*v2as.Response, error) {
39-
return &v2as.Response{AnnotatorDate: time.Now(), Annotations: make(map[string]*api.Annotations, 0)}, nil
40-
}
41-
4230
// NewAnnotationParser creates a new parser for annotation data.
4331
func NewAnnotationParser(sink row.Sink, label, suffix string) etl.Parser {
4432
bufSize := etl.ANNOTATION.BQBufferSize()

parser/parser_test.go

-18
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,12 @@
33
package parser_test
44

55
import (
6-
"context"
76
"fmt"
87
"log"
98
"os"
109
"testing"
11-
"time"
1210

1311
"cloud.google.com/go/bigquery"
14-
"github.com/m-lab/annotation-service/api"
15-
v2 "github.com/m-lab/annotation-service/api/v2"
1612
"github.com/m-lab/etl/etl"
1713
"github.com/m-lab/etl/metrics"
1814
"github.com/m-lab/etl/parser"
@@ -51,20 +47,6 @@ func (ti *countingInserter) Flush() error {
5147
return nil
5248
}
5349

54-
// newFakeAnnotator creates a new annotator that injects the given annotation
55-
// responses for unit testing.
56-
func newFakeAnnotator(ann map[string]*api.Annotations) *fakeAnnotator {
57-
return &fakeAnnotator{ann: ann}
58-
}
59-
60-
type fakeAnnotator struct {
61-
ann map[string]*api.Annotations
62-
}
63-
64-
func (ann *fakeAnnotator) GetAnnotations(ctx context.Context, date time.Time, ips []string, info ...string) (*v2.Response, error) {
65-
return &v2.Response{AnnotatorDate: time.Now(), Annotations: ann.ann}, nil
66-
}
67-
6850
func TestNormalizeIP(t *testing.T) {
6951
tests := []struct {
7052
name string

parser/tcpinfo_test.go

-34
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@ import (
1414
"time"
1515

1616
"cloud.google.com/go/civil"
17-
"github.com/m-lab/annotation-service/api"
1817
"github.com/m-lab/etl/etl"
1918
"github.com/m-lab/etl/parser"
2019
"github.com/m-lab/etl/schema"
@@ -55,39 +54,6 @@ func fileSource(fn string) (etl.TestSource, error) {
5554
RetryBaseTime: timeout, TableBase: "test", PathDate: civil.Date{Year: 2020, Month: 6, Day: 11}}, nil
5655
}
5756

58-
var tcpInfoAnno = map[string]*api.Annotations{
59-
// client ip.
60-
"35.225.75.192": &api.Annotations{
61-
Geo: &api.GeolocationIP{
62-
ContinentCode: "NA",
63-
CountryCode: "US",
64-
Latitude: 1.0,
65-
Longitude: 2.0,
66-
},
67-
Network: &api.ASData{
68-
ASNumber: 1234,
69-
Systems: []api.System{
70-
{ASNs: []uint32{1234}},
71-
},
72-
},
73-
},
74-
// server ip.
75-
"195.89.146.242": &api.Annotations{
76-
Geo: &api.GeolocationIP{
77-
ContinentCode: "NA",
78-
CountryCode: "US",
79-
Latitude: 1.0,
80-
Longitude: 2.0,
81-
},
82-
Network: &api.ASData{
83-
ASNumber: 1234,
84-
Systems: []api.System{
85-
{ASNs: []uint32{1234}},
86-
},
87-
},
88-
},
89-
}
90-
9157
type inMemorySink struct {
9258
data []interface{}
9359
committed int

row/row_test.go

+6 -20
Original file line number | Diff line number | Diff line change
@@ -6,17 +6,13 @@ import (
66
"time"
77

88
"github.com/m-lab/etl/row"
9-
10-
"github.com/m-lab/annotation-service/api"
119
)
1210

1311
// Implement parser.Annotatable
1412

1513
type Row struct {
16-
client string
17-
server string
18-
clientAnn *api.Annotations
19-
serverAnn *api.Annotations
14+
client string
15+
server string
2016
}
2117

2218
type BadRow struct{}
@@ -29,16 +25,6 @@ func (row *Row) GetServerIP() string {
2925
return row.server
3026
}
3127

32-
func (row *Row) AnnotateClients(remote map[string]*api.Annotations) error {
33-
row.clientAnn = remote[row.GetClientIPs()[0]]
34-
return nil
35-
}
36-
37-
func (row *Row) AnnotateServer(local *api.GeoData) error {
38-
row.serverAnn = local
39-
return nil
40-
}
41-
4228
func (row *Row) GetLogTime() time.Time {
4329
return time.Now()
4430
}
@@ -71,10 +57,10 @@ func TestBase(t *testing.T) {
7157

7258
b := row.NewBase("test", ins, 10)
7359

74-
b.Put(&Row{"1.2.3.4", "4.3.2.1", nil, nil})
60+
b.Put(&Row{"1.2.3.4", "4.3.2.1"})
7561

7662
// Add a row with empty server IP
77-
b.Put(&Row{"1.2.3.4", "", nil, nil})
63+
b.Put(&Row{"1.2.3.4", ""})
7864
b.Flush()
7965
stats := b.GetStats()
8066
if stats.Committed != 2 {
@@ -91,14 +77,14 @@ func TestAsyncPut(t *testing.T) {
9177

9278
b := row.NewBase("test", ins, 1)
9379

94-
b.Put(&Row{"1.2.3.4", "4.3.2.1", nil, nil})
80+
b.Put(&Row{"1.2.3.4", "4.3.2.1"})
9581

9682
if b.GetStats().Committed != 0 {
9783
t.Fatalf("Expected %d, Got %d.", 0, b.GetStats().Committed)
9884
}
9985

10086
// This should trigger an async flush
101-
b.Put(&Row{"1.2.3.4", "4.3.2.1", nil, nil})
87+
b.Put(&Row{"1.2.3.4", "4.3.2.1"})
10288
start := time.Now()
10389
for time.Since(start) < 5*time.Second && b.GetStats().Committed < 1 {
10490
time.Sleep(10 * time.Millisecond)

schema/ndt_web100.go

+19 -20
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@ import (
44
"time"
55

66
"cloud.google.com/go/bigquery"
7-
"github.com/m-lab/annotation-service/api"
87
"github.com/m-lab/go/cloud/bqx"
98
"github.com/m-lab/uuid-annotator/annotator"
109
)
@@ -35,25 +34,25 @@ type ndtAnomalies struct {
3534
}
3635

3736
type ndtConnectionSpec struct {
38-
ClientAF int64 `bigquery:"client_af"`
39-
ClientApplication string `bigquery:"client_application"`
40-
ClientBrowser string `bigquery:"client_browser"`
41-
ClientHostname string `bigquery:"client_hostname"`
42-
ClientIP string `bigquery:"client_ip"`
43-
ClientKernelVersion string `bigquery:"client_kernel_version"`
44-
ClientOS string `bigquery:"client_os"`
45-
ClientVersion string `bigquery:"client_version"`
46-
DataDirection int64 `bigquery:"data_direction"`
47-
ServerAF int64 `bigquery:"server_af"`
48-
ServerHostname string `bigquery:"server_hostname"`
49-
ServerIP string `bigquery:"server_ip"`
50-
ServerKernelVersion string `bigquery:"server_kernel_version"`
51-
TLS bool `bigquery:"tls"`
52-
Websockets bool `bigquery:"websockets"`
53-
ClientGeolocation api.GeolocationIP `bigquery:"client_geolocation"`
54-
ServerGeolocation api.GeolocationIP `bigquery:"server_geolocation"`
55-
Client ndtClientNetwork `bigquery:"client"`
56-
Server ndtServerNetwork `bigquery:"server"`
37+
ClientAF int64 `bigquery:"client_af"`
38+
ClientApplication string `bigquery:"client_application"`
39+
ClientBrowser string `bigquery:"client_browser"`
40+
ClientHostname string `bigquery:"client_hostname"`
41+
ClientIP string `bigquery:"client_ip"`
42+
ClientKernelVersion string `bigquery:"client_kernel_version"`
43+
ClientOS string `bigquery:"client_os"`
44+
ClientVersion string `bigquery:"client_version"`
45+
DataDirection int64 `bigquery:"data_direction"`
46+
ServerAF int64 `bigquery:"server_af"`
47+
ServerHostname string `bigquery:"server_hostname"`
48+
ServerIP string `bigquery:"server_ip"`
49+
ServerKernelVersion string `bigquery:"server_kernel_version"`
50+
TLS bool `bigquery:"tls"`
51+
Websockets bool `bigquery:"websockets"`
52+
ClientGeolocation LegacyGeolocationIP `bigquery:"client_geolocation"`
53+
ServerGeolocation LegacyGeolocationIP `bigquery:"server_geolocation"`
54+
Client ndtClientNetwork `bigquery:"client"`
55+
Server ndtServerNetwork `bigquery:"server"`
5756

5857
// ServerX and ClientX are for the synthetic UUID annotator export process.
5958
ServerX annotator.ServerAnnotations

schema/schema.go

+52 -5
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@ import (
88
"reflect"
99
"time"
1010

11-
"github.com/m-lab/annotation-service/api"
1211
"github.com/m-lab/go/cloud/bqx"
1312
)
1413

@@ -28,17 +27,17 @@ type ServerInfo struct {
2827
Port uint16
2928
IATA string
3029

31-
Geo *api.GeolocationIP
32-
Network *api.ASData // NOTE: dominant ASN is available at top level.
30+
Geo *LegacyGeolocationIP
31+
Network *LegacyASData // NOTE: dominant ASN is available at top level.
3332
}
3433

3534
// ClientInfo details various kinds of information about the client.
3635
type ClientInfo struct {
3736
IP string
3837
Port uint16
3938

40-
Geo *api.GeolocationIP
41-
Network *api.ASData // NOTE: dominant ASN is available at top level.
39+
Geo *LegacyGeolocationIP
40+
Network *LegacyASData // NOTE: dominant ASN is available at top level.
4241
}
4342

4443
// ParseInfoV0 provides details about the parsing of this row.
@@ -49,6 +48,54 @@ type ParseInfoV0 struct {
4948
Filename string
5049
}
5150

51+
/*************************************************************************
52+
* DEPRECATED: Annotation Structs *
53+
*************************************************************************/
54+
55+
// LegacyGeolocationIP preserves the schema for existing v1 datatype schemas. It should not be used for new datatypes.
56+
// Deprecated: v1 annotation-service schema, preserved for backward compatibility. Do not reuse.
57+
type LegacyGeolocationIP struct {
58+
ContinentCode string `json:"continent_code,,omitempty" bigquery:"continent_code"` // Gives a shorthand for the continent
59+
CountryCode string `json:"country_code,,omitempty" bigquery:"country_code"` // Gives a shorthand for the country
60+
CountryCode3 string `json:"country_code3,,omitempty" bigquery:"country_code3"` // Gives a shorthand for the country
61+
CountryName string `json:"country_name,,omitempty" bigquery:"country_name"` // Name of the country
62+
Region string `json:"region,,omitempty" bigquery:"region"` // Region or State within the country
63+
Subdivision1ISOCode string `json:",omitempty"` // ISO3166-2 first-level country subdivision ISO code
64+
Subdivision1Name string `json:",omitempty"` // ISO3166-2 first-level country subdivision name
65+
Subdivision2ISOCode string `json:",omitempty"` // ISO3166-2 second-level country subdivision ISO code
66+
Subdivision2Name string `json:",omitempty"` // ISO3166-2 second-level country subdivision name
67+
MetroCode int64 `json:"metro_code,,omitempty" bigquery:"metro_code"` // Metro code within the country
68+
City string `json:"city,,omitempty" bigquery:"city"` // City within the region
69+
AreaCode int64 `json:"area_code,,omitempty" bigquery:"area_code"` // Area code, similar to metro code
70+
PostalCode string `json:"postal_code,,omitempty" bigquery:"postal_code"` // Postal code, again similar to metro
71+
Latitude float64 `json:"latitude,,omitempty" bigquery:"latitude"` // Latitude
72+
Longitude float64 `json:"longitude,,omitempty" bigquery:"longitude"` // Longitude
73+
AccuracyRadiusKm int64 `json:"radius,,omitempty" bigquery:"radius"` // Accuracy Radius (geolite2 from 2018)
74+
75+
Missing bool `json:",omitempty"` // True when the Geolocation data is missing from MaxMind.
76+
}
77+
78+
type LegacySystem struct {
79+
// ASNs contains a single ASN, or AS set. There must always be at least one ASN.
80+
// If there are more than one ASN, they are (arbitrarily) listed in increasing numerical order.
81+
ASNs []uint32
82+
}
83+
84+
// LegacyASData preserves the schema for existing v1 datatype schemas. It should not be used for new datatypes.
85+
// Deprecated: v1 annotation-service schema, preserved for backward compatibility. Do not reuse.
86+
type LegacyASData struct {
87+
IPPrefix string `json:",omitempty"` // the IP prefix found in the table.
88+
CIDR string `json:",omitempty"` // The IP prefix found in the RouteViews data.
89+
ASNumber uint32 `json:",omitempty"` // First AS number.
90+
ASName string `json:",omitempty"` // AS name for that number, data from IPinfo.io
91+
Missing bool `json:",omitempty"` // True when the ASN data is missing from RouteViews.
92+
93+
// One or more "Systems". There must always be at least one System. If there are more than one,
94+
// then this is a Multi-Origin AS, and the component Systems are in order of frequency in routing tables,
95+
// most common first.
96+
Systems []LegacySystem `json:",omitempty"`
97+
}
98+
5299
// FindSchemaDocsFor should be used by parser row types to associate bigquery
53100
// field descriptions with a schema generated from a row type.
54101
func FindSchemaDocsFor(value interface{}) []bqx.SchemaDoc {

0 commit comments

Comments
 (0)