Skip to content

Commit 33e27c7

Browse files
raphaelgavacheamarzialimccullsPerfectSlayer
authored
support service discovery with JNA (#9705)
* try to plug memfd to core-tracer * prepare encoding * add proper encoding * suggestions * add groovy test * spotless * fix graalvm runs * fix test * try something for graalvm * Fix GraalVM smoke test on Java 21 The root issue was that the reference to MemFDUnixWriter in the TracerInstaller bytecode was being picked up by HelperScanner, which is a utility that we use to avoid having to maintain an exact list of classes to inject into GraalVM in order to activate the tracer in the final native binary via VMRuntimeInstrumentation and TracerActivation. The simplest solution was to use reflection to load MemFDUnixWriter. The alternative would have been to enhance HelperScanner to be able to exclude types from the list of classes it finds. However this would be more complicated. * Skip reflection call when launching native image * Suppress warning about use of Class.forName because it's intentional * Lazily load and execute service discovery * hide subprocess span due to jna command execution * update after review * match json spec for python decoder in system-tests * update after review * add config to disable * Rearrange config * update after review * update after review * Apply suggestions from code review Co-authored-by: Stuart McCulloch <stuart.mcculloch@datadoghq.com> * feat: Refactor factory and introduce NOOP * feat: Removing noop to avoid always schedule * match agent prefix matcher --------- Co-authored-by: Andrea Marziali <andrea.marziali@datadoghq.com> Co-authored-by: Stuart McCulloch <stuart.mcculloch@datadoghq.com> Co-authored-by: Bruce Bujon <bruce.bujon@datadoghq.com>
1 parent c85d09f commit 33e27c7

File tree

10 files changed

+312
-0
lines changed

10 files changed

+312
-0
lines changed

dd-java-agent/agent-tooling/src/main/java/datadog/trace/agent/tooling/TracerInstaller.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
11
package datadog.trace.agent.tooling;
22

33
import datadog.communication.ddagent.SharedCommunicationObjects;
4+
import datadog.environment.OperatingSystem;
45
import datadog.trace.api.Config;
56
import datadog.trace.api.GlobalTracer;
7+
import datadog.trace.api.Platform;
68
import datadog.trace.bootstrap.instrumentation.api.AgentTracer;
79
import datadog.trace.bootstrap.instrumentation.api.ProfilingContextIntegration;
810
import datadog.trace.core.CoreTracer;
11+
import datadog.trace.core.servicediscovery.ForeignMemoryWriter;
12+
import datadog.trace.core.servicediscovery.ServiceDiscovery;
13+
import datadog.trace.core.servicediscovery.ServiceDiscoveryFactory;
14+
import de.thetaphi.forbiddenapis.SuppressForbidden;
915
import org.slf4j.Logger;
1016
import org.slf4j.LoggerFactory;
1117

@@ -23,6 +29,7 @@ public static synchronized void installGlobalTracer(
2329
.profilingContextIntegration(profilingContextIntegration)
2430
.reportInTracerFlare()
2531
.pollForTracingConfiguration()
32+
.serviceDiscoveryFactory(serviceDiscoveryFactory())
2633
.build();
2734
installGlobalTracer(tracer);
2835
} else {
@@ -33,6 +40,38 @@ public static synchronized void installGlobalTracer(
3340
}
3441
}
3542

43+
/**
 * Decides whether service discovery can be offered to the tracer being built.
 *
 * <p>The checks run in this order: the feature must be enabled in {@link Config}, the operating
 * system must be Linux, and the code must not be running in (or building) a native image.
 *
 * @return a factory that defers to {@code initServiceDiscovery()} when it is eventually invoked,
 *     or {@code null} when any of the checks above fails
 */
private static ServiceDiscoveryFactory serviceDiscoveryFactory() {
44+
if (!Config.get().isServiceDiscoveryEnabled()) {
45+
return null;
46+
}
47+
if (!OperatingSystem.isLinux()) {
48+
log.debug("service discovery not supported outside linux");
49+
return null;
50+
}
51+
// make sure this branch is not considered possible for graalvm artifact
52+
if (Platform.isNativeImageBuilder() || Platform.isNativeImage()) {
53+
log.debug("service discovery not supported on native images");
54+
return null;
55+
}
56+
// Only a method reference is handed out: nothing is loaded or executed until the factory runs.
return TracerInstaller::initServiceDiscovery;
57+
}
58+
59+
/**
 * Creates the {@link ServiceDiscovery} backed by {@code MemFDUnixWriter}.
 *
 * <p>The writer class is looked up by name and instantiated through its no-arg constructor.
 *
 * @return the service discovery instance, or {@code null} if the writer could not be loaded or
 *     instantiated for any reason (the failure is only logged at debug level)
 */
@SuppressForbidden // intentional use of Class.forName
60+
private static ServiceDiscovery initServiceDiscovery() {
61+
try {
62+
// use reflection to load MemFDUnixWriter so it doesn't get picked up when we
63+
// transitively look for all tracer class dependencies to install in GraalVM via
64+
// VMRuntimeInstrumentation
65+
Class<?> memFdClass =
66+
Class.forName("datadog.trace.agent.tooling.servicediscovery.MemFDUnixWriter");
67+
ForeignMemoryWriter memFd = (ForeignMemoryWriter) memFdClass.getConstructor().newInstance();
68+
return new ServiceDiscovery(memFd);
69+
} catch (Throwable e) {
70+
// Throwable (not Exception) so that linkage errors raised while loading the class are also
// treated as "not supported" instead of propagating to the caller.
log.debug("service discovery not supported", e);
71+
return null;
72+
}
73+
}
74+
3675
public static void installGlobalTracer(final CoreTracer tracer) {
3776
try {
3877
GlobalTracer.registerIfAbsent(tracer);
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
package datadog.trace.agent.tooling.servicediscovery;
2+
3+
import com.sun.jna.Library;
4+
import com.sun.jna.Memory;
5+
import com.sun.jna.Native;
6+
import com.sun.jna.NativeLong;
7+
import com.sun.jna.Pointer;
8+
import datadog.trace.core.servicediscovery.ForeignMemoryWriter;
9+
import org.slf4j.Logger;
10+
import org.slf4j.LoggerFactory;
11+
12+
public class MemFDUnixWriter implements ForeignMemoryWriter {
13+
private static final Logger log = LoggerFactory.getLogger(MemFDUnixWriter.class);
14+
15+
private interface LibC extends Library {
16+
int memfd_create(String name, int flags);
17+
18+
NativeLong write(int fd, Pointer buf, NativeLong count);
19+
20+
int fcntl(int fd, int cmd, int arg);
21+
}
22+
23+
// https://elixir.bootlin.com/linux/v6.17.1/source/include/uapi/linux/memfd.h#L8-L9
24+
private static final int MFD_CLOEXEC = 0x0001;
25+
private static final int MFD_ALLOW_SEALING = 0x0002;
26+
27+
// https://elixir.bootlin.com/linux/v6.17.1/source/include/uapi/linux/fcntl.h#L40
28+
private static final int F_ADD_SEALS = 1033; //
29+
30+
// https://elixir.bootlin.com/linux/v6.17.1/source/include/uapi/linux/fcntl.h#L46-L49
31+
private static final int F_SEAL_SEAL = 0x0001;
32+
private static final int F_SEAL_SHRINK = 0x0002;
33+
private static final int F_SEAL_GROW = 0x0004;
34+
35+
@Override
36+
public void write(byte[] payload) {
37+
final LibC libc = Native.load("c", LibC.class);
38+
39+
int memFd = libc.memfd_create("datadog-tracer-info-", MFD_CLOEXEC | MFD_ALLOW_SEALING);
40+
if (memFd < 0) {
41+
log.warn("datadog-tracer-info memfd create failed, errno={}", Native.getLastError());
42+
return;
43+
}
44+
45+
log.debug("datadog-tracer-info memfd created (fd={})", memFd);
46+
47+
Memory buf = new Memory(payload.length);
48+
buf.write(0, payload, 0, payload.length);
49+
50+
NativeLong written = libc.write(memFd, buf, new NativeLong(payload.length));
51+
if (written.longValue() != payload.length) {
52+
log.warn("write to datadog-tracer-info memfd failed errno={}", Native.getLastError());
53+
return;
54+
}
55+
log.debug("wrote {} bytes to memfd {}", written.longValue(), memFd);
56+
int returnCode = libc.fcntl(memFd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_SEAL);
57+
if (returnCode == -1) {
58+
log.warn("failed to add seal to datadog-tracer-info memfd errno={}", Native.getLastError());
59+
return;
60+
}
61+
// memfd is not closed to keep it readable for the lifetime of the process.
62+
}
63+
}

dd-trace-api/src/main/java/datadog/trace/api/ConfigDefaults.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,8 @@ public final class ConfigDefaults {
246246
static final boolean DEFAULT_TELEMETRY_LOG_COLLECTION_ENABLED = true;
247247
static final int DEFAULT_TELEMETRY_DEPENDENCY_RESOLUTION_QUEUE_SIZE = 100000;
248248

249+
static final boolean DEFAULT_SERVICE_DISCOVERY_ENABLED = true;
250+
249251
static final boolean DEFAULT_RUM_ENABLED = false;
250252
public static final String DEFAULT_RUM_SITE = DEFAULT_SITE;
251253
public static final int DEFAULT_RUM_MAJOR_VERSION = 6;

dd-trace-api/src/main/java/datadog/trace/api/config/TracerConfig.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ public final class TracerConfig {
163163
"trace.cloud.payload.tagging.max-depth";
164164
public static final String TRACE_CLOUD_PAYLOAD_TAGGING_MAX_TAGS =
165165
"trace.cloud.payload.tagging.max-tags";
166+
public static final String TRACE_SERVICE_DISCOVERY_ENABLED = "trace.service.discovery.enabled";
166167

167168
private TracerConfig() {}
168169
}

dd-trace-core/src/main/java/datadog/trace/core/CoreTracer.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@
9696
import datadog.trace.core.propagation.TracingPropagator;
9797
import datadog.trace.core.propagation.XRayPropagator;
9898
import datadog.trace.core.scopemanager.ContinuableScopeManager;
99+
import datadog.trace.core.servicediscovery.ServiceDiscovery;
100+
import datadog.trace.core.servicediscovery.ServiceDiscoveryFactory;
99101
import datadog.trace.core.taginterceptor.RuleFlags;
100102
import datadog.trace.core.taginterceptor.TagInterceptor;
101103
import datadog.trace.core.traceinterceptor.LatencyTraceInterceptor;
@@ -321,6 +323,7 @@ public static class CoreTracerBuilder {
321323
private TagInterceptor tagInterceptor;
322324
private boolean strictTraceWrites;
323325
private InstrumentationGateway instrumentationGateway;
326+
private ServiceDiscoveryFactory serviceDiscoveryFactory;
324327
private TimeSource timeSource;
325328
private DataStreamsMonitoring dataStreamsMonitoring;
326329
private ProfilingContextIntegration profilingContextIntegration =
@@ -436,6 +439,12 @@ public CoreTracerBuilder instrumentationGateway(InstrumentationGateway instrumen
436439
return this;
437440
}
438441

442+
/**
 * Sets the factory used to create the {@link ServiceDiscovery} for the tracer being built.
 *
 * @param serviceDiscoveryFactory the factory to use; when left {@code null} the tracer does not
 *     schedule any service discovery work
 * @return this builder
 */
public CoreTracerBuilder serviceDiscoveryFactory(
443+
ServiceDiscoveryFactory serviceDiscoveryFactory) {
444+
this.serviceDiscoveryFactory = serviceDiscoveryFactory;
445+
return this;
446+
}
447+
439448
public CoreTracerBuilder timeSource(TimeSource timeSource) {
440449
this.timeSource = timeSource;
441450
return this;
@@ -528,6 +537,7 @@ public CoreTracer build() {
528537
tagInterceptor,
529538
strictTraceWrites,
530539
instrumentationGateway,
540+
serviceDiscoveryFactory,
531541
timeSource,
532542
dataStreamsMonitoring,
533543
profilingContextIntegration,
@@ -588,6 +598,7 @@ private CoreTracer(
588598
tagInterceptor,
589599
strictTraceWrites,
590600
instrumentationGateway,
601+
null,
591602
timeSource,
592603
dataStreamsMonitoring,
593604
profilingContextIntegration,
@@ -619,6 +630,7 @@ private CoreTracer(
619630
final TagInterceptor tagInterceptor,
620631
final boolean strictTraceWrites,
621632
final InstrumentationGateway instrumentationGateway,
633+
final ServiceDiscoveryFactory serviceDiscoveryFactory,
622634
final TimeSource timeSource,
623635
final DataStreamsMonitoring dataStreamsMonitoring,
624636
final ProfilingContextIntegration profilingContextIntegration,
@@ -887,6 +899,21 @@ private CoreTracer(
887899

888900
this.localRootSpanTagsNeedIntercept =
889901
this.tagInterceptor.needsIntercept(this.localRootSpanTags);
902+
if (serviceDiscoveryFactory != null) {
903+
AgentTaskScheduler.get()
904+
.schedule(
905+
() -> {
906+
final ServiceDiscovery serviceDiscovery = serviceDiscoveryFactory.get();
907+
if (serviceDiscovery != null) {
908+
// JNA can do ldconfig and other commands. Those are hidden since internal.
909+
try (final TraceScope blackhole = muteTracing()) {
910+
serviceDiscovery.writeTracerMetadata(config);
911+
}
912+
}
913+
},
914+
1,
915+
SECONDS);
916+
}
890917
}
891918

892919
/** Used by AgentTestRunner to inject configuration into the test tracer. */
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
package datadog.trace.core.servicediscovery;
2+
3+
/**
 * Sink for an already-encoded payload.
 *
 * <p>{@link ServiceDiscovery} hands the encoded tracer metadata to an implementation of this
 * interface; where and how the bytes are stored is up to the implementation.
 */
@FunctionalInterface
4+
public interface ForeignMemoryWriter {
5+
/**
 * Writes the given payload.
 *
 * @param payload the encoded bytes to write
 */
void write(byte[] payload);
6+
}
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
package datadog.trace.core.servicediscovery;
2+
3+
import static java.nio.charset.StandardCharsets.ISO_8859_1;
4+
5+
import datadog.common.container.ContainerInfo;
6+
import datadog.communication.ddagent.TracerVersion;
7+
import datadog.communication.serialization.GrowableBuffer;
8+
import datadog.communication.serialization.msgpack.MsgPackWriter;
9+
import datadog.trace.api.Config;
10+
import datadog.trace.api.ProcessTags;
11+
import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString;
12+
import java.nio.ByteBuffer;
13+
import org.slf4j.Logger;
14+
import org.slf4j.LoggerFactory;
15+
16+
public class ServiceDiscovery {
17+
private static final Logger log = LoggerFactory.getLogger(ServiceDiscovery.class);
18+
19+
private final ForeignMemoryWriter foreignMemoryWriter;
20+
21+
public ServiceDiscovery(ForeignMemoryWriter foreignMemoryWriter) {
22+
this.foreignMemoryWriter = foreignMemoryWriter;
23+
}
24+
25+
public void writeTracerMetadata(Config config) {
26+
try {
27+
byte[] payload =
28+
ServiceDiscovery.encodePayload(
29+
TracerVersion.TRACER_VERSION,
30+
config.getHostName(),
31+
config.getRuntimeId(),
32+
config.getServiceName(),
33+
config.getEnv(),
34+
config.getVersion(),
35+
ProcessTags.getTagsForSerialization(),
36+
ContainerInfo.get().getContainerId());
37+
38+
foreignMemoryWriter.write(payload);
39+
} catch (Throwable t) {
40+
log.debug("service discovery memfd write failed", t);
41+
}
42+
}
43+
44+
static byte[] encodePayload(
45+
String tracerVersion,
46+
String hostname,
47+
String runtimeID,
48+
String service,
49+
String env,
50+
String serviceVersion,
51+
UTF8BytesString processTags,
52+
String containerID) {
53+
GrowableBuffer buffer = new GrowableBuffer(1024);
54+
MsgPackWriter writer = new MsgPackWriter(buffer);
55+
56+
int mapElements = 4;
57+
mapElements += (runtimeID != null && !runtimeID.isEmpty()) ? 1 : 0;
58+
mapElements += (service != null && !service.isEmpty()) ? 1 : 0;
59+
mapElements += (env != null && !env.isEmpty()) ? 1 : 0;
60+
mapElements += (serviceVersion != null && !serviceVersion.isEmpty()) ? 1 : 0;
61+
mapElements += (processTags != null && processTags.length() > 0) ? 1 : 0;
62+
mapElements += (containerID != null && !containerID.isEmpty()) ? 1 : 0;
63+
64+
writer.startMap(mapElements);
65+
66+
writer.writeUTF8("schema_version".getBytes(ISO_8859_1));
67+
writer.writeInt(2);
68+
69+
writer.writeUTF8("tracer_language".getBytes(ISO_8859_1));
70+
writer.writeUTF8("java".getBytes(ISO_8859_1));
71+
72+
writer.writeUTF8("tracer_version".getBytes(ISO_8859_1));
73+
writer.writeUTF8(tracerVersion.getBytes(ISO_8859_1));
74+
75+
writer.writeUTF8("hostname".getBytes(ISO_8859_1));
76+
writer.writeUTF8(hostname.getBytes(ISO_8859_1));
77+
78+
if (runtimeID != null && !runtimeID.isEmpty()) {
79+
writer.writeUTF8("runtime_id".getBytes(ISO_8859_1));
80+
writer.writeUTF8(runtimeID.getBytes(ISO_8859_1));
81+
}
82+
if (service != null && !service.isEmpty()) {
83+
writer.writeUTF8("service_name".getBytes(ISO_8859_1));
84+
writer.writeUTF8(service.getBytes(ISO_8859_1));
85+
}
86+
if (env != null && !env.isEmpty()) {
87+
writer.writeUTF8("service_env".getBytes(ISO_8859_1));
88+
writer.writeUTF8(env.getBytes(ISO_8859_1));
89+
}
90+
if (serviceVersion != null && !serviceVersion.isEmpty()) {
91+
writer.writeUTF8("service_version".getBytes(ISO_8859_1));
92+
writer.writeUTF8(serviceVersion.getBytes(ISO_8859_1));
93+
}
94+
if (processTags != null && processTags.length() > 0) {
95+
writer.writeUTF8("process_tags".getBytes(ISO_8859_1));
96+
writer.writeUTF8(processTags);
97+
}
98+
if (containerID != null && !containerID.isEmpty()) {
99+
writer.writeUTF8("container_id".getBytes(ISO_8859_1));
100+
writer.writeUTF8(containerID.getBytes(ISO_8859_1));
101+
}
102+
103+
ByteBuffer byteBuffer = buffer.slice();
104+
byte[] bytes = new byte[byteBuffer.remaining()];
105+
byteBuffer.get(bytes);
106+
return bytes;
107+
}
108+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
package datadog.trace.core.servicediscovery;
2+
3+
import java.util.function.Supplier;
4+
5+
/**
 * Supplier of {@link ServiceDiscovery} instances.
 *
 * <p>Declared as its own type so it can be passed around as a lambda or method reference; it adds
 * no methods to {@link Supplier}.
 */
@FunctionalInterface
6+
public interface ServiceDiscoveryFactory extends Supplier<ServiceDiscovery> {}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
package datadog.trace.core.servicediscovery
2+
3+
import datadog.trace.bootstrap.instrumentation.api.UTF8BytesString
4+
import datadog.trace.core.test.DDCoreSpecification
5+
import spock.lang.Timeout
6+
import org.msgpack.core.MessagePack
7+
import org.msgpack.value.MapValue
8+
9+
10+
@Timeout(10)
class ServiceDiscoveryTest extends DDCoreSpecification {
  // Every optional value is supplied, so all ten entries must be encoded, in declaration order.
  def "encodePayload with all optional fields"() {
    given:
    UTF8BytesString tags = UTF8BytesString.create("key1:val1,key2:val2")

    when:
    byte[] encoded = ServiceDiscovery.encodePayload(
      "1.2.3",
      "test-host",
      "rid-123",
      "orders",
      "prod",
      "1.1.1",
      tags,
      "containerID")
    MapValue decoded = MessagePack.newDefaultUnpacker(encoded).unpackValue().asMapValue()

    then:
    decoded.size() == 10
    decoded.toString() == '{"schema_version":2,"tracer_language":"java","tracer_version":"1.2.3","hostname":"test-host","runtime_id":"rid-123","service_name":"orders","service_env":"prod","service_version":"1.1.1","process_tags":"key1:val1,key2:val2","container_id":"containerID"}'
  }

  // With every optional value null, only the four always-present entries are encoded.
  def "encodePayload only required fields"() {
    when:
    byte[] encoded = ServiceDiscovery.encodePayload("1.2.3", "my_host", null, null, null, null, null, null)
    MapValue decoded = MessagePack.newDefaultUnpacker(encoded).unpackValue().asMapValue()

    then:
    decoded.size() == 4
    decoded.toString() == '{"schema_version":2,"tracer_language":"java","tracer_version":"1.2.3","hostname":"my_host"}'
  }
}

0 commit comments

Comments
 (0)