From 7094eb86c9f4858cfa0256b865a5826a4fee3cc8 Mon Sep 17 00:00:00 2001 From: "Jonathan A. Sternberg" Date: Tue, 12 Dec 2023 16:27:49 -0600 Subject: [PATCH] metrics: send metrics to the otel collector endpoint when active Introduce a meter provider to the buildx cli that will send metrics to the otel-collector included in docker desktop if enabled. This will send usage metrics to the desktop application but also send metrics to a user-provided otlp receiver endpoint through the standard environment variables. This introduces a single metric which is the cli count for build and bake along with the command name and a few additional attributes. Signed-off-by: Jonathan A. Sternberg --- commands/bake.go | 9 ++ commands/build.go | 41 +++++++++ go.mod | 8 +- util/metrics/metrics.go | 189 ++++++++++++++++++++++++++++++++++++++++ util/metrics/otlp.go | 49 +++++++++++ 5 files changed, 292 insertions(+), 4 deletions(-) create mode 100644 util/metrics/metrics.go create mode 100644 util/metrics/otlp.go diff --git a/commands/bake.go b/commands/bake.go index 77ad7529..5cfa3979 100644 --- a/commands/bake.go +++ b/commands/bake.go @@ -19,6 +19,7 @@ import ( "github.com/docker/buildx/util/confutil" "github.com/docker/buildx/util/desktop" "github.com/docker/buildx/util/dockerutil" + "github.com/docker/buildx/util/metrics" "github.com/docker/buildx/util/progress" "github.com/docker/buildx/util/tracing" "github.com/docker/cli/cli/command" @@ -45,6 +46,14 @@ type bakeOptions struct { func runBake(dockerCli command.Cli, targets []string, in bakeOptions, cFlags commonFlags) (err error) { ctx := appcontext.Context() + mp, report, err := metrics.MeterProvider(dockerCli) + if err != nil { + return err + } + defer report() + + recordVersionInfo(mp, "bake") + ctx, end, err := tracing.TraceCurrentCommand(ctx, "bake") if err != nil { return err diff --git a/commands/build.go b/commands/build.go index f426658e..f290de67 100644 --- a/commands/build.go +++ b/commands/build.go @@ -29,8 +29,10 @@ import ( "github.com/docker/buildx/util/buildflags" "github.com/docker/buildx/util/desktop" "github.com/docker/buildx/util/ioset" + "github.com/docker/buildx/util/metrics" "github.com/docker/buildx/util/progress" "github.com/docker/buildx/util/tracing" + "github.com/docker/buildx/version" "github.com/docker/cli-docs-tool/annotation" "github.com/docker/cli/cli" "github.com/docker/cli/cli/command" @@ -51,6 +53,9 @@ import ( "github.com/sirupsen/logrus" "github.com/spf13/cobra" "github.com/spf13/pflag" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" "google.golang.org/grpc/codes" ) @@ -212,6 +217,15 @@ func (o *buildOptions) toDisplayMode() (progressui.DisplayMode, error) { func runBuild(dockerCli command.Cli, options buildOptions) (err error) { ctx := appcontext.Context() + + mp, report, err := metrics.MeterProvider(dockerCli) + if err != nil { + return err + } + defer report() + + recordVersionInfo(mp, "build") + ctx, end, err := tracing.TraceCurrentCommand(ctx, "build") if err != nil { return err @@ -926,3 +940,30 @@ func maybeJSONArray(v string) []string { } return []string{v} } + +func recordVersionInfo(mp metric.MeterProvider, command string) { + // Still in the process of testing/stabilizing these counters. + if !isExperimental() { + return + } + + meter := mp.Meter("github.com/docker/buildx", + metric.WithInstrumentationVersion(version.Version), + ) + + counter, err := meter.Int64Counter("docker.cli.count", + metric.WithDescription("Number of invocations of the docker buildx command."), + ) + if err != nil { + otel.Handle(err) + } + + counter.Add(context.Background(), 1, + metric.WithAttributes( + attribute.String("command", command), + attribute.String("package", version.Package), + attribute.String("version", version.Version), + attribute.String("revision", version.Revision), + ), + ) +} diff --git a/go.mod b/go.mod index befd3d89..65f2f1ec 100644 --- a/go.mod +++ b/go.mod @@ -39,6 +39,10 @@ require ( github.com/stretchr/testify v1.8.4 github.com/zclconf/go-cty v1.14.1 go.opentelemetry.io/otel v1.19.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0 + go.opentelemetry.io/otel/metric v1.19.0 + go.opentelemetry.io/otel/sdk/metric v1.19.0 go.opentelemetry.io/otel/trace v1.19.0 golang.org/x/mod v0.11.0 golang.org/x/sync v0.3.0 @@ -138,15 +142,11 @@ require ( go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.45.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.42.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0 // indirect go.opentelemetry.io/otel/exporters/prometheus v0.42.0 // indirect - go.opentelemetry.io/otel/metric v1.19.0 // indirect go.opentelemetry.io/otel/sdk v1.19.0 // indirect - go.opentelemetry.io/otel/sdk/metric v1.19.0 // indirect go.opentelemetry.io/proto/otlp v1.0.0 // indirect golang.org/x/crypto v0.17.0 // indirect golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 // indirect diff --git a/util/metrics/metrics.go b/util/metrics/metrics.go new file mode 100644 index 00000000..36e1cc62 --- /dev/null +++ b/util/metrics/metrics.go @@ -0,0 +1,189 @@ +package metrics + +import ( + "context" + "fmt" + "net/url" + "path" + "time" + + "github.com/docker/cli/cli/command" + "github.com/moby/buildkit/util/tracing/detect" + "github.com/pkg/errors" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" + "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/metric/noop" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + "golang.org/x/sync/errgroup" +) + +const ( + otelConfigFieldName = "otel" + shutdownTimeout = 2 * time.Second +) + +// ReportFunc is invoked to signal the metrics should be sent to the +// desired endpoint. It should be invoked on application shutdown. +type ReportFunc func() + +// MeterProvider returns a MeterProvider suitable for CLI usage. +// The primary difference between this metric reader and a more typical +// usage is that metric reporting only happens once when ReportFunc +// is invoked. +func MeterProvider(cli command.Cli) (metric.MeterProvider, ReportFunc, error) { + var exps []sdkmetric.Exporter + + if exp, err := dockerOtelExporter(cli); err != nil { + return nil, nil, err + } else if exp != nil { + exps = append(exps, exp) + } + + if exp, err := detectOtlpExporter(context.Background()); err != nil { + return nil, nil, err + } else if exp != nil { + exps = append(exps, exp) + } + + if len(exps) == 0 { + // No exporters are configured so use a noop provider. + return noop.NewMeterProvider(), func() {}, nil + } + + // Use delta temporality because, since this is a CLI program, we can never + // know the cumulative value. + reader := sdkmetric.NewManualReader( + sdkmetric.WithTemporalitySelector(deltaTemporality), + ) + mp := sdkmetric.NewMeterProvider( + sdkmetric.WithResource(detect.Resource()), + sdkmetric.WithReader(reader), + ) + return mp, reportFunc(reader, exps), nil +} + +// reportFunc returns a ReportFunc for collecting ResourceMetrics and then +// exporting them to the configured Exporter. +func reportFunc(reader sdkmetric.Reader, exps []sdkmetric.Exporter) ReportFunc { + return func() { + ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout) + defer cancel() + + var rm metricdata.ResourceMetrics + if err := reader.Collect(ctx, &rm); err != nil { + // Error when collecting metrics. Do not send any. + return + } + + var eg errgroup.Group + for _, exp := range exps { + exp := exp + eg.Go(func() error { + _ = exp.Export(ctx, &rm) + _ = exp.Shutdown(ctx) + return nil + }) + } + + // Can't report an error because we don't allow it to. + _ = eg.Wait() + } +} + +// dockerOtelExporter reads the CLI metadata to determine an OTLP exporter +// endpoint for docker metrics to be sent. +// +// This location, configuration, and usage is hard-coded as part of +// sending usage statistics so this metric reporting is not meant to be +// user facing. +func dockerOtelExporter(cli command.Cli) (sdkmetric.Exporter, error) { + endpoint, err := otelExporterOtlpEndpoint(cli) + if endpoint == "" || err != nil { + return nil, err + } + + // Parse the endpoint. The docker config expects the endpoint to be + // in the form of a URL to match the environment variable, but this + // option doesn't correspond directly to WithEndpoint. + // + // We pretend we're the same as the environment reader. + u, err := url.Parse(endpoint) + if err != nil { + return nil, errors.Errorf("docker otel endpoint is invalid: %s", err) + } + + var opts []otlpmetricgrpc.Option + switch u.Scheme { + case "unix": + // Unix sockets are a bit weird. OTEL seems to imply they + // can be used as an environment variable and are handled properly, + // but they don't seem to be as the behavior of the environment variable + // is to strip the scheme from the endpoint, but the underlying implementation + // needs the scheme to use the correct resolver. + // + // We'll just handle this in a special way and add the unix:// back to the endpoint. + opts = []otlpmetricgrpc.Option{ + otlpmetricgrpc.WithEndpoint(fmt.Sprintf("unix://%s", path.Join(u.Host, u.Path))), + otlpmetricgrpc.WithInsecure(), + } + case "http": + opts = []otlpmetricgrpc.Option{ + // Omit the scheme when using http or https. + otlpmetricgrpc.WithEndpoint(path.Join(u.Host, u.Path)), + otlpmetricgrpc.WithInsecure(), + } + default: + opts = []otlpmetricgrpc.Option{ + // Omit the scheme when using http or https. + otlpmetricgrpc.WithEndpoint(path.Join(u.Host, u.Path)), + } + } + + // Hardcoded endpoint from the endpoint. + exp, err := otlpmetricgrpc.New(context.Background(), opts...) + if err != nil { + return nil, err + } + return exp, nil +} + +// otelExporterOtlpEndpoint retrieves the OTLP endpoint used for the docker reporter +// from the current context. +func otelExporterOtlpEndpoint(cli command.Cli) (string, error) { + meta, err := cli.ContextStore().GetMetadata(cli.CurrentContext()) + if err != nil { + return "", err + } + + var otelCfg interface{} + switch m := meta.Metadata.(type) { + case command.DockerContext: + otelCfg = m.AdditionalFields[otelConfigFieldName] + case map[string]interface{}: + otelCfg = m[otelConfigFieldName] + } + + if otelCfg == nil { + return "", nil + } + + otelMap, ok := otelCfg.(map[string]interface{}) + if !ok { + return "", errors.Errorf( + "unexpected type for field %q: %T (expected: %T)", + otelConfigFieldName, + otelCfg, + otelMap, + ) + } + + // keys from https://opentelemetry.io/docs/concepts/sdk-configuration/otlp-exporter-configuration/ + endpoint, _ := otelMap["OTEL_EXPORTER_OTLP_ENDPOINT"].(string) + return endpoint, nil +} + +// deltaTemporality sets the Temporality of every instrument to delta. +func deltaTemporality(_ sdkmetric.InstrumentKind) metricdata.Temporality { + return metricdata.DeltaTemporality +} diff --git a/util/metrics/otlp.go b/util/metrics/otlp.go new file mode 100644 index 00000000..b121ac3c --- /dev/null +++ b/util/metrics/otlp.go @@ -0,0 +1,49 @@ +package metrics + +import ( + "context" + "os" + + "github.com/pkg/errors" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" +) + +// detectOtlpExporter configures a metrics exporter based on environment variables. +// This is similar to the version of this in buildkit, but we need direct access +// to the exporter and the prometheus exporter doesn't work at all in a CLI context. +// +// There's some duplication here which I hope to remove when the detect package +// is refactored or extracted from buildkit so it can be utilized here. +// +// This version of the exporter is public facing in contrast to the +// docker otel collector. +func detectOtlpExporter(ctx context.Context) (sdkmetric.Exporter, error) { + set := os.Getenv("OTEL_METRICS_EXPORTER") == "otlp" || os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT") != "" || os.Getenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT") != "" + if !set { + return nil, nil + } + + proto := os.Getenv("OTEL_EXPORTER_OTLP_METRICS_PROTOCOL") + if proto == "" { + proto = os.Getenv("OTEL_EXPORTER_OTLP_PROTOCOL") + } + if proto == "" { + proto = "grpc" + } + + switch proto { + case "grpc": + return otlpmetricgrpc.New(ctx, + otlpmetricgrpc.WithTemporalitySelector(deltaTemporality), + ) + case "http/protobuf": + return otlpmetrichttp.New(ctx, + otlpmetrichttp.WithTemporalitySelector(deltaTemporality), + ) + // case "http/json": // unsupported by library + default: + return nil, errors.Errorf("unsupported otlp protocol %v", proto) + } +}