Merge pull request #2224 from jsternberg/otelsdk-package

otel: include service instance id attribute to resource and move to metricutil package
This commit is contained in:
Tõnis Tiigi 2024-01-30 14:45:24 -08:00 committed by GitHub
commit cb856682e9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 151 additions and 103 deletions

View File

@ -19,7 +19,6 @@ import (
"github.com/docker/buildx/util/confutil"
"github.com/docker/buildx/util/desktop"
"github.com/docker/buildx/util/dockerutil"
"github.com/docker/buildx/util/metrics"
"github.com/docker/buildx/util/progress"
"github.com/docker/buildx/util/tracing"
"github.com/docker/cli/cli/command"
@ -43,14 +42,6 @@ type bakeOptions struct {
}
func runBake(ctx context.Context, dockerCli command.Cli, targets []string, in bakeOptions, cFlags commonFlags) (err error) {
mp, report, err := metrics.MeterProvider(dockerCli)
if err != nil {
return err
}
defer report()
recordVersionInfo(mp, "bake")
ctx, end, err := tracing.TraceCurrentCommand(ctx, "bake")
if err != nil {
return err

View File

@ -30,10 +30,9 @@ import (
"github.com/docker/buildx/util/cobrautil"
"github.com/docker/buildx/util/desktop"
"github.com/docker/buildx/util/ioset"
"github.com/docker/buildx/util/metrics"
"github.com/docker/buildx/util/metricutil"
"github.com/docker/buildx/util/progress"
"github.com/docker/buildx/util/tracing"
"github.com/docker/buildx/version"
"github.com/docker/cli-docs-tool/annotation"
"github.com/docker/cli/cli"
"github.com/docker/cli/cli/command"
@ -53,9 +52,6 @@ import (
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"google.golang.org/grpc/codes"
)
@ -216,13 +212,11 @@ func (o *buildOptions) toDisplayMode() (progressui.DisplayMode, error) {
}
func runBuild(ctx context.Context, dockerCli command.Cli, options buildOptions) (err error) {
mp, report, err := metrics.MeterProvider(dockerCli)
mp, err := metricutil.NewMeterProvider(ctx, dockerCli)
if err != nil {
return err
}
defer report()
recordVersionInfo(mp, "build")
defer mp.Report(context.Background())
ctx, end, err := tracing.TraceCurrentCommand(ctx, "build")
if err != nil {
@ -936,30 +930,3 @@ func maybeJSONArray(v string) []string {
}
return []string{v}
}
func recordVersionInfo(mp metric.MeterProvider, command string) {
// Still in the process of testing/stabilizing these counters.
if !isExperimental() {
return
}
meter := mp.Meter("github.com/docker/buildx",
metric.WithInstrumentationVersion(version.Version),
)
counter, err := meter.Int64Counter("docker.cli.count",
metric.WithDescription("Number of invocations of the docker buildx command."),
)
if err != nil {
otel.Handle(err)
}
counter.Add(context.Background(), 1,
metric.WithAttributes(
attribute.String("command", command),
attribute.String("package", version.Package),
attribute.String("version", version.Version),
attribute.String("revision", version.Revision),
),
)
}

2
go.mod
View File

@ -42,6 +42,7 @@ require (
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0
go.opentelemetry.io/otel/metric v1.19.0
go.opentelemetry.io/otel/sdk v1.19.0
go.opentelemetry.io/otel/sdk/metric v1.19.0
go.opentelemetry.io/otel/trace v1.19.0
golang.org/x/mod v0.13.0
@ -147,7 +148,6 @@ require (
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/prometheus v0.42.0 // indirect
go.opentelemetry.io/otel/sdk v1.19.0 // indirect
go.opentelemetry.io/proto/otlp v1.0.0 // indirect
golang.org/x/crypto v0.17.0 // indirect
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 // indirect

15
util/confutil/exp.go Normal file
View File

@ -0,0 +1,15 @@
package confutil
import (
"os"
"strconv"
)
// IsExperimental checks if the experimental flag has been configured.
func IsExperimental() bool {
if v, ok := os.LookupEnv("BUILDX_EXPERIMENTAL"); ok {
vv, _ := strconv.ParseBool(v)
return vv
}
return false
}

View File

@ -1,4 +1,4 @@
package metrics
package metricutil
import (
"context"
@ -7,8 +7,9 @@ import (
"path"
"time"
"github.com/docker/buildx/util/confutil"
"github.com/docker/buildx/version"
"github.com/docker/cli/cli/command"
"github.com/moby/buildkit/util/tracing/detect"
"github.com/pkg/errors"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/metric"
@ -20,64 +21,76 @@ import (
const (
otelConfigFieldName = "otel"
shutdownTimeout = 2 * time.Second
reportTimeout = 2 * time.Second
)
// ReportFunc is invoked to signal the metrics should be sent to the
// desired endpoint. It should be invoked on application shutdown.
type ReportFunc func()
// MeterProvider holds a MeterProvider for metric generation and the configured
// exporters for reporting metrics from the CLI.
type MeterProvider struct {
metric.MeterProvider
reader *sdkmetric.ManualReader
exporters []sdkmetric.Exporter
}
// MeterProvider returns a MeterProvider suitable for CLI usage.
// The primary difference between this metric reader and a more typical
// usage is that metric reporting only happens once when ReportFunc
// is invoked.
func MeterProvider(cli command.Cli) (metric.MeterProvider, ReportFunc, error) {
// NewMeterProvider configures a MeterProvider from the CLI context.
func NewMeterProvider(ctx context.Context, cli command.Cli) (*MeterProvider, error) {
var exps []sdkmetric.Exporter
// Only metric exporters if the experimental flag is set.
if confutil.IsExperimental() {
if exp, err := dockerOtelExporter(cli); err != nil {
return nil, nil, err
return nil, err
} else if exp != nil {
exps = append(exps, exp)
}
if exp, err := detectOtlpExporter(context.Background()); err != nil {
return nil, nil, err
if exp, err := detectOtlpExporter(ctx); err != nil {
return nil, err
} else if exp != nil {
exps = append(exps, exp)
}
}
if len(exps) == 0 {
// No exporters are configured so use a noop provider.
return noop.NewMeterProvider(), func() {}, nil
return &MeterProvider{
MeterProvider: noop.NewMeterProvider(),
}, nil
}
// Use delta temporality because, since this is a CLI program, we can never
// know the cumulative value.
reader := sdkmetric.NewManualReader(
sdkmetric.WithTemporalitySelector(deltaTemporality),
)
mp := sdkmetric.NewMeterProvider(
sdkmetric.WithResource(detect.Resource()),
sdkmetric.WithResource(Resource()),
sdkmetric.WithReader(reader),
)
return mp, reportFunc(reader, exps), nil
return &MeterProvider{
MeterProvider: mp,
reader: reader,
exporters: exps,
}, nil
}
// reportFunc returns a ReportFunc for collecting ResourceMetrics and then
// exporting them to the configured Exporter.
func reportFunc(reader sdkmetric.Reader, exps []sdkmetric.Exporter) ReportFunc {
return func() {
ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
// Report exports metrics to the configured exporter. This should be done before the CLI
// exits.
func (m *MeterProvider) Report(ctx context.Context) {
if m.reader == nil {
// Not configured.
return
}
ctx, cancel := context.WithTimeout(ctx, reportTimeout)
defer cancel()
var rm metricdata.ResourceMetrics
if err := reader.Collect(ctx, &rm); err != nil {
if err := m.reader.Collect(ctx, &rm); err != nil {
// Error when collecting metrics. Do not send any.
return
}
var eg errgroup.Group
for _, exp := range exps {
for _, exp := range m.exporters {
exp := exp
eg.Go(func() error {
_ = exp.Export(ctx, &rm)
@ -88,7 +101,6 @@ func reportFunc(reader sdkmetric.Reader, exps []sdkmetric.Exporter) ReportFunc {
// Can't report an error because we don't allow it to.
_ = eg.Wait()
}
}
// dockerOtelExporter reads the CLI metadata to determine an OTLP exporter
@ -184,6 +196,20 @@ func otelExporterOtlpEndpoint(cli command.Cli) (string, error) {
}
// deltaTemporality sets the Temporality of every instrument to delta.
//
// This isn't really needed since we create a unique resource on each invocation,
// but it can help with cardinality concerns for downstream processors since they can
// perform aggregation for a time interval and then discard the data once that time
// period has passed. Cumulative temporality would imply to the downstream processor
// that they might receive a successive point and they may unnecessarily keep state
// they really shouldn't.
func deltaTemporality(_ sdkmetric.InstrumentKind) metricdata.Temporality {
return metricdata.DeltaTemporality
}
// Meter returns a Meter from the MetricProvider that indicates the measurement
// comes from buildx with the appropriate version.
func Meter(mp metric.MeterProvider) metric.Meter {
return mp.Meter(version.Package,
metric.WithInstrumentationVersion(version.Version))
}

View File

@ -1,4 +1,4 @@
package metrics
package metricutil
import (
"context"
@ -35,13 +35,9 @@ func detectOtlpExporter(ctx context.Context) (sdkmetric.Exporter, error) {
switch proto {
case "grpc":
return otlpmetricgrpc.New(ctx,
otlpmetricgrpc.WithTemporalitySelector(deltaTemporality),
)
return otlpmetricgrpc.New(ctx)
case "http/protobuf":
return otlpmetrichttp.New(ctx,
otlpmetrichttp.WithTemporalitySelector(deltaTemporality),
)
return otlpmetrichttp.New(ctx)
// case "http/json": // unsupported by library
default:
return nil, errors.Errorf("unsupported otlp protocol %v", proto)

View File

@ -0,0 +1,53 @@
package metricutil
import (
"context"
"os"
"path/filepath"
"sync"
"github.com/google/uuid"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/sdk/resource"
semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
)
var (
res *resource.Resource
resOnce sync.Once
)
// Resource retrieves the OTEL resource for the buildx CLI.
func Resource() *resource.Resource {
resOnce.Do(func() {
var err error
res, err = resource.New(context.Background(),
resource.WithDetectors(serviceNameDetector{}),
resource.WithAttributes(
// Use a unique instance id so OTEL knows that each invocation
// of the CLI is its own instance. Without this, downstream
// OTEL processors may think the same process is restarting
// continuously and reset the metric counters.
semconv.ServiceInstanceID(uuid.New().String()),
),
resource.WithFromEnv(),
resource.WithTelemetrySDK(),
)
if err != nil {
otel.Handle(err)
}
})
return res
}
type serviceNameDetector struct{}
func (serviceNameDetector) Detect(ctx context.Context) (*resource.Resource, error) {
return resource.StringDetector(
semconv.SchemaURL,
semconv.ServiceNameKey,
func() (string, error) {
return filepath.Base(os.Args[0]), nil
},
).Detect(ctx)
}