CrazyMax e423a67f7b
vendor: github.com/moby/buildkit v0.13.0-rc2
full diff: https://github.com/moby/buildkit/compare/8e3fe35738c2...v0.13.0-rc2

Signed-off-by: CrazyMax <1951866+crazy-max@users.noreply.github.com>
2024-02-24 17:14:01 +01:00

146 lines
4.8 KiB
Go

package session
import (
"context"
"math"
"net"
"sync/atomic"
"time"
"github.com/containerd/containerd/defaults"
grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware"
"github.com/moby/buildkit/util/bklog"
"github.com/moby/buildkit/util/grpcerrors"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
"go.opentelemetry.io/otel/trace"
"golang.org/x/net/http2"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/grpc/health/grpc_health_v1"
)
// serve runs grpcServer as the HTTP/2 handler for the single connection conn.
//
// A helper goroutine waits for ctx to be done and then closes conn. The call
// itself returns when http2.Server.ServeConn returns.
func serve(ctx context.Context, grpcServer *grpc.Server, conn net.Conn) {
	// Close the connection as soon as the context is finished.
	closeOnDone := func() {
		<-ctx.Done()
		conn.Close()
	}
	go closeOnDone()

	bklog.G(ctx).Debugf("serving grpc connection")

	h2 := &http2.Server{}
	serveOpts := &http2.ServeConnOpts{Handler: grpcServer}
	h2.ServeConn(conn, serveOpts)
}
// grpcClientConn builds a gRPC client that talks over the already-established
// connection conn.
//
// The custom dialer hands out conn exactly once; any further dial attempt
// fails with "only one connection allowed". When ctx carries a valid trace
// span, OpenTelemetry client interceptors are installed ahead of the
// grpcerrors interceptors.
//
// On success it returns a context derived from ctx together with the client
// connection, and starts monitorHealth in a goroutine with that context and
// its cancel function. On dial failure it returns a wrapped error and nil for
// the other results.
func grpcClientConn(ctx context.Context, conn net.Conn) (context.Context, *grpc.ClientConn, error) {
	// Counts dial attempts so that conn is only ever handed out once.
	var dials atomic.Int64
	singleConnDialer := func(ctx context.Context, addr string) (net.Conn, error) {
		if dials.Add(1) > 1 {
			return nil, errors.Errorf("only one connection allowed")
		}
		return conn, nil
	}

	opts := []grpc.DialOption{
		grpc.WithContextDialer(singleConnDialer),
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(defaults.DefaultMaxRecvMsgSize)),
		grpc.WithDefaultCallOptions(grpc.MaxCallSendMsgSize(defaults.DefaultMaxSendMsgSize)),
	}

	var (
		unaryChain  []grpc.UnaryClientInterceptor
		streamChain []grpc.StreamClientInterceptor
	)

	// Tracing interceptors are only added when the caller is inside a valid span.
	if span := trace.SpanFromContext(ctx); span.SpanContext().IsValid() {
		withProvider := otelgrpc.WithTracerProvider(span.TracerProvider())
		withPropagators := otelgrpc.WithPropagators(propagators)
		unaryChain = append(unaryChain, filterClient(otelgrpc.UnaryClientInterceptor(withProvider, withPropagators))) //nolint:staticcheck // TODO(thaJeztah): ignore SA1019 for deprecated options: see https://github.com/moby/buildkit/issues/4681
		streamChain = append(streamChain, otelgrpc.StreamClientInterceptor(withProvider, withPropagators))            //nolint:staticcheck // TODO(thaJeztah): ignore SA1019 for deprecated options: see https://github.com/moby/buildkit/issues/4681
	}

	unaryChain = append(unaryChain, grpcerrors.UnaryClientInterceptor)
	streamChain = append(streamChain, grpcerrors.StreamClientInterceptor)

	// A single interceptor is installed directly; several are chained.
	switch len(unaryChain) {
	case 0:
	case 1:
		opts = append(opts, grpc.WithUnaryInterceptor(unaryChain[0]))
	default:
		opts = append(opts, grpc.WithUnaryInterceptor(grpc_middleware.ChainUnaryClient(unaryChain...)))
	}

	switch len(streamChain) {
	case 0:
	case 1:
		opts = append(opts, grpc.WithStreamInterceptor(streamChain[0]))
	default:
		opts = append(opts, grpc.WithStreamInterceptor(grpc_middleware.ChainStreamClient(streamChain...)))
	}

	cc, err := grpc.DialContext(ctx, "localhost", opts...)
	if err != nil {
		return nil, nil, errors.Wrap(err, "failed to create grpc client")
	}

	// The returned context is cancelled by monitorHealth when it exits.
	connCtx, cancelConn := context.WithCancelCause(ctx)
	go monitorHealth(connCtx, cc, cancelConn)
	return connCtx, cc, nil
}
// monitorHealth periodically probes the gRPC health service on cc and shuts
// the connection down once it is considered dead.
//
// Every 5 seconds a health Check is issued with a timeout of 30 seconds or
// 1.5x the duration of the previous check, whichever is larger. One failed
// check is tolerated; a further failure before five consecutive successes
// have cleared the failure flag is fatal.
//
// The function returns when ctx is done or the healthcheck fails fatally. On
// return it closes cc and then calls cancelConn with a stack-annotated
// context.Canceled.
func monitorHealth(ctx context.Context, cc *grpc.ClientConn, cancelConn func(error)) {
	defer cancelConn(errors.WithStack(context.Canceled))
	defer cc.Close()

	ticker := time.NewTicker(5 * time.Second)
	defer ticker.Stop()
	healthClient := grpc_health_v1.NewHealthClient(cc)

	failedBefore := false
	consecutiveSuccessful := 0
	defaultHealthcheckDuration := 30 * time.Second
	lastHealthcheckDuration := time.Duration(0)

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			// This healthcheck can erroneously fail in some instances, such as receiving lots of data in a low-bandwidth scenario or too many concurrent builds.
			// So, this healthcheck is purposely long, and can tolerate some failures on purpose.
			healthcheckStart := time.Now()

			timeout := time.Duration(math.Max(float64(defaultHealthcheckDuration), float64(lastHealthcheckDuration)*1.5))

			// The per-check context gets its own name. Shadowing ctx here would
			// make the cancellation test below observe the check context, which
			// is always cancelled by then, so every failure would be fatal.
			checkCtx, cancelCheck := context.WithCancelCause(ctx)
			checkCtx, cancelTimeout := context.WithTimeoutCause(checkCtx, timeout, errors.WithStack(context.DeadlineExceeded))
			_, err := healthClient.Check(checkCtx, &grpc_health_v1.HealthCheckRequest{})
			cancelCheck(errors.WithStack(context.Canceled))
			cancelTimeout()

			lastHealthcheckDuration = time.Since(healthcheckStart)
			logFields := logrus.Fields{
				"timeout":        timeout,
				"actualDuration": lastHealthcheckDuration,
			}

			if err != nil {
				// If the caller's context is done, the failure is just a
				// consequence of shutdown; exit without logging.
				select {
				case <-ctx.Done():
					return
				default:
				}
				if failedBefore {
					bklog.G(ctx).Error("healthcheck failed fatally")
					return
				}

				failedBefore = true
				consecutiveSuccessful = 0
				bklog.G(ctx).WithFields(logFields).Warn("healthcheck failed")
			} else {
				consecutiveSuccessful++

				// Forgive an earlier failure after five successes in a row.
				if consecutiveSuccessful >= 5 && failedBefore {
					failedBefore = false
					bklog.G(ctx).WithFields(logFields).Debug("reset healthcheck failure")
				}
			}

			bklog.G(ctx).WithFields(logFields).Trace("healthcheck completed")
		}
	}
}