Skip to content

Commit f1bcf41

Browse files
committed
fix: adapt operator, Helm, and tests for Cilium v1.19
Signed-off-by: Quang Nguyen <nguyenquang@microsoft.com>
1 parent 28933e7 commit f1bcf41

File tree

16 files changed

+254
-314
lines changed

16 files changed

+254
-314
lines changed

deploy/hubble/manifests/controller/helm/retina/templates/agent/daemonset.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ spec:
124124
- name: cilium
125125
mountPath: /var/run/cilium
126126
- name: host-os-release
127-
mountPath: /etc/os-release
127+
mountPath: /etc/os-release
128128
{{- if .Values.hubble.tls.enabled }}
129129
- name: tls
130130
mountPath: /var/lib/cilium/tls/hubble
@@ -154,7 +154,7 @@ spec:
154154
- name: host-os-release
155155
hostPath:
156156
path: /etc/os-release
157-
type: FileOrCreate
157+
type: FileOrCreate
158158
{{- if .Values.hubble.tls.enabled }}
159159
- name: tls
160160
projected:

operator/cilium-crds/k8s/apis/register.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,10 @@ func createCRD(crdVersionedName, crdMetaName string) func(clientset apiextension
101101
clientset,
102102
constructV1CRD(crdMetaName, ciliumCRD),
103103
crdhelpers.NewDefaultPoller(),
104-
k8sconst.CustomResourceDefinitionSchemaVersionKey,
105-
versioncheck.MustVersion(k8sconst.CustomResourceDefinitionSchemaVersion),
104+
crdhelpers.NeedsUpdateV1Factory(
105+
k8sconst.CustomResourceDefinitionSchemaVersionKey,
106+
versioncheck.MustVersion(k8sconst.CustomResourceDefinitionSchemaVersion),
107+
),
106108
)
107109
if err != nil {
108110
return fmt.Errorf("Unable to create CRD %s: %w", crdMetaName, err)

operator/cilium-crds/k8s/fakeresource.go renamed to operator/cilium-crds/k8s/fakeresource_linux.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@ import (
88
"github.com/cilium/cilium/pkg/k8s/resource"
99
)
1010

11-
type fakeresource[T k8sRuntime.Object] struct {
12-
}
11+
type fakeresource[T k8sRuntime.Object] struct{}
1312

1413
func (f *fakeresource[T]) Events(ctx context.Context, opts ...resource.EventsOpt) <-chan resource.Event[T] {
1514
return make(<-chan resource.Event[T])

operator/cilium-crds/k8s/resource_ctors.go

Lines changed: 0 additions & 62 deletions
This file was deleted.
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// Copyright Authors of Retina and Cilium
3+
4+
package k8s
5+
6+
import (
7+
operatork8s "github.com/cilium/cilium/operator/k8s"
8+
)
9+
10+
// Re-export Cilium's operator resource constructors.
11+
// These live in a _linux file because cilium/operator/k8s transitively
12+
// imports Linux-only symbols (netns.GetNetNSCookie).
13+
var (
14+
CiliumEndpointResource = operatork8s.CiliumEndpointResource
15+
CiliumEndpointSliceResource = operatork8s.CiliumEndpointSliceResource
16+
PodResource = operatork8s.PodResource
17+
CiliumEndpointIndexIdentity = operatork8s.CiliumEndpointIndexIdentity
18+
)

operator/cilium-crds/k8s/resources.go renamed to operator/cilium-crds/k8s/resources_linux.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,6 @@ import (
1818
"github.com/cilium/hive/cell"
1919
)
2020

21-
const (
22-
CiliumEndpointIndexIdentity = "identity"
23-
)
24-
2521
// ResourcesCell provides a set of handles to Kubernetes resources used throughout the
2622
// operator. Each of the resources share a client-go informer and backing store so we only
2723
// have one watch API call for each resource kind and that we maintain only one copy of each object.
@@ -39,7 +35,7 @@ var ResourcesCell = cell.Module(
3935
func() resource.Resource[*cilium_api_v2.CiliumNode] {
4036
return &fakeresource[*cilium_api_v2.CiliumNode]{}
4137
},
42-
k8s.PodResource,
38+
PodResource,
4339
k8s.NamespaceResource,
4440
),
4541
)

operator/cmd/cilium-crds/cells_linux.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@ package ciliumcrds
99
import (
1010
"context"
1111
"fmt"
12+
"log/slog"
1213
"sync/atomic"
1314

1415
"github.com/microsoft/retina/internal/buildinfo"
1516
"github.com/microsoft/retina/pkg/shared/telemetry"
16-
"github.com/sirupsen/logrus"
1717
k8sruntime "k8s.io/apimachinery/pkg/runtime"
1818
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
1919
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
@@ -34,10 +34,12 @@ import (
3434
cmtypes "github.com/cilium/cilium/pkg/clustermesh/types"
3535
"github.com/cilium/cilium/pkg/controller"
3636
k8sClient "github.com/cilium/cilium/pkg/k8s/client"
37+
"github.com/cilium/cilium/pkg/kvstore"
3738
"github.com/cilium/cilium/pkg/kvstore/store"
3839
"github.com/cilium/cilium/pkg/option"
3940
"github.com/cilium/cilium/pkg/pprof"
4041
"github.com/cilium/hive/cell"
42+
"github.com/cilium/statedb"
4143
)
4244

4345
const operatorK8sNamespace = "kube-system"
@@ -47,7 +49,7 @@ var (
4749
"operator",
4850
"Retina Operator",
4951

50-
cell.Invoke(func(l logrus.FieldLogger) {
52+
cell.Invoke(func(l *slog.Logger) {
5153
// to help prevent user confusion, explain why logs may include lines referencing "cilium" or "cilium operator"
5254
// e.g. level=info msg="Cilium Operator go version go1.21.4 linux/amd64" subsys=retina-operator
5355
l.Info("starting hive. Some logs will say 'cilium' since some code is derived from cilium")
@@ -91,6 +93,11 @@ var (
9193
// Provides Clientset, API for accessing Kubernetes objects.
9294
k8sClient.Cell,
9395

96+
// Provide in-memory kvstore client for identity GC (not using etcd in Retina)
97+
cell.Provide(func(db *statedb.DB) kvstore.Client {
98+
return kvstore.NewInMemoryClient(db, "default")
99+
}),
100+
94101
// Provides the modular metrics registry, metric HTTP server and standard metrics cell.
95102
// NOTE: no server/metrics are created when --enable-metrics=false (default)
96103
operatorMetrics.Cell,

operator/cmd/cilium-crds/flags.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,6 @@ func InitGlobalFlags(cmd *cobra.Command, vp *viper.Viper) {
4242
// NOTE: without this the option gets overridden from the default value to the zero value via option.Config.Populate(vp)
4343
// specifically, here options.Config.AllocatorListTimeout gets overridden from the default value to 0s
4444
flags.Duration(option.AllocatorListTimeoutName, defaults.AllocatorListTimeout, "timeout to list initial allocator state")
45-
// similar overriding happens for option.Config.KVstoreConnectivityTimeout
46-
flags.Duration(option.KVstoreConnectivityTimeout, defaults.KVstoreConnectivityTimeout, "Time after which an incomplete kvstore operation is considered failed")
47-
// similar overriding happens for option.Config.KVstorePeriodicSync
48-
flags.Duration(option.KVstorePeriodicSync, defaults.KVstorePeriodicSync, "Periodic KVstore synchronization interval")
4945

5046
flags.Duration(operatorOption.EndpointGCInterval, operatorOption.EndpointGCIntervalDefault, "GC interval for cilium endpoints")
5147
option.BindEnv(vp, operatorOption.EndpointGCInterval)
@@ -56,7 +52,7 @@ func InitGlobalFlags(cmd *cobra.Command, vp *viper.Viper) {
5652
flags.StringSlice(option.LogDriver, []string{}, "Logging endpoints to use for example syslog")
5753
option.BindEnv(vp, option.LogDriver)
5854

59-
flags.Var(option.NewNamedMapOptions(option.LogOpt, &option.Config.LogOpt, nil),
55+
flags.Var(option.NewMapOptions(&option.Config.LogOpt),
6056
option.LogOpt, `Log driver options for cilium-operator, `+
6157
`configmap example for syslog driver: {"syslog.level":"info","syslog.facility":"local4"}`)
6258
option.BindEnv(vp, option.LogOpt)

operator/cmd/cilium-crds/root_linux.go

Lines changed: 34 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"context"
1212
"crypto/rand"
1313
"fmt"
14+
"log/slog"
1415
"math/big"
1516
"os"
1617
"path/filepath"
@@ -19,36 +20,40 @@ import (
1920
operatorOption "github.com/cilium/cilium/operator/option"
2021
"github.com/cilium/cilium/pkg/hive"
2122
k8sClient "github.com/cilium/cilium/pkg/k8s/client"
22-
k8sversion "github.com/cilium/cilium/pkg/k8s/version"
2323
"github.com/cilium/cilium/pkg/logging"
2424
"github.com/cilium/cilium/pkg/logging/logfields"
2525
"github.com/cilium/cilium/pkg/metrics"
2626
"github.com/cilium/cilium/pkg/option"
2727
"github.com/cilium/hive/cell"
2828
"github.com/microsoft/retina/internal/buildinfo"
29+
"github.com/microsoft/retina/pkg/log"
2930
"github.com/pkg/errors"
30-
"github.com/sirupsen/logrus"
3131
"github.com/spf13/viper"
32+
"go.uber.org/zap"
3233
"k8s.io/client-go/tools/leaderelection"
3334
"k8s.io/client-go/tools/leaderelection/resourcelock"
3435
)
3536

3637
var (
3738
// set logger field: subsys=retina-operator
3839
binaryName = filepath.Base(os.Args[0])
39-
logger = logging.DefaultLogger.WithField(logfields.LogSubsys, binaryName)
40+
slogLogger = logging.DefaultSlogLogger.With(logfields.LogSubsys, binaryName)
4041
operatorIDLength = 10
4142
)
4243

4344
func Execute(h *hive.Hive) {
4445
initEnv(h.Viper())
4546

46-
if err := h.Run(logging.DefaultSlogLogger); err != nil {
47-
logger.Fatal(err)
47+
// Use zap-backed slog logger for hive (routes to stdout + Application Insights)
48+
if err := h.Run(log.SlogLogger()); err != nil {
49+
logging.Fatal(slogLogger, err.Error())
4850
}
4951
}
5052

51-
func registerOperatorHooks(l logrus.FieldLogger, lc cell.Lifecycle, llc *LeaderLifecycle, clientset k8sClient.Clientset, shutdowner hive.Shutdowner) {
53+
func registerOperatorHooks(
54+
l *slog.Logger, lc cell.Lifecycle, llc *LeaderLifecycle,
55+
clientset k8sClient.Clientset, shutdowner hive.Shutdowner,
56+
) {
5257
var wg sync.WaitGroup
5358
lc.Append(cell.Hook{
5459
OnStart: func(cell.HookContext) error {
@@ -77,19 +82,28 @@ func initEnv(vp *viper.Viper) {
7782
// the default values provided in option.Config or operatorOption.Config respectively.
7883
// The values will be overridden to the "zero value".
7984
// Maybe we could create a cell.Config for these instead?
80-
option.Config.Populate(vp)
81-
operatorOption.Config.Populate(vp)
85+
// slogloggercheck: using default logger for configuration initialization
86+
option.Config.Populate(logging.DefaultSlogLogger, vp)
87+
operatorOption.Config.Populate(logging.DefaultSlogLogger, vp)
8288

83-
// add hooks after setting up metrics in the option.Confog
84-
logging.DefaultLogger.Hooks.Add(metrics.NewLoggingHook())
89+
// add hooks after setting up metrics in the option.Config
90+
logging.AddHandlers(metrics.NewLoggingHook())
8591

8692
// Logging should always be bootstrapped first. Do not add any code above this!
8793
if err := logging.SetupLogging(option.Config.LogDriver, logging.LogOptions(option.Config.LogOpt), binaryName, option.Config.Debug); err != nil {
88-
logger.Fatal(err)
94+
logging.Fatal(slogLogger, err.Error())
8995
}
9096

91-
option.LogRegisteredOptions(vp, logger)
92-
logger.Infof("retina operator version: %s", buildinfo.Version)
97+
// Set up zap logger with Application Insights telemetry
98+
_, _ = log.SetupZapLogger(&log.LogOpts{
99+
Level: option.Config.LogOpt[logging.LevelOpt],
100+
ApplicationInsightsID: buildinfo.ApplicationInsightsID,
101+
EnableTelemetry: buildinfo.ApplicationInsightsID != "",
102+
}, zap.String("version", buildinfo.Version))
103+
log.SetDefaultSlog()
104+
105+
option.LogRegisteredSlogOptions(vp, slogLogger)
106+
slogLogger.Info("retina operator version", "version", buildinfo.Version)
93107
}
94108

95109
func doCleanup() {
@@ -103,32 +117,20 @@ func doCleanup() {
103117
// runOperator implements the logic of leader election for cilium-operator using
104118
// built-in leader election capability in kubernetes.
105119
// See: https://github.com/kubernetes/client-go/blob/master/examples/leader-election/main.go
106-
func runOperator(l logrus.FieldLogger, lc *LeaderLifecycle, clientset k8sClient.Clientset, shutdowner hive.Shutdowner) {
120+
func runOperator(l *slog.Logger, lc *LeaderLifecycle, clientset k8sClient.Clientset, shutdowner hive.Shutdowner) {
107121
isLeader.Store(false)
108122

109123
leaderElectionCtx, leaderElectionCtxCancel = context.WithCancel(context.Background())
110124

111-
// We only support Operator in HA mode for Kubernetes Versions having support for
112-
// LeasesResourceLock.
113-
// See docs on capabilities.LeasesResourceLock for more context.
114-
if !k8sversion.Capabilities().LeasesResourceLock {
115-
l.Info("Support for coordination.k8s.io/v1 not present, fallback to non HA mode")
116-
117-
if err := lc.Start(logging.DefaultSlogLogger, leaderElectionCtx); err != nil {
118-
l.WithError(err).Fatal("Failed to start leading")
119-
}
120-
return
121-
}
122-
123125
// Get hostname for identity name of the lease lock holder.
124126
// We identify the leader of the operator cluster using hostname.
125127
operatorID, err := os.Hostname()
126128
if err != nil {
127-
l.WithError(err).Fatal("Failed to get hostname when generating lease lock identity")
129+
logging.Fatal(l, "Failed to get hostname when generating lease lock identity", logfields.Error, err)
128130
}
129131
operatorID, err = randomStringWithPrefix(operatorID+"-", operatorIDLength)
130132
if err != nil {
131-
l.WithError(err).Fatal("Failed to generate random string for lease lock identity")
133+
logging.Fatal(l, "Failed to generate random string for lease lock identity", logfields.Error, err)
132134
}
133135

134136
leResourceLock, err := resourcelock.NewFromKubeconfig(
@@ -142,7 +144,7 @@ func runOperator(l logrus.FieldLogger, lc *LeaderLifecycle, clientset k8sClient.
142144
clientset.RestConfig(),
143145
operatorOption.Config.LeaderElectionRenewDeadline)
144146
if err != nil {
145-
l.WithError(err).Fatal("Failed to create resource lock for leader election")
147+
logging.Fatal(l, "Failed to create resource lock for leader election", logfields.Error, err)
146148
}
147149

148150
// Start the leader election for running cilium-operators
@@ -160,23 +162,20 @@ func runOperator(l logrus.FieldLogger, lc *LeaderLifecycle, clientset k8sClient.
160162
Callbacks: leaderelection.LeaderCallbacks{
161163
OnStartedLeading: func(ctx context.Context) {
162164
if err := lc.Start(logging.DefaultSlogLogger, ctx); err != nil {
163-
l.WithError(err).Error("Failed to start when elected leader, shutting down")
165+
l.Error("Failed to start when elected leader, shutting down", logfields.Error, err)
164166
shutdowner.Shutdown(hive.ShutdownWithError(err))
165167
}
166168
},
167169
OnStoppedLeading: func() {
168-
l.WithField("operator-id", operatorID).Info("Leader election lost")
170+
l.Info("Leader election lost", "operator-id", operatorID)
169171
// Cleanup everything here, and exit.
170172
shutdowner.Shutdown(hive.ShutdownWithError(errors.New("Leader election lost")))
171173
},
172174
OnNewLeader: func(identity string) {
173175
if identity == operatorID {
174176
l.Info("Leading the operator HA deployment")
175177
} else {
176-
l.WithFields(logrus.Fields{
177-
"newLeader": identity,
178-
"operatorID": operatorID,
179-
}).Info("Leader re-election complete")
178+
l.Info("Leader re-election complete", "newLeader", identity, "operatorID", operatorID)
180179
}
181180
},
182181
},

0 commit comments

Comments
 (0)