Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -243,9 +243,10 @@ verify-crd-compatibility: $(CRD_DIFF) manifests
.PHONY: test
test: manifests generate fmt lint test-unit test-e2e test-regression #HELP Run all tests.

E2E_TIMEOUT ?= 10m
.PHONY: e2e
e2e: #EXHELP Run the e2e tests.
go test -count=1 -v ./test/e2e/features_test.go
go test -count=1 -v ./test/e2e/features_test.go -timeout=$(E2E_TIMEOUT)

E2E_REGISTRY_NAME := docker-registry
E2E_REGISTRY_NAMESPACE := operator-controller-e2e
Expand Down Expand Up @@ -317,6 +318,7 @@ test-experimental-e2e: GO_BUILD_EXTRA_FLAGS := -cover
test-experimental-e2e: COVERAGE_NAME := experimental-e2e
test-experimental-e2e: export MANIFEST := $(EXPERIMENTAL_RELEASE_MANIFEST)
test-experimental-e2e: PROMETHEUS_VALUES := helm/prom_experimental.yaml
test-experimental-e2e: E2E_TIMEOUT := 15m
test-experimental-e2e: run-internal image-registry prometheus e2e e2e-coverage kind-clean #HELP Run experimental e2e test suite on local kind cluster

.PHONY: prometheus
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"errors"
"fmt"
"strings"
"sync"
"time"

appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -462,10 +463,13 @@ func (c *ClusterExtensionRevisionReconciler) toBoxcutterRevision(ctx context.Con
previousObjs[i] = rev
}

if err = initializeProbes(); err != nil {
return nil, nil, err
}
opts := []boxcutter.RevisionReconcileOption{
boxcutter.WithPreviousOwners(previousObjs),
boxcutter.WithProbe(boxcutter.ProgressProbeType, probing.And{
deploymentProbe, statefulSetProbe, crdProbe, issuerProbe, certProbe,
&namespaceActiveProbe, deploymentProbe, statefulSetProbe, crdProbe, issuerProbe, certProbe, &pvcBoundProbe,
}),
}

Expand Down Expand Up @@ -511,6 +515,28 @@ func EffectiveCollisionProtection(cp ...ocv1.CollisionProtection) ocv1.Collision
return ecp
}

// initializeProbes lazily compiles the CEL-based availability probes exactly
// once (via sync.OnceValue) so the CEL compilation work is not repeated on
// every reconcile. On success it populates the package-level
// namespaceActiveProbe and pvcBoundProbe selectors as a side effect.
// NOTE(review): sync.OnceValue caches the first result, so if compilation
// fails once the same error is returned on every subsequent call.
var initializeProbes = sync.OnceValue(func() error {
	// Probe asserting that a Namespace has reached the "Active" phase;
	// the second argument is the failure message surfaced to users.
	nsCEL, err := probing.NewCELProbe(namespaceActiveCEL, `namespace phase must be "Active"`)
	if err != nil {
		return fmt.Errorf("initializing namespace CEL probe: %w", err)
	}
	// Probe asserting that a PersistentVolumeClaim has reached the "Bound" phase.
	pvcCEL, err := probing.NewCELProbe(pvcBoundCEL, `persistentvolumeclaim phase must be "Bound"`)
	if err != nil {
		return fmt.Errorf("initializing PVC CEL probe: %w", err)
	}
	// Scope each CEL probe to its target GroupKind so objects of other
	// kinds are not evaluated against the expression.
	namespaceActiveProbe = probing.GroupKindSelector{
		GroupKind: schema.GroupKind{Group: corev1.GroupName, Kind: "Namespace"},
		Prober:    nsCEL,
	}
	pvcBoundProbe = probing.GroupKindSelector{
		GroupKind: schema.GroupKind{Group: corev1.GroupName, Kind: "PersistentVolumeClaim"},
		Prober:    pvcCEL,
	}

	return nil
})

var (
deploymentProbe = &probing.GroupKindSelector{
GroupKind: schema.GroupKind{Group: appsv1.GroupName, Kind: "Deployment"},
Expand Down Expand Up @@ -542,6 +568,14 @@ var (
},
}

// namespaceActiveCEL is a CEL rule which asserts that the namespace is in "Active" phase
namespaceActiveCEL = `self.status.phase == "Active"`
namespaceActiveProbe probing.GroupKindSelector

// pvcBoundCEL is a CEL rule which asserts that the PVC is in "Bound" phase
pvcBoundCEL = `self.status.phase == "Bound"`
pvcBoundProbe probing.GroupKindSelector

// deplStatefulSetProbe probes Deployment, StatefulSet objects.
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment for deplStatefulSetProbe is misspelled/inaccurate (deplStaefulSetProbe). This makes the identifier hard to search for and is inconsistent with the actual variable name. Update the comment to match deplStatefulSetProbe (and correct “Stateful”).

Suggested change
// deplStaefulSetProbe probes Deployment, StatefulSet objects.
// deplStatefulSetProbe probes Deployment, StatefulSet objects.

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't change this line, I can keep it in mind for a future PR though

deplStatefulSetProbe = &probing.ObservedGenerationProbe{
Prober: probing.And{
Expand Down
1 change: 1 addition & 0 deletions test/e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ Leverage existing steps for common operations:
Use these variables in YAML templates:

- `${NAME}`: Scenario-specific ClusterExtension name (e.g., `ce-123`)
- `${CER_NAME}`: Scenario-specific ClusterExtensionRevision name (e.g., `cer-123`; for applying CERs directly)
- `${TEST_NAMESPACE}`: Scenario-specific namespace (e.g., `ns-123`)
- `${CATALOG_IMG}`: Catalog image reference (defaults to in-cluster registry, overridable via `CATALOG_IMG` env var)

Expand Down
207 changes: 207 additions & 0 deletions test/e2e/features/revision.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
Feature: Install ClusterExtensionRevision

As an OLM user I would like to install a cluster extension revision.

Background:
Given OLM is available
And ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE}

@BoxcutterRuntime
Scenario: Install simple revision
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how is this scenario related to the logic introduced in PR? We have plenty of scenarios under install.feature that install exactly these resources.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO it's an easy way to prove that CERs can be used to install content without a ClusterExtension owner. I thought this was necessary since we had discussed the possibility that users might (but probably won't) use the API directly - plus its runtime is only about 1.5 seconds. I can remove it if you think it's unnecessary.

When ClusterExtensionRevision is applied
"""
apiVersion: olm.operatorframework.io/v1
kind: ClusterExtensionRevision
metadata:
annotations:
olm.operatorframework.io/service-account-name: olm-sa
olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE}
name: ${CER_NAME}
spec:
lifecycleState: Active
collisionProtection: Prevent
phases:
- name: policies
objects:
- object:
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: test-operator-network-policy
namespace: ${TEST_NAMESPACE}
spec:
podSelector: {}
policyTypes:
- Ingress
- name: deploy
objects:
- object:
apiVersion: v1
data:
httpd.sh: |
#!/bin/sh
echo "Version 1.2.0"
echo true > /var/www/started
echo true > /var/www/ready
echo true > /var/www/live
exec httpd -f -h /var/www -p 80
kind: ConfigMap
metadata:
name: httpd-script
namespace: ${TEST_NAMESPACE}
- object:
apiVersion: v1
data:
name: test-configmap
version: v1.2.0
kind: ConfigMap
metadata:
annotations:
shouldNotTemplate: |
The namespace is {{ $labels.namespace }}. The templated $labels.namespace is NOT expected to be processed by OLM's rendering engine for registry+v1 bundles.
name: test-configmap
namespace: ${TEST_NAMESPACE}
revision: 1
"""

And ClusterExtensionRevision "${CER_NAME}" reports Progressing as True with Reason Succeeded
And ClusterExtensionRevision "${CER_NAME}" reports Available as True with Reason ProbesSucceeded
And resource "networkpolicy/test-operator-network-policy" is installed
And resource "configmap/test-configmap" is installed

@BoxcutterRuntime
Scenario: Probe failure for PersistentVolumeClaim halts phase progression
When ClusterExtensionRevision is applied
"""
apiVersion: olm.operatorframework.io/v1
kind: ClusterExtensionRevision
metadata:
annotations:
olm.operatorframework.io/service-account-name: olm-sa
olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE}
name: ${CER_NAME}
spec:
lifecycleState: Active
collisionProtection: Prevent
phases:
- name: pvc
objects:
- object:
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: test-pvc
namespace: ${TEST_NAMESPACE}
spec:
accessModes:
- ReadWriteOnce
storageClassName: ""
volumeName: test-pv
resources:
requests:
storage: 1Mi
- name: configmap
objects:
- object:
apiVersion: v1
kind: ConfigMap
metadata:
annotations:
shouldNotTemplate: |
The namespace is {{ $labels.namespace }}. The templated $labels.namespace is NOT expected to be processed by OLM's rendering engine for registry+v1 bundles.
name: test-configmap
namespace: ${TEST_NAMESPACE}
data:
name: test-configmap
version: v1.2.0
revision: 1
"""

And resource "persistentvolumeclaim/test-pvc" is installed
And ClusterExtensionRevision "${CER_NAME}" reports Available as False with Reason ProbeFailure and Message includes:
"""
persistentvolumeclaim phase must be "Bound"
"""
And resource "configmap/test-configmap" is not installed

@BoxcutterRuntime
Scenario: Phases progress when PersistentVolumeClaim becomes "Bound"
When ClusterExtensionRevision is applied
"""
apiVersion: olm.operatorframework.io/v1
kind: ClusterExtensionRevision
metadata:
annotations:
olm.operatorframework.io/service-account-name: olm-sa
olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE}
name: ${CER_NAME}
spec:
lifecycleState: Active
collisionProtection: Prevent
phases:
- name: pvc
objects:
- object:
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: test-pvc
namespace: ${TEST_NAMESPACE}
spec:
accessModes:
- ReadWriteOnce
storageClassName: ""
volumeName: test-pv
resources:
requests:
storage: 1Mi
- object:
apiVersion: v1
kind: PersistentVolume
metadata:
name: test-pv
spec:
accessModes:
- ReadWriteOnce
capacity:
storage: 1Mi
claimRef:
apiVersion: v1
kind: PersistentVolumeClaim
name: test-pvc
namespace: ${TEST_NAMESPACE}
persistentVolumeReclaimPolicy: Delete
storageClassName: ""
volumeMode: Filesystem
local:
path: /tmp/persistent-volume
nodeAffinity:
required:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- a-node-name-that-should-not-exist
- name: configmap
objects:
- object:
apiVersion: v1
kind: ConfigMap
metadata:
annotations:
shouldNotTemplate: |
The namespace is {{ $labels.namespace }}. The templated $labels.namespace is NOT expected to be processed by OLM's rendering engine for registry+v1 bundles.
name: test-configmap
namespace: ${TEST_NAMESPACE}
data:
name: test-configmap
version: v1.2.0
revision: 1
"""

And ClusterExtensionRevision "${CER_NAME}" reports Progressing as True with Reason Succeeded
And ClusterExtensionRevision "${CER_NAME}" reports Available as True with Reason ProbesSucceeded
And resource "persistentvolume/test-pv" is installed
And resource "persistentvolumeclaim/test-pvc" is installed
And resource "configmap/test-configmap" is installed
35 changes: 20 additions & 15 deletions test/e2e/steps/hooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,13 @@ type resource struct {
}

type scenarioContext struct {
id string
namespace string
clusterExtensionName string
removedResources []unstructured.Unstructured
backGroundCmds []*exec.Cmd
metricsResponse map[string]string
id string
namespace string
clusterExtensionName string
clusterExtensionRevisionName string
removedResources []unstructured.Unstructured
backGroundCmds []*exec.Cmd
metricsResponse map[string]string

extensionObjects []client.Object
}
Expand Down Expand Up @@ -142,9 +143,10 @@ func CheckFeatureTags(ctx context.Context, sc *godog.Scenario) (context.Context,

func CreateScenarioContext(ctx context.Context, sc *godog.Scenario) (context.Context, error) {
scCtx := &scenarioContext{
id: sc.Id,
namespace: fmt.Sprintf("ns-%s", sc.Id),
clusterExtensionName: fmt.Sprintf("ce-%s", sc.Id),
id: sc.Id,
namespace: fmt.Sprintf("ns-%s", sc.Id),
clusterExtensionName: fmt.Sprintf("ce-%s", sc.Id),
clusterExtensionRevisionName: fmt.Sprintf("cer-%s", sc.Id),
}
return context.WithValue(ctx, scenarioContextKey, scCtx), nil
}
Expand Down Expand Up @@ -176,13 +178,16 @@ func ScenarioCleanup(ctx context.Context, _ *godog.Scenario, err error) (context
if sc.clusterExtensionName != "" {
forDeletion = append(forDeletion, resource{name: sc.clusterExtensionName, kind: "clusterextension"})
}
if sc.clusterExtensionRevisionName != "" {
forDeletion = append(forDeletion, resource{name: sc.clusterExtensionRevisionName, kind: "clusterextensionrevision"})
}
forDeletion = append(forDeletion, resource{name: sc.namespace, kind: "namespace"})
go func() {
for _, r := range forDeletion {
if _, err := k8sClient("delete", r.kind, r.name, "--ignore-not-found=true"); err != nil {
logger.Info("Error deleting resource", "name", r.name, "namespace", sc.namespace, "stderr", stderrOutput(err))
for _, r := range forDeletion {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how is this change related to the PR?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Before the change, in my test runs, the after hook would only delete a single item in forDeletion (the clusterextension) and would leave remaining resources (namespaces and owner-less clusterextensionrevisions) on the cluster. This fixed that and also made the deletion run more quickly by executing them asynchronously.

go func(res resource) {
if _, err := k8sClient("delete", res.kind, res.name, "--ignore-not-found=true"); err != nil {
logger.Info("Error deleting resource", "name", res.name, "namespace", sc.namespace, "stderr", stderrOutput(err))
}
}
}()
}(r)
}
return ctx, nil
}
Loading
Loading