Kubernetes
Kubernetes Operators: A Complete Development Guide
Master Kubernetes Operators with this comprehensive guide covering custom controllers, operator patterns, and best practices for extending Kubernetes functionality
March 15, 2024
DevHub Team
6 min read
Kubernetes Operators: A Complete Development Guide
Kubernetes Operators extend the platform's functionality by automating complex application management tasks. This guide explores operator development patterns, implementation strategies, and best practices.
Operator Architecture
graph TB
    %% Three groups of nodes: API (A-C), application logic (D-F), runtime (G-I).
    subgraph "Kubernetes API"
        A[Custom Resource Definition]
        B[Custom Controller]
        C[Reconciliation Loop]
    end

    subgraph "Application Logic"
        D[Resource Management]
        E[State Management]
        F[Lifecycle Hooks]
    end

    subgraph "Runtime"
        G[Operator Pod]
        H[Managed Resources]
        I[Status Updates]
    end

    %% Forward path through every node, in order.
    A --> B --> C --> D --> E --> F --> G --> H --> I
    %% Feedback edge: status updates lead back into the reconciliation loop.
    I --> C

    %% One colour per group.
    classDef api fill:#1a73e8,stroke:#fff,color:#fff
    classDef logic fill:#34a853,stroke:#fff,color:#fff
    classDef runtime fill:#fbbc04,stroke:#fff,color:#fff
    class A,B,C api
    class D,E,F logic
    class G,H,I runtime
Custom Resource Definition
Basic CRD
# crd.yaml
# Registers the namespaced `Database` kind (group example.com, version v1).
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  # CRD names take the form <plural>.<group>.
  name: databases.example.com
spec:
  group: example.com
  names:
    kind: Database
    listKind: DatabaseList
    plural: databases
    singular: database
    shortNames:
      - db
  scope: Namespaced
  versions:
    - name: v1
      served: true
      storage: true
      # Enable the /status subresource so the controller can write status
      # independently of spec (the operator's Role grants `databases/status`).
      subresources:
        status: {}
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              properties:
                engine:
                  type: string
                  enum: ["postgres", "mysql", "mongodb"]
                version:
                  type: string
                storage:
                  type: string
                  # Whole gibibytes only, e.g. "100Gi".
                  pattern: '^[0-9]+Gi$'
                replicas:
                  type: integer
                  minimum: 1
                  maximum: 5
                  # Not in `required`, so give consumers a defined value.
                  default: 1
              required:
                - engine
                - version
                - storage
            status:
              type: object
              properties:
                phase:
                  type: string
                  enum: ["Pending", "Running", "Failed"]
                message:
                  type: string
Custom Resource
# database.yaml
# Example Database resource: a three-replica PostgreSQL 14.5 instance
# requesting 100Gi of storage.
apiVersion: example.com/v1
kind: Database
metadata:
  name: production-db
spec:
  engine: postgres
  version: "14.5"   # quoted so it stays a string
  storage: "100Gi"
  replicas: 3
Operator Implementation
Controller Structure
// controller.ts
import { KubernetesObject } from '@kubernetes/client-node';
import { Controller, ResourceEventType } from '@kubernetes/operator-framework';

/** Desired state of a Database; the fields mirror `spec` in the Database CRD. */
export interface DatabaseSpec {
  engine: string;
  version: string;
  storage: string;
  replicas: number;
}

/** Observed state that the controller writes back onto the resource. */
export interface DatabaseStatus {
  phase: 'Pending' | 'Running' | 'Failed';
  message: string;
}

/** A Database custom resource as handed to the controller. */
export interface Database extends KubernetesObject {
  spec: DatabaseSpec;
  /** Optional: a resource that has never been reconciled carries no status. */
  status?: DatabaseStatus;
}

/**
 * Returns the resource's `metadata.name`.
 *
 * @throws Error when the object has no metadata or no name, since every
 *   downstream call is keyed by that name.
 */
function requireName(obj: KubernetesObject): string {
  const name = obj.metadata?.name;
  if (!name) {
    throw new Error('Database resource is missing metadata.name');
  }
  return name;
}

/** Turns any thrown value into a message suitable for `status.message`. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Reconciles Database resources into a StatefulSet plus a Service and records
 * the outcome in the resource's status.
 *
 * NOTE(review): `createStatefulSet`, `createService`, `updateStatus`,
 * `deleteStatefulSet` and `deleteService` are called below but are not defined
 * in this listing; presumably they wrap `this.client` — confirm against the
 * full source.
 */
export class DatabaseController implements Controller {
  private readonly client: any;

  /** @param client API client used by the resource helpers. */
  constructor(client: any) {
    this.client = client;
  }

  /**
   * Brings the cluster in line with `obj.spec`.
   *
   * On success the status becomes `Running`; if any step throws, the status
   * becomes `Failed` with the error text and the error is not re-thrown.
   *
   * @throws Error when `obj` has no `metadata.name`.
   */
  async reconcile(obj: Database): Promise<void> {
    const name = requireName(obj);

    try {
      // Create StatefulSet
      await this.createStatefulSet(name, obj.spec);

      // Create Service
      await this.createService(name);

      // Update status
      await this.updateStatus(name, {
        phase: 'Running',
        message: 'Database is ready',
      });
    } catch (error: unknown) {
      // Anything can be thrown, so never assume `.message` exists.
      await this.updateStatus(name, {
        phase: 'Failed',
        message: describeError(error),
      });
    }
  }

  /**
   * Removes the StatefulSet and then the Service that belong to `obj`.
   *
   * @throws Error when `obj` has no `metadata.name`; errors from the delete
   *   helpers propagate to the caller.
   */
  async cleanup(obj: Database): Promise<void> {
    const name = requireName(obj);

    // Cleanup resources
    await this.deleteStatefulSet(name);
    await this.deleteService(name);
  }
}
Resource Management
// resources.ts
import { V1StatefulSet, V1Service } from '@kubernetes/client-node';

/** Container image repository and listening port for one database engine. */
interface EngineInfo {
  readonly image: string;
  readonly port: number;
}

/**
 * Engines accepted by the Database CRD's `engine` enum.
 * The image repository is listed separately because it does not always match
 * the engine name: the official MongoDB image is published as `mongo`.
 */
const ENGINES: ReadonlyMap<string, EngineInfo> = new Map([
  ['postgres', { image: 'postgres', port: 5432 }],
  ['mysql', { image: 'mysql', port: 3306 }],
  ['mongodb', { image: 'mongo', port: 27017 }],
]);

/**
 * Builds the Kubernetes manifests that back a Database resource.
 *
 * NOTE: `DatabaseSpec` is the spec interface declared in controller.ts.
 */
class ResourceManager {
  /**
   * Builds the StatefulSet `<name>-db` for a Database.
   *
   * Each replica gets its own volume through `volumeClaimTemplates`, sized
   * from `spec.storage`, instead of every pod mounting one shared claim.
   *
   * @param name Name of the Database resource; used as the `app` label.
   * @param spec Desired database state.
   * @throws Error when `spec.engine` is not a supported engine.
   */
  createStatefulSet(name: string, spec: DatabaseSpec): V1StatefulSet {
    const engine = this.engineInfo(spec.engine);

    return {
      apiVersion: 'apps/v1',
      kind: 'StatefulSet',
      metadata: {
        name: `${name}-db`,
      },
      spec: {
        // Governing Service; matches the name produced by createService().
        serviceName: `${name}-svc`,
        replicas: spec.replicas,
        selector: {
          matchLabels: { app: name },
        },
        template: {
          metadata: {
            labels: { app: name },
          },
          spec: {
            containers: [
              {
                name: 'database',
                image: `${engine.image}:${spec.version}`,
                ports: [{ containerPort: engine.port }],
                volumeMounts: [{ name: 'data', mountPath: '/data' }],
              },
            ],
          },
        },
        volumeClaimTemplates: [
          {
            metadata: { name: 'data' },
            spec: {
              accessModes: ['ReadWriteOnce'],
              resources: { requests: { storage: spec.storage } },
            },
          },
        ],
      },
    };
  }

  /**
   * Builds the Service `<name>-svc` that selects the database pods.
   *
   * @param name Name of the Database resource.
   * @param engine Engine whose port the Service exposes. Defaults to
   *   `postgres`, which keeps the 5432 port for callers that pass only a name.
   * @throws Error when `engine` is not a supported engine.
   */
  createService(name: string, engine: string = 'postgres'): V1Service {
    const port = this.getPort(engine);

    return {
      apiVersion: 'v1',
      kind: 'Service',
      metadata: {
        name: `${name}-svc`,
      },
      spec: {
        selector: { app: name },
        ports: [{ port, targetPort: port }],
      },
    };
  }

  /**
   * Returns the listening port for `engine`.
   *
   * @throws Error when `engine` is not a supported engine.
   */
  private getPort(engine: string): number {
    return this.engineInfo(engine).port;
  }

  /** Looks up engine metadata, failing loudly instead of yielding `undefined`. */
  private engineInfo(engine: string): EngineInfo {
    const info = ENGINES.get(engine);
    if (info === undefined) {
      const supported = [...ENGINES.keys()].join(', ');
      throw new Error(
        `Unsupported database engine "${engine}" (expected one of: ${supported})`,
      );
    }
    return info;
  }
}
Operator SDK
Project Setup
# PROJECT
# Project metadata file for the database-operator repository.
domain: example.com
layout:
  - go.kubebuilder.io/v3
projectName: database-operator
repo: github.com/example/database-operator
version: "3"
plugins:
  manifests.sdk.operatorframework.io/v2: {}
  scorecard.sdk.operatorframework.io/v2: {}
API Types
// api/v1/database_types.go package v1 import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) // DatabaseSpec defines the desired state type DatabaseSpec struct { Engine string `json:"engine"` Version string `json:"version"` Storage string `json:"storage"` Replicas int32 `json:"replicas"` } // DatabaseStatus defines the observed state type DatabaseStatus struct { Phase string `json:"phase"` Message string `json:"message"` } // Database is the Schema for the databases API type Database struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec DatabaseSpec `json:"spec,omitempty"` Status DatabaseStatus `json:"status,omitempty"` }
Testing
Unit Tests
// controller.test.ts
import { DatabaseController } from './controller';
import { MockKubeClient } from './mocks';

describe('DatabaseController', () => {
  let controller: DatabaseController;
  let client: MockKubeClient;

  /**
   * Builds a complete Database fixture. A fresh object per test keeps the
   * tests independent, and including every field keeps the fixture assignable
   * to the controller's `Database` parameter.
   */
  const makeDatabase = () => ({
    metadata: { name: 'test-db' },
    spec: {
      engine: 'postgres',
      version: '14.5',
      storage: '10Gi',
      replicas: 1,
    },
    status: { phase: 'Pending' as const, message: '' },
  });

  beforeEach(() => {
    client = new MockKubeClient();
    controller = new DatabaseController(client);
  });

  test('should create resources on reconcile', async () => {
    await controller.reconcile(makeDatabase());

    expect(client.getStatefulSet('test-db-db')).toBeDefined();
    expect(client.getService('test-db-svc')).toBeDefined();
  });

  test('should cleanup resources', async () => {
    const database = makeDatabase();

    // Create the resources first; otherwise the "undefined" assertions below
    // would pass on an empty mock without cleanup doing anything.
    await controller.reconcile(database);
    expect(client.getStatefulSet('test-db-db')).toBeDefined();
    expect(client.getService('test-db-svc')).toBeDefined();

    await controller.cleanup(database);

    expect(client.getStatefulSet('test-db-db')).toBeUndefined();
    expect(client.getService('test-db-svc')).toBeUndefined();
  });
});
Integration Tests
// controllers/database_controller_test.go package controllers import ( "context" "testing" "time" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" ) var _ = Describe("Database Controller", func() { Context("When creating Database", func() { It("Should create StatefulSet and Service", func() { ctx := context.Background() database := &DatabaseV1{ ObjectMeta: metav1.ObjectMeta{ Name: "test-db", Namespace: "default", }, Spec: DatabaseSpec{ Engine: "postgres", Version: "14.5", Storage: "10Gi", Replicas: 1, }, } Expect(k8sClient.Create(ctx, database)).Should(Succeed()) statefulSet := &appsv1.StatefulSet{} Eventually(func() bool { err := k8sClient.Get(ctx, types.NamespacedName{ Name: "test-db-db", Namespace: "default", }, statefulSet) return err == nil }, time.Second*10, time.Second).Should(BeTrue()) service := &corev1.Service{} Eventually(func() bool { err := k8sClient.Get(ctx, types.NamespacedName{ Name: "test-db-svc", Namespace: "default", }, service) return err == nil }, time.Second*10, time.Second).Should(BeTrue()) }) }) })
Deployment
Operator Deployment
# operator.yaml
# Runs a single replica of the operator under the database-operator
# ServiceAccount.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: database-operator
spec:
  replicas: 1
  selector:
    matchLabels:
      name: database-operator
  template:
    metadata:
      labels:
        name: database-operator
    spec:
      serviceAccountName: database-operator
      containers:
        - name: operator
          image: example.com/database-operator:v1.0.0
          command:
            - database-operator
          imagePullPolicy: Always
          env:
            # Set from the pod's own namespace.
            - name: WATCH_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            # Set from the pod's own name.
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: OPERATOR_NAME
              value: "database-operator"
RBAC Configuration
# rbac.yaml
# ServiceAccount, Role and RoleBinding for the operator.
# Verbs are listed explicitly instead of '*' so the operator holds only the
# permissions named here and does not silently gain new ones.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: database-operator
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: database-operator
rules:
  # Core-group resources.
  - apiGroups:
      - ""
    resources:
      - pods
      - services
      - endpoints
      - persistentvolumeclaims
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
      - delete
  # Workload resources.
  - apiGroups:
      - apps
    resources:
      - deployments
      - statefulsets
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
      - delete
  # The custom resource itself.
  - apiGroups:
      - example.com
    resources:
      - databases
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
      - delete
  # The status subresource only supports read and write of an existing object.
  - apiGroups:
      - example.com
    resources:
      - databases/status
    verbs:
      - get
      - update
      - patch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: database-operator
subjects:
  - kind: ServiceAccount
    name: database-operator
roleRef:
  kind: Role
  name: database-operator
  apiGroup: rbac.authorization.k8s.io
Best Practices
Implementation Guidelines
Practice | Description | Benefit |
---|---|---|
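Idempotency | Consistent results: reconciling the same resource repeatedly converges on the same state | Reliability |
Status Updates | Resource state: record the observed phase and a message on the resource's status | Observability |
Error Handling | Graceful recovery: report failures in status and retry rather than crashing or dropping them | Resilience |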
Troubleshooting Guide
Common Issues
Issue | Cause | Solution |
---|---|---|
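Endless Reconcile Loop | Resource conflicts: the controller or another writer keeps modifying an object it watches | Check ownership and write only when something actually changed |
RBAC Issues | Missing permissions for a resource or verb the operator uses | Update roles |
Resource Leaks | Cleanup failures when the custom resource is deleted | Implement finalizers |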
References
- Operator SDK Documentation
- Kubernetes Custom Resources
- Controller Runtime
- Operator Pattern
- Operator Best Practices
- Testing Operators
Related Posts
- GitOps Tools Comparison - Modern deployment
- DevOps AI Integration - AI in DevOps
- Platform Engineering - Modern platforms
- DevSecOps Implementation - Security integration
Kubernetes
Operators
Cloud Native
DevOps