Kubernetes Operators: A Complete Development Guide
Kubernetes

Kubernetes Operators: A Complete Development Guide

Master Kubernetes Operators with this comprehensive guide covering custom controllers, operator patterns, and best practices for extending Kubernetes functionality.

March 15, 2024
DevHub Team
6 min read

Kubernetes Operators: A Complete Development Guide

Kubernetes Operators extend the platform's functionality by automating complex application management tasks. This guide explores operator development patterns, implementation strategies, and best practices.

Operator Architecture

%% Operator architecture, drawn top-to-bottom as three groups of nodes:
%% "Kubernetes API", "Application Logic" and "Runtime".
graph TB
    subgraph "Kubernetes API"
        A[Custom Resource Definition]
        B[Custom Controller]
        C[Reconciliation Loop]
    end
    subgraph "Application Logic"
        D[Resource Management]
        E[State Management]
        F[Lifecycle Hooks]
    end
    subgraph "Runtime"
        G[Operator Pod]
        H[Managed Resources]
        I[Status Updates]
    end

    %% Single chain from the CRD through every node ...
    A --> B
    B --> C
    C --> D
    D --> E
    E --> F
    F --> G
    G --> H
    H --> I
    %% ... with Status Updates pointing back at the Reconciliation Loop,
    %% which closes the cycle.
    I --> C

    %% One colour class per subgraph.
    classDef api fill:#1a73e8,stroke:#fff,color:#fff
    classDef logic fill:#34a853,stroke:#fff,color:#fff
    classDef runtime fill:#fbbc04,stroke:#fff,color:#fff
    class A,B,C api
    class D,E,F logic
    class G,H,I runtime

Custom Resource Definition

Basic CRD

# crd.yaml
# Registers the namespaced `Database` kind (group example.com, version v1)
# and the OpenAPI schema used to validate its spec and status.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  # CRD names must have the form <plural>.<group>.
  name: databases.example.com
spec:
  group: example.com
  names:
    kind: Database
    listKind: DatabaseList
    plural: databases
    singular: database
    shortNames:
      - db
  scope: Namespaced
  versions:
    - name: v1
      served: true
      storage: true
      # Enable the /status subresource. The operator's Role grants access to
      # `databases/status`, which only exists when this is set, and it keeps
      # status writes by the controller separate from spec writes by users.
      subresources:
        status: {}
      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              properties:
                engine:
                  type: string
                  enum: ["postgres", "mysql", "mongodb"]
                version:
                  type: string
                storage:
                  type: string
                  # Whole gibibytes only, e.g. "100Gi".
                  pattern: '^[0-9]+Gi$'
                replicas:
                  # Optional: not listed under `required` below.
                  type: integer
                  minimum: 1
                  maximum: 5
              required:
                - engine
                - version
                - storage
            status:
              type: object
              properties:
                phase:
                  type: string
                  enum: ["Pending", "Running", "Failed"]
                message:
                  type: string

Custom Resource

# database.yaml
# Example Database resource: a postgres 14.5 instance named production-db
# with 3 replicas and a storage value of 100Gi.
apiVersion: example.com/v1
kind: Database
metadata:
  name: production-db
spec:
  engine: postgres
  # Quoted so YAML keeps the version as a string rather than the number 14.5.
  version: "14.5"
  replicas: 3
  storage: 100Gi

Operator Implementation

Controller Structure

// controller.ts
import type { KubernetesObject } from '@kubernetes/client-node';
import type { Controller, ResourceEventType } from '@kubernetes/operator-framework';

/** Desired state of a Database resource (the `spec` section of the CRD). */
interface DatabaseSpec {
  engine: string;
  version: string;
  storage: string;
  /** Optional: the CRD schema does not list `replicas` as required. */
  replicas?: number;
}

/** Observed state that the controller writes back to the resource. */
interface DatabaseStatus {
  phase: 'Pending' | 'Running' | 'Failed';
  message: string;
}

/** A Database custom resource as handed to the controller. */
interface Database extends KubernetesObject {
  spec: DatabaseSpec;
  /** Absent until the controller has written a status for the first time. */
  status?: DatabaseStatus;
}

/**
 * Reconciles Database resources: creates the backing StatefulSet and Service
 * and records the outcome in the resource status.
 *
 * NOTE(review): `createStatefulSet`, `createService`, `updateStatus`,
 * `deleteStatefulSet` and `deleteService` are called below but are not defined
 * in this snippet; they must be provided on this class for it to compile.
 */
export class DatabaseController implements Controller {
  // Kept as `any` because the client's API is not visible in this snippet.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private readonly client: any;

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  constructor(client: any) {
    this.client = client;
  }

  /**
   * Brings the cluster in line with `obj`.
   *
   * Creates the StatefulSet, then the Service, then sets the status to
   * `Running`. If any of those steps throws, the status is set to `Failed`
   * with the error text and the method resolves without rethrowing.
   *
   * @param obj - The Database resource to reconcile.
   * @throws Error if `obj` has no `metadata.name`, or if writing the `Failed`
   *   status itself fails.
   */
  async reconcile(obj: Database): Promise<void> {
    const name = DatabaseController.requireName(obj);

    try {
      // Create StatefulSet
      await this.createStatefulSet(name, obj.spec);

      // Create Service
      await this.createService(name);

      // Update status
      await this.updateStatus(name, {
        phase: 'Running',
        message: 'Database is ready',
      });
    } catch (error: unknown) {
      // A caught value is not guaranteed to be an Error, so derive the message
      // defensively instead of reading `.message` off an unknown value.
      await this.updateStatus(name, {
        phase: 'Failed',
        message: DatabaseController.describeError(error),
      });
    }
  }

  /**
   * Removes the resources created for `obj`: the StatefulSet first, then the
   * Service.
   *
   * @param obj - The Database resource being removed.
   * @throws Error if `obj` has no `metadata.name`; errors from the delete
   *   helpers propagate to the caller.
   */
  async cleanup(obj: Database): Promise<void> {
    const name = DatabaseController.requireName(obj);

    // Cleanup resources
    await this.deleteStatefulSet(name);
    await this.deleteService(name);
  }

  /** Returns `metadata.name`, failing loudly instead of passing `undefined` on. */
  private static requireName(obj: Database): string {
    const name = obj.metadata?.name;
    if (!name) {
      throw new Error('Database resource is missing metadata.name');
    }
    return name;
  }

  /** Turns any thrown value into a human-readable status message. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }
}

Resource Management

// resources.ts
import type { V1StatefulSet, V1Service } from '@kubernetes/client-node';

/** Container image repository and listening port for one database engine. */
interface EngineProfile {
  readonly image: string;
  readonly port: number;
}

/** Builds the Kubernetes manifests that back a Database resource. */
class ResourceManager {
  /**
   * Supported engines. A Map is used (rather than an object literal) so that
   * lookups such as "constructor" cannot hit inherited properties.
   * The MongoDB entry maps to the `mongo` image because that is the name of
   * the official image; there is no `mongodb` image to pull.
   */
  private static readonly engines: ReadonlyMap<string, EngineProfile> = new Map([
    ['postgres', { image: 'postgres', port: 5432 }],
    ['mysql', { image: 'mysql', port: 3306 }],
    ['mongodb', { image: 'mongo', port: 27017 }],
  ]);

  /**
   * Builds the StatefulSet manifest named `<name>-db`.
   *
   * Storage is requested through `volumeClaimTemplates`, so every replica gets
   * its own claim sized from `spec.storage` instead of all pods pointing at a
   * single pre-existing claim that nothing creates.
   *
   * @param name - Name of the Database resource; used for labels and names.
   * @param spec - Desired engine, version, storage size and replica count.
   * @returns The StatefulSet manifest (not yet submitted to the cluster).
   * @throws Error if `spec.engine` is not a supported engine.
   */
  createStatefulSet(name: string, spec: DatabaseSpec): V1StatefulSet {
    const engine = this.profileFor(spec.engine);

    return {
      apiVersion: 'apps/v1',
      kind: 'StatefulSet',
      metadata: { name: `${name}-db` },
      spec: {
        // Matches the Service produced by createService().
        // NOTE(review): StatefulSets expect a headless governing Service for
        // stable pod DNS names; the Service built here is not headless.
        serviceName: `${name}-svc`,
        replicas: spec.replicas,
        selector: { matchLabels: { app: name } },
        template: {
          metadata: { labels: { app: name } },
          spec: {
            containers: [
              {
                name: 'database',
                image: `${engine.image}:${spec.version}`,
                ports: [{ containerPort: engine.port }],
                // NOTE(review): '/data' is kept from the original; confirm it
                // is the directory each engine actually writes its data to.
                volumeMounts: [{ name: 'data', mountPath: '/data' }],
              },
            ],
          },
        },
        volumeClaimTemplates: [
          {
            metadata: { name: 'data' },
            spec: {
              accessModes: ['ReadWriteOnce'],
              resources: { requests: { storage: spec.storage } },
            },
          },
        ],
      },
    };
  }

  /**
   * Builds the Service manifest named `<name>-svc` that selects the database
   * pods.
   *
   * @param name - Name of the Database resource.
   * @param engine - Engine whose port the Service exposes. Defaults to
   *   `'postgres'`, which preserves the previous fixed port 5432 for callers
   *   that pass only `name`.
   * @returns The Service manifest (not yet submitted to the cluster).
   * @throws Error if `engine` is not a supported engine.
   */
  createService(name: string, engine: string = 'postgres'): V1Service {
    const port = this.getPort(engine);

    return {
      apiVersion: 'v1',
      kind: 'Service',
      metadata: { name: `${name}-svc` },
      spec: {
        selector: { app: name },
        ports: [{ port, targetPort: port }],
      },
    };
  }

  /**
   * Returns the listening port for `engine`.
   *
   * @throws Error if `engine` is not a supported engine.
   */
  private getPort(engine: string): number {
    return this.profileFor(engine).port;
  }

  /** Looks up the engine profile, rejecting engines that are not supported. */
  private profileFor(engine: string): EngineProfile {
    const profile = ResourceManager.engines.get(engine);
    if (profile === undefined) {
      throw new Error(`Unsupported database engine: ${engine}`);
    }
    return profile;
  }
}

Operator SDK

Project Setup

# PROJECT
# Project metadata for the database-operator project described in the
# "Operator SDK" section.
# NOTE(review): this file is normally written and updated by the CLI tooling;
# confirm before editing it by hand.
domain: example.com
layout:
- go.kubebuilder.io/v3
projectName: database-operator
repo: github.com/example/database-operator
version: "3"
# Plugin entries; both are configured with empty settings.
plugins:
  manifests.sdk.operatorframework.io/v2: {}
  scorecard.sdk.operatorframework.io/v2: {}

API Types

// api/v1/database_types.go package v1 import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) // DatabaseSpec defines the desired state type DatabaseSpec struct { Engine string `json:"engine"` Version string `json:"version"` Storage string `json:"storage"` Replicas int32 `json:"replicas"` } // DatabaseStatus defines the observed state type DatabaseStatus struct { Phase string `json:"phase"` Message string `json:"message"` } // Database is the Schema for the databases API type Database struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` Spec DatabaseSpec `json:"spec,omitempty"` Status DatabaseStatus `json:"status,omitempty"` }

Testing

Unit Tests

// controller.test.ts
import { DatabaseController } from './controller';
import { MockKubeClient } from './mocks';

describe('DatabaseController', () => {
  let controller: DatabaseController;
  let client: MockKubeClient;

  /**
   * Builds a complete Database resource named `test-db`. A fresh object is
   * returned on every call so tests cannot leak state into each other.
   */
  const makeDatabase = () => ({
    metadata: { name: 'test-db' },
    spec: {
      engine: 'postgres',
      version: '14.5',
      storage: '10Gi',
      replicas: 1,
    },
    status: { phase: 'Pending' as const, message: '' },
  });

  beforeEach(() => {
    client = new MockKubeClient();
    controller = new DatabaseController(client);
  });

  test('should create resources on reconcile', async () => {
    await controller.reconcile(makeDatabase());

    expect(client.getStatefulSet('test-db-db')).toBeDefined();
    expect(client.getService('test-db-svc')).toBeDefined();
  });

  test('should cleanup resources', async () => {
    const database = makeDatabase();

    // Create the resources first. Without this step the "undefined"
    // assertions below would pass even if cleanup() did nothing at all.
    await controller.reconcile(database);
    expect(client.getStatefulSet('test-db-db')).toBeDefined();
    expect(client.getService('test-db-svc')).toBeDefined();

    await controller.cleanup(database);

    expect(client.getStatefulSet('test-db-db')).toBeUndefined();
    expect(client.getService('test-db-svc')).toBeUndefined();
  });
});

Integration Tests

// controllers/database_controller_test.go package controllers import ( "context" "testing" "time" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" ) var _ = Describe("Database Controller", func() { Context("When creating Database", func() { It("Should create StatefulSet and Service", func() { ctx := context.Background() database := &DatabaseV1{ ObjectMeta: metav1.ObjectMeta{ Name: "test-db", Namespace: "default", }, Spec: DatabaseSpec{ Engine: "postgres", Version: "14.5", Storage: "10Gi", Replicas: 1, }, } Expect(k8sClient.Create(ctx, database)).Should(Succeed()) statefulSet := &appsv1.StatefulSet{} Eventually(func() bool { err := k8sClient.Get(ctx, types.NamespacedName{ Name: "test-db-db", Namespace: "default", }, statefulSet) return err == nil }, time.Second*10, time.Second).Should(BeTrue()) service := &corev1.Service{} Eventually(func() bool { err := k8sClient.Get(ctx, types.NamespacedName{ Name: "test-db-svc", Namespace: "default", }, service) return err == nil }, time.Second*10, time.Second).Should(BeTrue()) }) }) })

Deployment

Operator Deployment

# operator.yaml
# Deployment running one replica of the operator container under the
# database-operator service account.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: database-operator
spec:
  replicas: 1
  selector:
    matchLabels: {name: database-operator}
  template:
    metadata:
      labels: {name: database-operator}
    spec:
      serviceAccountName: database-operator
      containers:
        - name: operator
          image: example.com/database-operator:v1.0.0
          imagePullPolicy: Always
          command: [database-operator]
          env:
            # Filled from the pod's own namespace.
            - name: WATCH_NAMESPACE
              valueFrom:
                fieldRef: {fieldPath: metadata.namespace}
            # Filled from the pod's own name.
            - name: POD_NAME
              valueFrom:
                fieldRef: {fieldPath: metadata.name}
            - name: OPERATOR_NAME
              value: database-operator

RBAC Configuration

# rbac.yaml
# Identity and namespaced permissions for the operator.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: database-operator
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: database-operator
rules:
  # Verbs are listed explicitly instead of '*', so the operator does not
  # silently gain any verb outside this list (for example deletecollection).
  - apiGroups:
      - ""
    resources:
      - pods
      - services
      - endpoints
      - persistentvolumeclaims
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
      - delete
  - apiGroups:
      - apps
    resources:
      - deployments
      - statefulsets
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
      - delete
  # The operator reads and updates Database objects; it does not need to
  # create or delete them.
  - apiGroups:
      - example.com
    resources:
      - databases
    verbs:
      - get
      - list
      - watch
      - update
      - patch
  # Status is written through its own subresource.
  - apiGroups:
      - example.com
    resources:
      - databases/status
    verbs:
      - get
      - update
      - patch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: database-operator
subjects:
  - kind: ServiceAccount
    name: database-operator
roleRef:
  kind: Role
  name: database-operator
  apiGroup: rbac.authorization.k8s.io

Best Practices

Implementation Guidelines

| Practice | Description | Benefit |
| --- | --- | --- |
| Idempotency | Consistent results | Reliability |
| Status Updates | Resource state | Observability |
| Error Handling | Graceful recovery | Resilience |

Troubleshooting Guide

Common Issues

| Issue | Cause | Solution |
| --- | --- | --- |
| Reconcile Loop | Resource conflicts | Check ownership |
| RBAC Issues | Missing permissions | Update roles |
| Resource Leaks | Cleanup failures | Implement finalizers |

References

  1. Operator SDK Documentation
  2. Kubernetes Custom Resources
  3. Controller Runtime
  4. Operator Pattern
  5. Operator Best Practices
  6. Testing Operators

Related Posts

Kubernetes
Operators
Cloud Native
DevOps