Extending Kubernetes: Writing a CRD and Operator

kubebuilder documentation (Chinese translation)

https://xuejipeng.github.io/kubebuilder-doc-cn/introduction.html

kubebuilder-operator-example

An XyDaemonset controller built with kubebuilder: given an image, a replica count, and a start command, it deploys Pods on every node in the cluster and scales them up or down according to the replica count.

Environment

  • kubebuilder: v4.4.0
  • kubernetes: v1.27.1

Project Initialization

  • Kubebuilder is a framework for building Kubernetes APIs on top of CRDs; it can be used to scaffold the API, the Controller, and Admission Webhooks.
# Initialize the project skeleton
kubebuilder init --domain xy.io

# Scaffold the API (CRD); answer y to both prompts (Create Resource and Create Controller)
kubebuilder create api --group xytest --version v1 --kind XyDaemonset

Defining the CRD Fields and Structure

type XyDaemonsetSpec struct {
	Image    string   `json:"image"`
	Command  []string `json:"command"`
	Replicas int      `json:"replicas,omitempty"`
}

type XyDaemonsetStatus struct {
	AvailableReplicas int      `json:"availableReplicas"`
	PodNames          []string `json:"podNames"`
	AutoScalingStatus string   `json:"autoScalingStatus"`
}

type XyDaemonset struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	Spec   XyDaemonsetSpec   `json:"spec,omitempty"`
	Status XyDaemonsetStatus `json:"status,omitempty"`
}
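  • Directly above the XyDaemonset type, the scaffolded api/v1/xydaemonset_types.go also carries kubebuilder markers. The lines below are a reminder of the typical markers, not a verbatim copy from this repository; the +kubebuilder:subresource:status marker in particular must be present, otherwise the controller's later r.Client.Status().Update call against the CRD will fail. Validation markers (for example // +kubebuilder:validation:Minimum=0 on Replicas) can be added the same way before running make manifests.
// Typical markers above the root type (illustrative; verify against the generated file)
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status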
  • Run the following command to generate the CRD definition YAML; the file is generated at config/crd/bases/xytest.xy.io_xydaemonsets.yaml
make manifests
  • Then apply the CRD definition to the Kubernetes cluster with kubectl
kubectl apply -f config/crd/bases/xytest.xy.io_xydaemonsets.yaml
  • Check that the CRD was applied successfully with the following command
kubectl get crd | grep xydaemonsets

Writing the Controller

  • In /internal/controller/xydaemonset_controller.go, add the permissions the controller needs on nodes and pods
// Add permissions to the controller
// +kubebuilder:rbac:groups=xytest.xy.io,resources=xydaemonsets,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=xytest.xy.io,resources=xydaemonsets/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=xytest.xy.io,resources=xydaemonsets/finalizers,verbs=update
// +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch
// +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups="",resources=pods/status,verbs=get
  • Add the controller logic in /internal/controller/xydaemonset_controller.go. A Kubernetes controller watches create/update/delete events on its resources and triggers the Reconcile function in response.
func (r *XyDaemonsetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	_ = log.FromContext(ctx)
	log.Log.Info("XyDaemonset Controller start reconcile")
	// fetch the XyDaemonset instance
	instance := &xytestv1.XyDaemonset{}
	if err := r.Client.Get(ctx, req.NamespacedName, instance); err != nil {
		if errors.IsNotFound(err) {
			return ctrl.Result{}, nil
		}
		return ctrl.Result{}, err
	}

	if instance.Status.AutoScalingStatus == "Running" {
		log.Log.Info("AutoScaling Running")
		return ctrl.Result{Requeue: true}, nil
	}

	specPodLength := instance.Spec.Replicas
	// list all nodes in the cluster
	allNodeList := &corev1.NodeList{}
	if err := r.Client.List(ctx, allNodeList); err != nil {
		return ctrl.Result{}, err
	}

	currentStatus := xytestv1.XyDaemonsetStatus{}

	for _, node := range allNodeList.Items {
		existingPods, err := r.fetchPodLenAndStatus(ctx, instance, node.Name)
		if err != nil {
			return ctrl.Result{}, err
		}
		existingPodNames, existingPodLength := r.filterPods(existingPods.Items)
		if specPodLength != existingPodLength {
			if err := r.markRunning(instance, ctx); err != nil {
				return ctrl.Result{}, err
			}

			// fewer Pods than desired: scale up by creating a Pod
			if specPodLength > existingPodLength {
				log.Log.Info(fmt.Sprintf("creating pod, current num %d < expected num %d", existingPodLength, specPodLength))
				pod := buildPod(instance, node.Name)
				if err := controllerutil.SetControllerReference(instance, pod, r.Scheme); err != nil {
					log.Log.Error(err, "scale up failed: SetControllerReference")
					return ctrl.Result{}, err
				}
				if err := r.Client.Create(ctx, pod); err != nil {
					log.Log.Error(err, "scale up failed: create pod")
					return ctrl.Result{}, err
				}
				existingPodNames = append(existingPodNames, pod.Name)
				existingPodLength += 1
			}

			// more Pods than desired: scale down by deleting a Pod
			if specPodLength < existingPodLength {
				log.Log.Info(fmt.Sprintf("deleting pod, current num %d > expected num %d", existingPodLength, specPodLength))
				pod := existingPods.Items[0]
				existingPods.Items = existingPods.Items[1:]
				existingPodNames = removeString(existingPodNames, pod.Name)
				existingPodLength -= 1
				if err := r.Client.Delete(ctx, &pod); err != nil {
					log.Log.Error(err, "scale down faled")
					return ctrl.Result{}, err
				}
			}
		}
		currentStatus.AvailableReplicas += existingPodLength
		currentStatus.PodNames = append(currentStatus.PodNames, existingPodNames...)
	}
	// update the instance status if it changed
	if instance.Status.AvailableReplicas != currentStatus.AvailableReplicas || !(reflect.DeepEqual(instance.Status.PodNames, currentStatus.PodNames)) {
		log.Log.Info("instance.Status.AvailableReplicas")
		log.Log.Info(fmt.Sprint(instance.Status.PodNames))
		log.Log.Info("currentStatus.PodNames")
		log.Log.Info(fmt.Sprint(currentStatus.PodNames))
		log.Log.Info(fmt.Sprintf("更新当前instance状态, instance.Status.AvailableReplicas %d : currentStatus.AvailableReplicas %d", instance.Status.AvailableReplicas, currentStatus.AvailableReplicas))
		currentStatus.AutoScalingStatus = "Sleep"
		instance.Status = currentStatus
		if err := r.Client.Status().Update(ctx, instance); err != nil {
			log.Log.Error(err, "update pod failed")
			return ctrl.Result{}, err
		}
	}
	return ctrl.Result{Requeue: true}, nil
}
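  • The Reconcile loop calls several small helpers (buildPod, fetchPodLenAndStatus, filterPods, markRunning, removeString) and is registered via SetupWithManager; their definitions live elsewhere in the example repository. The following is a minimal sketch of what they could look like, not the repository's exact code: the "app" label key, pinning Pods via spec.nodeName, and the Owns(&corev1.Pod{}) watch are assumptions made for this sketch. It is written as part of the same xydaemonset_controller.go and reuses its imports (context, corev1, metav1, client, ctrl, xytestv1).
// Hedged sketches of the helpers used in Reconcile; the real implementations may differ.

// buildPod builds a Pod pinned to the given node and labelled so it can be listed per instance.
func buildPod(instance *xytestv1.XyDaemonset, nodeName string) *corev1.Pod {
	return &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: instance.Name + "-",
			Namespace:    instance.Namespace,
			Labels:       map[string]string{"app": instance.Name},
		},
		Spec: corev1.PodSpec{
			NodeName: nodeName, // bypasses the scheduler and pins the Pod to this node
			Containers: []corev1.Container{{
				Name:    "main",
				Image:   instance.Spec.Image,
				Command: instance.Spec.Command,
			}},
		},
	}
}

// fetchPodLenAndStatus lists the Pods that belong to the instance and run on the given node.
func (r *XyDaemonsetReconciler) fetchPodLenAndStatus(ctx context.Context, instance *xytestv1.XyDaemonset, nodeName string) (*corev1.PodList, error) {
	all := &corev1.PodList{}
	if err := r.Client.List(ctx, all,
		client.InNamespace(instance.Namespace),
		client.MatchingLabels{"app": instance.Name}); err != nil {
		return nil, err
	}
	onNode := &corev1.PodList{}
	for _, p := range all.Items {
		if p.Spec.NodeName == nodeName {
			onNode.Items = append(onNode.Items, p)
		}
	}
	return onNode, nil
}

// filterPods returns the names and count of Pods that are not being deleted.
func (r *XyDaemonsetReconciler) filterPods(pods []corev1.Pod) ([]string, int) {
	var names []string
	for _, p := range pods {
		if p.DeletionTimestamp == nil {
			names = append(names, p.Name)
		}
	}
	return names, len(names)
}

// markRunning records in the status that a scaling operation is in progress.
func (r *XyDaemonsetReconciler) markRunning(instance *xytestv1.XyDaemonset, ctx context.Context) error {
	instance.Status.AutoScalingStatus = "Running"
	return r.Client.Status().Update(ctx, instance)
}

// removeString drops the first occurrence of s from slice.
func removeString(slice []string, s string) []string {
	for i, item := range slice {
		if item == s {
			return append(slice[:i], slice[i+1:]...)
		}
	}
	return slice
}

// SetupWithManager registers the controller with the manager; Owns(&corev1.Pod{}) makes events
// on owned Pods also trigger Reconcile.
func (r *XyDaemonsetReconciler) SetupWithManager(mgr ctrl.Manager) error {
	return ctrl.NewControllerManagedBy(mgr).
		For(&xytestv1.XyDaemonset{}).
		Owns(&corev1.Pod{}).
		Complete(r)
}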
  • Write test scripts. This article does not yet flesh them out; it is recommended to complete them for your custom CRD logic following the Kubernetes community's guidance (a minimal envtest sketch follows the make run command below).
  • Run the controller locally with the following command
make run
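  • As a starting point for the tests mentioned above, here is a minimal envtest sketch. It assumes the default kubebuilder scaffold, where internal/controller/suite_test.go provides k8sClient and ctx; the module path in the import is hypothetical and should be replaced with the one in the project's go.mod. It only verifies that an XyDaemonset can be created and read back; assertions about the Pods the controller creates would additionally need Node objects registered in the test environment.
package controller

import (
	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"

	// hypothetical module path; replace with the module declared in this project's go.mod
	xytestv1 "example.com/kubebuilder-operator-example/api/v1"
)

// Minimal sketch: assumes the scaffolded suite_test.go (envtest) provides k8sClient and ctx.
var _ = Describe("XyDaemonset controller", func() {
	It("creates an XyDaemonset and reads it back", func() {
		ds := &xytestv1.XyDaemonset{
			ObjectMeta: metav1.ObjectMeta{Name: "test-xyds", Namespace: "default"},
			Spec: xytestv1.XyDaemonsetSpec{
				Image:    "busybox",
				Command:  []string{"sleep", "3600"},
				Replicas: 1,
			},
		}
		Expect(k8sClient.Create(ctx, ds)).To(Succeed())

		fetched := &xytestv1.XyDaemonset{}
		key := types.NamespacedName{Name: "test-xyds", Namespace: "default"}
		Eventually(func() error {
			return k8sClient.Get(ctx, key, fetched)
		}).Should(Succeed())
		Expect(fetched.Spec.Replicas).To(Equal(1))
	})
})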

Build Controller

  • kubebuilder builds the project with kustomize by default. First edit /config/default/kustomization.yaml to configure the namespace and name prefix for the controller
# Adds namespace to all resources.
namespace: xy-diy

# Value of this field is prepended to the
# names of all resources, e.g. a deployment named
# "wordpress" becomes "alices-wordpress".
# Note that it should also match with the prefix (text before '-') of the namespace
# field above.
namePrefix: xy-
  • Deploy from source: copy the project to a Linux server, build the image and push it to an image registry, then deploy it to the Kubernetes cluster with make deploy
make build
make docker-build
make docker-push
make deploy
  • Local build and deploy: if the local development environment is Windows, you can build the image locally following the Makefile targets below, then apply the manifest on the server with kubectl apply -f manager.yaml
.PHONY: docker-build
docker-build: ## Build docker image with the manager.
	$(CONTAINER_TOOL) build -t ${IMG} .

.PHONY: docker-push
docker-push: ## Push docker image with the manager.
	$(CONTAINER_TOOL) push ${IMG}
  • After make deploy succeeds, check that the controller is running with commands like the following
root@k8s-master:~# kubectl get all -n xy-diy
NAME                                         READY   STATUS        RESTARTS   AGE
pod/xy-controller-manager-55db4ff865-xj8mv   1/1     Running       0          3m1s

NAME                                            TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)    AGE
service/xy-controller-manager-metrics-service   ClusterIP   10.96.227.136   <none>        8443/TCP   10m

NAME                                    READY   UP-TO-DATE   AVAILABLE   AGE
deployment.apps/xy-controller-manager   1/1     1            1           10m

NAME                                               DESIRED   CURRENT   READY   AGE
replicaset.apps/xy-controller-manager-55db4ff865   1         1         1       3m1s
root@k8s-master:~# kubectl logs -f --tail 500 pod/xy-controller-manager-55db4ff865-xj8mv -n xy-diy 
2025-02-14T03:32:39Z    INFO    setup   starting manager
2025-02-14T03:32:39Z    INFO    controller-runtime.metrics      Starting metrics server
2025-02-14T03:32:39Z    INFO    setup   disabling http/2
2025-02-14T03:32:39Z    INFO    starting server {"name": "health probe", "addr": "[::]:8081"}
I0214 03:32:39.669040       1 leaderelection.go:254] attempting to acquire leader lease xy-diy/54323e1d.xy.io...
2025-02-14T03:32:40Z    INFO    controller-runtime.metrics      Serving metrics server  {"bindAddress": ":8443", "secure": true}
I0214 03:33:01.666350       1 leaderelection.go:268] successfully acquired lease xy-diy/54323e1d.xy.io
2025-02-14T03:33:01Z    DEBUG   events  xy-controller-manager-55db4ff865-xj8mv_c5ba8639-d412-4767-bc62-9b4af2f69d8e became leader       {"type": "Normal", "object": {"kind":"Lease","namespace":"xy-diy","name":"54323e1d.xy.io","uid":"19ecc54d-7055-4efa-b584-28ed0917e025","apiVersion":"coordination.k8s.io/v1","resourceVersion":"52604"}, "reason": "LeaderElection"}
2025-02-14T03:33:01Z    INFO    Starting EventSource    {"controller": "xydaemonset", "controllerGroup": "xytest.xy.io", "controllerKind": "XyDaemonset", "source": "kind source: *v1.XyDaemonset"}
2025-02-14T03:33:01Z    INFO    Starting Controller     {"controller": "xydaemonset", "controllerGroup": "xytest.xy.io", "controllerKind": "XyDaemonset"}
2025-02-14T03:33:01Z    INFO    Starting workers        {"controller": "xydaemonset", "controllerGroup": "xytest.xy.io", "controllerKind": "XyDaemonset", "worker count": 1}

Test Example and Parameter Notes

  • Build a test CR
apiVersion: xytest.xy.io/v1
kind: XyDaemonset
metadata:
  labels:
    app.kubernetes.io/name: data
    app.kubernetes.io/managed-by: kustomize
  name: xydaemonset-busybox
  namespace: xy-diy
spec:
  command: ["sleep", "3600"] # container start command
  image: busybox             # container image
  replicas: 1                # replicas per node
  • Deploy the test CR
kubectl apply -f /config/samples/xytest_v1_xydaemonset.yaml
kubectl get xydaemonset -n xy-diy -o yaml
apiVersion: v1
items:
- apiVersion: xytest.xy.io/v1
  kind: XyDaemonset
  metadata:
    annotations:
      kubectl.kubernetes.io/last-applied-configuration: |
        {"apiVersion":"xytest.xy.io/v1","kind":"XyDaemonset","metadata":{"annotations":{},"labels":{"app.kubernetes.io/managed-by":"kustomize","app.kubernetes.io/name":"data"},"name":"xydaemonset-busybox","namespace":"xy-diy"},"spec":{"command":["sleep","3600"],"image":"busybox","replicas":1}}
    creationTimestamp: "2025-02-14T05:28:25Z"
    generation: 1
    labels:
      app.kubernetes.io/managed-by: kustomize
      app.kubernetes.io/name: data
    name: xydaemonset-busybox
    namespace: xy-diy
    resourceVersion: "73897"
    uid: db111ed0-93bd-4879-a9a3-02e26a846957
  spec:
    command:
    - sleep
    - "3600"
    image: busybox
    replicas: 1
  status:
    autoScalingStatus: Sleep
    availableReplicas: 3
    podNames:
    - xydaemonset-busybox-72zqm
    - xydaemonset-busybox-c748t
    - xydaemonset-busybox-9jwzp
kind: List
metadata:
  resourceVersion: ""
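  • Note that availableReplicas in the status is the total across all nodes: with replicas: 1, the value 3 here suggests the example cluster has three nodes, each running one Pod of the XyDaemonset.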

Complete Code

https://github.com/Stringxy/kubebuilder-operator-example