Hi @johscheuer,
Thanks for your answer.
My cluster is currently running on a local k3d cluster, and I am moving it to an AWS EKS cluster.
I use the default fdb-kubernetes-operator
deployment and the default cluster-fdb
from GitHub. I changed the version to 7.1.15.
My EKS cluster was created with the default eksctl
command:
# Create the EKS cluster "k8s-cluster" in us-east-2 with one managed node group
# of three t3.xlarge nodes, all placed in us-east-2c, with SSH access enabled.
eksctl create cluster --name k8s-cluster \
  --region us-east-2 \
  --zones us-east-2a,us-east-2b,us-east-2c \
  --nodes 3 \
  --node-zones us-east-2c \
  --instance-types=t3.xlarge \
  --ssh-access \
  --ssh-public-key k8s-key \
  --managed \
  --nodegroup-name k8s-cluster
I got an error and read the log, but it did not contain enough information to debug.
Here are my YAML
files and the error log.
# ServiceAccount referenced by the operator Deployment (serviceAccountName)
# and by the RoleBinding subject below.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: fdb-kubernetes-operator-controller-manager
---
# Namespaced Role listing the API permissions granted to the operator:
# core resources, Deployments, the apps.foundationdb.org custom resources
# (and their status subresources), and coordination.k8s.io Leases.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  creationTimestamp: null
  name: fdb-kubernetes-operator-manager-role
rules:
  # Core API group ("" is the core group).
  - apiGroups:
      - ""
    resources:
      - configmaps
      - events
      - persistentvolumeclaims
      - pods
      - secrets
      - services
    verbs:
      - create
      - delete
      - get
      - list
      - patch
      - update
      - watch
  - apiGroups:
      - apps
    resources:
      - deployments
    verbs:
      - create
      - delete
      - get
      - list
      - patch
      - update
      - watch
  # FoundationDB custom resources; each kind has a separate rule for its
  # status subresource with a narrower verb set.
  - apiGroups:
      - apps.foundationdb.org
    resources:
      - foundationdbbackups
    verbs:
      - create
      - delete
      - get
      - list
      - patch
      - update
      - watch
  - apiGroups:
      - apps.foundationdb.org
    resources:
      - foundationdbbackups/status
    verbs:
      - get
      - patch
      - update
  - apiGroups:
      - apps.foundationdb.org
    resources:
      - foundationdbclusters
    verbs:
      - create
      - delete
      - get
      - list
      - patch
      - update
      - watch
  - apiGroups:
      - apps.foundationdb.org
    resources:
      - foundationdbclusters/status
    verbs:
      - get
      - patch
      - update
  - apiGroups:
      - apps.foundationdb.org
    resources:
      - foundationdbrestores
    verbs:
      - create
      - delete
      - get
      - list
      - patch
      - update
      - watch
  - apiGroups:
      - apps.foundationdb.org
    resources:
      - foundationdbrestores/status
    verbs:
      - get
      - patch
      - update
  # Leases (the operator log shows it acquiring a leader lease).
  - apiGroups:
      - coordination.k8s.io
    resources:
      - leases
    verbs:
      - create
      - delete
      - get
      - list
      - patch
      - update
      - watch
---
# Binds the fdb-kubernetes-operator-manager-role Role to the operator's
# ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  creationTimestamp: null
  name: fdb-kubernetes-operator-manager-rolebinding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: fdb-kubernetes-operator-manager-role
subjects:
  - kind: ServiceAccount
    name: fdb-kubernetes-operator-controller-manager
---
# Deployment running a single replica of the FoundationDB operator manager.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: fdb-kubernetes-operator-controller-manager
    control-plane: controller-manager
  name: fdb-kubernetes-operator-controller-manager
spec:
  replicas: 1
  selector:
    matchLabels:
      app: fdb-kubernetes-operator-controller-manager
  template:
    metadata:
      labels:
        app: fdb-kubernetes-operator-controller-manager
        control-plane: controller-manager
    spec:
      containers:
        - command:
            - /manager
          env:
            # Set to the namespace this pod runs in.
            - name: WATCH_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          image: foundationdb/fdb-kubernetes-operator:v1.12.0
          name: manager
          ports:
            - containerPort: 8080
              name: metrics
          resources:
            limits:
              cpu: 500m
              memory: 256Mi
            requests:
              cpu: 500m
              memory: 256Mi
          securityContext:
            allowPrivilegeEscalation: false
            privileged: false
            readOnlyRootFilesystem: true
          # The root filesystem is read-only, so writable paths are
          # provided through the emptyDir volumes declared below.
          volumeMounts:
            - mountPath: /tmp
              name: tmp
            - mountPath: /var/log/fdb
              name: logs
            - mountPath: /usr/bin/fdb
              name: fdb-binaries
      initContainers:
        # Writes the 7.1 client library and the fdbcli/fdbbackup/fdbrestore
        # binaries into the fdb-binaries volume, which the manager container
        # mounts at /usr/bin/fdb.
        - args:
            - --copy-library
            - "7.1"
            - --copy-binary
            - fdbcli
            - --copy-binary
            - fdbbackup
            - --copy-binary
            - fdbrestore
            - --output-dir
            - /var/output-files/7.1.15
            - --init-mode
          image: foundationdb/foundationdb-kubernetes-sidecar:7.1.15-1
          name: foundationdb-kubernetes-init-7-1
          volumeMounts:
            - mountPath: /var/output-files
              name: fdb-binaries
      securityContext:
        fsGroup: 4059
        runAsGroup: 4059
        runAsUser: 4059
      serviceAccountName: fdb-kubernetes-operator-controller-manager
      terminationGracePeriodSeconds: 10
      volumes:
        - emptyDir: {}
          name: tmp
        - emptyDir: {}
          name: logs
        - emptyDir: {}
          name: fdb-binaries
Cluster
# FoundationDBCluster resource "fdb-cluster" reconciled by the operator.
apiVersion: apps.foundationdb.org/v1beta2
kind: FoundationDBCluster
metadata:
  name: fdb-cluster
spec:
  automationOptions:
    replacements:
      enabled: true
  faultDomain:
    key: foundationdb.org/none
  labels:
    filterOnOwnerReference: false
    matchLabels:
      foundationdb.org/fdb-cluster-name: fdb-cluster
    processClassLabels:
      - foundationdb.org/fdb-process-class
    processGroupIDLabels:
      - foundationdb.org/fdb-process-group-id
  minimumUptimeSecondsForBounce: 60
  processCounts:
    cluster_controller: 1
    stateless: -1
  processes:
    # Settings under the "general" key.
    # NOTE(review): presumably applied to every process class - confirm
    # against the operator documentation.
    general:
      customParameters:
        - knob_disable_posix_kernel_aio=1
      podTemplate:
        spec:
          containers:
            - name: foundationdb
              resources:
                requests:
                  cpu: 100m
                  memory: 128Mi
              securityContext:
                runAsUser: 0
            - name: foundationdb-kubernetes-sidecar
              resources:
                limits:
                  cpu: 100m
                  memory: 128Mi
                requests:
                  cpu: 100m
                  memory: 128Mi
              securityContext:
                runAsUser: 0
          initContainers:
            - name: foundationdb-kubernetes-init
              resources:
                limits:
                  cpu: 100m
                  memory: 128Mi
                requests:
                  cpu: 100m
                  memory: 128Mi
              securityContext:
                runAsUser: 0
      # Each process group requests a 16G PersistentVolumeClaim.
      # NOTE(review): pods stay Pending if these claims cannot be bound;
      # check `kubectl get pvc,pods` when the operator reports 0 running Pods.
      volumeClaimTemplate:
        spec:
          resources:
            requests:
              storage: 16G
  routing:
    headlessService: true
  sidecarContainer:
    enableLivenessProbe: true
    enableReadinessProbe: false
  useExplicitListenAddress: true
  version: 7.1.15
And here is the log:
kubectl logs -f --all-containers --namespace default fdb-kubernetes-operator-controller-manager-558dff4978-jtf59
{"level":"info","ts":1674140978.2800398,"logger":"setup","msg":"Operator starting in single namespace mode","namespace":"default"}
{"level":"info","ts":1674140978.5891607,"logger":"controller-runtime.metrics","msg":"Metrics server is starting to listen","addr":":8080"}
{"level":"info","ts":1674140978.5906844,"logger":"setup","msg":"Moving FDB binary file","currentPath":"/usr/bin/fdb/7.1.15/bin/7.1.15/fdbcli","newPath":"/usr/bin/fdb/7.1/fdbcli"}
{"level":"info","ts":1674140978.5913315,"logger":"setup","msg":"Moving FDB binary file","currentPath":"/usr/bin/fdb/7.1.15/bin/7.1.15/fdbbackup","newPath":"/usr/bin/fdb/7.1/fdbbackup"}
{"level":"info","ts":1674140978.5913672,"logger":"setup","msg":"Moving FDB binary file","currentPath":"/usr/bin/fdb/7.1.15/bin/7.1.15/fdbrestore","newPath":"/usr/bin/fdb/7.1/fdbrestore"}
{"level":"info","ts":1674140978.5914366,"logger":"setup","msg":"Moving FDB library file","currentPath":"/usr/bin/fdb/7.1.15/lib/libfdb_c.so","newPath":"/usr/bin/fdb/libfdb_c_7.1.15.so"}
{"level":"info","ts":1674140978.592798,"logger":"setup","msg":"setup manager"}
{"level":"info","ts":1674140978.5933084,"msg":"Starting server","path":"/metrics","kind":"metrics","addr":"[::]:8080"}
{"level":"info","ts":1674140978.6942077,"msg":"attempting to acquire leader lease default/fdb-kubernetes-operator...\n"}
{"level":"info","ts":1674140997.3442132,"msg":"successfully acquired lease default/fdb-kubernetes-operator\n"}
{"level":"info","ts":1674140997.3444355,"msg":"Starting EventSource","controller":"foundationdbcluster","controllerGroup":"apps.foundationdb.org","controllerKind":"FoundationDBCluster","source":"kind source: *v1beta2.FoundationDBCluster"}
{"level":"info","ts":1674140997.344499,"msg":"Starting EventSource","controller":"foundationdbcluster","controllerGroup":"apps.foundationdb.org","controllerKind":"FoundationDBCluster","source":"kind source: *v1.Pod"}
{"level":"info","ts":1674140997.344508,"msg":"Starting EventSource","controller":"foundationdbcluster","controllerGroup":"apps.foundationdb.org","controllerKind":"FoundationDBCluster","source":"kind source: *v1.PersistentVolumeClaim"}
{"level":"info","ts":1674140997.3445222,"msg":"Starting EventSource","controller":"foundationdbcluster","controllerGroup":"apps.foundationdb.org","controllerKind":"FoundationDBCluster","source":"kind source: *v1.ConfigMap"}
{"level":"info","ts":1674140997.3445284,"msg":"Starting EventSource","controller":"foundationdbcluster","controllerGroup":"apps.foundationdb.org","controllerKind":"FoundationDBCluster","source":"kind source: *v1.Service"}
{"level":"info","ts":1674140997.3445196,"msg":"Starting EventSource","controller":"foundationdbbackup","controllerGroup":"apps.foundationdb.org","controllerKind":"FoundationDBBackup","source":"kind source: *v1beta2.FoundationDBBackup"}
{"level":"info","ts":1674140997.344577,"msg":"Starting EventSource","controller":"foundationdbbackup","controllerGroup":"apps.foundationdb.org","controllerKind":"FoundationDBBackup","source":"kind source: *v1.Deployment"}
{"level":"info","ts":1674140997.3445826,"msg":"Starting Controller","controller":"foundationdbbackup","controllerGroup":"apps.foundationdb.org","controllerKind":"FoundationDBBackup"}
{"level":"info","ts":1674140997.3445604,"msg":"Starting Controller","controller":"foundationdbcluster","controllerGroup":"apps.foundationdb.org","controllerKind":"FoundationDBCluster"}
{"level":"info","ts":1674140997.3447385,"msg":"Starting EventSource","controller":"foundationdbrestore","controllerGroup":"apps.foundationdb.org","controllerKind":"FoundationDBRestore","source":"kind source: *v1beta2.FoundationDBRestore"}
{"level":"info","ts":1674140997.3447611,"msg":"Starting Controller","controller":"foundationdbrestore","controllerGroup":"apps.foundationdb.org","controllerKind":"FoundationDBRestore"}
{"level":"info","ts":1674140997.4456177,"msg":"Starting workers","controller":"foundationdbrestore","controllerGroup":"apps.foundationdb.org","controllerKind":"FoundationDBRestore","worker count":1}
{"level":"info","ts":1674140997.4456224,"msg":"Starting workers","controller":"foundationdbcluster","controllerGroup":"apps.foundationdb.org","controllerKind":"FoundationDBCluster","worker count":1}
{"level":"info","ts":1674140997.445709,"msg":"Starting workers","controller":"foundationdbbackup","controllerGroup":"apps.foundationdb.org","controllerKind":"FoundationDBBackup","worker count":1}
{"level":"info","ts":1674141143.9275608,"logger":"controller","msg":"Attempting to run sub-reconciler","namespace":"default","cluster":"fdb-cluster","subReconciler":"controllers.updateStatus"}
{"level":"info","ts":1674141143.953133,"logger":"controller","msg":"Attempting to run sub-reconciler","namespace":"default","cluster":"fdb-cluster","subReconciler":"controllers.updateLockConfiguration"}
{"level":"info","ts":1674141143.953202,"logger":"controller","msg":"Attempting to run sub-reconciler","namespace":"default","cluster":"fdb-cluster","subReconciler":"controllers.updateConfigMap"}
{"level":"info","ts":1674141143.9655888,"logger":"controller","msg":"Attempting to run sub-reconciler","namespace":"default","cluster":"fdb-cluster","subReconciler":"controllers.checkClientCompatibility"}
{"level":"info","ts":1674141143.9656744,"logger":"controller","msg":"Attempting to run sub-reconciler","namespace":"default","cluster":"fdb-cluster","subReconciler":"controllers.deletePodsForBuggification"}
{"level":"info","ts":1674141143.9657261,"logger":"controller","msg":"Attempting to run sub-reconciler","namespace":"default","cluster":"fdb-cluster","subReconciler":"controllers.replaceMisconfiguredProcessGroups"}
{"level":"info","ts":1674141143.9657629,"logger":"controller","msg":"Attempting to run sub-reconciler","namespace":"default","cluster":"fdb-cluster","subReconciler":"controllers.replaceFailedProcessGroups"}
{"level":"info","ts":1674141143.965837,"logger":"controller","msg":"Attempting to run sub-reconciler","namespace":"default","cluster":"fdb-cluster","subReconciler":"controllers.addProcessGroups"}
{"level":"info","ts":1674141143.9881136,"logger":"controller","msg":"Attempting to run sub-reconciler","namespace":"default","cluster":"fdb-cluster","subReconciler":"controllers.addServices"}
{"level":"info","ts":1674141144.0116718,"logger":"controller","msg":"Attempting to run sub-reconciler","namespace":"default","cluster":"fdb-cluster","subReconciler":"controllers.addPVCs"}
{"level":"info","ts":1674141144.10706,"logger":"controller","msg":"Attempting to run sub-reconciler","namespace":"default","cluster":"fdb-cluster","subReconciler":"controllers.addPods"}
{"level":"info","ts":1674141144.4174266,"logger":"controller","msg":"Attempting to run sub-reconciler","namespace":"default","cluster":"fdb-cluster","subReconciler":"controllers.generateInitialClusterFile"}
{"level":"info","ts":1674141144.4174857,"logger":"controller","msg":"Generating initial cluster file","namespace":"default","cluster":"fdb-cluster","reconciler":"generateInitialClusterFile"}
{"level":"info","ts":1674141144.4177673,"logger":"controller","msg":"Reconciliation terminated early","namespace":"default","cluster":"fdb-cluster","subReconciler":"controllers.generateInitialClusterFile","requeueAfter":15,"message":"cannot find enough running Pods to recruit coordinators. Require 3, got 0 Pods"}
Could you please give me some advice on how to debug this and get past it?
Thanks again.