Skip to content

Troubleshooting Guide

Orion Logo


Overview

This guide provides helpful kubectl commands for general cluster troubleshooting.

Connect to ArgoCD

# Port-forward argocd-server to provide access to the ArgoCD GUI.
# View logs, sync status, deployed services etc.
# example connection: localhost:8080/argocd
kubectl port-forward svc/argocd-server -n argocd 8080:443

General Cluster Checks

# Quick cluster health check
kubectl cluster-info

# Client and server versions (note: --short was removed in kubectl v1.28, where short output is the default)
kubectl version --short

Nodes

# Get list of all nodes
kubectl get nodes 

# Describe a specific node
kubectl describe node <node-name>

# Verify node health
kubectl describe nodes | grep -i "ready\|schedulable"

# Check system resource usage
kubectl top nodes

Pods

# Get all pods in all namespaces
kubectl get pods -A

# Get all pods in a specific namespace
kubectl get pods -n <namespace>

# Describe a specific pod
kubectl describe pod <pod-name> -n <namespace>

# Get a pod's logs
kubectl logs <pod-name> -n <namespace>

Deployments

# Get deployments in a specific namespace
kubectl get deploy -n <namespace>

# Get deployments in all namespaces
kubectl get deploy -A

# Describe a specific deployment
kubectl describe deploy <deployment-name> -n <namespace>

# Force restart all deployments in the argocd namespace
kubectl rollout restart deployment -n argocd

# Force restart a specific deployment
kubectl rollout restart deployment/<deployment-name> -n <namespace>

Storage

# Check persistent volumes and persistent volume claims
kubectl get pv
kubectl get pvc -A

# Verify storage class
kubectl get storageclass

Networking and Ingress

# Check ingress controller status
kubectl get pods -A -l app.kubernetes.io/name=ingress-nginx

# Get ingresses in a specific namespace (as YAML)
kubectl get ingress -n <namespace> -o yaml

# Get all network policies
kubectl get networkpolicy -A

# Describe a specific network policy
kubectl describe networkpolicy <policy> -n <namespace>

Events

# Get all events from a specific namespace
kubectl get events -n <namespace>