...
Committing Code
Code Block |
---|
|
# clone
git clone ssh://michaelobrien@gerrit.onap.org:29418/logging-analytics
# modify files
# stage your changes
git add .
git commit -m "your commit message"
# commit your staged changes with sign-off
git commit -s --amend
# add Issue-ID after Change-ID
# Submit your commit to ONAP Gerrit for review
git review
# goto https://gerrit.onap.org/r/#/dashboard/self |
...
Workstation configuration
Ubuntu 16.04 on VMware Workstation 15, Fusion 8, or an AWS/Azure VM
Note: do not use the GUI upgrade (it will cause the VM to periodically lock up) - run individual apt-get commands instead
Code Block |
---|
language | bash |
---|
theme | Midnight |
---|
|
# start with clean VM, I use root, you can use the recommended non-root account
sudo vi /etc/hosts
# add your hostname to ::1 and 127.0.0.1 or each sudo command will hang for up to 10 sec on DNS resolution especially on ubuntu 18.04
sudo apt-get update
sudo apt-get install openjdk-8-jdk
# skip the desktop install on a headless vm
sudo apt-get install ubuntu-desktop
#sudo apt-get install git
sudo apt-get install maven
#or
sudo wget http://apache.mirror.gtcomm.net/maven/maven-3/3.5.4/binaries/apache-maven-3.5.4-bin.tar.gz
sudo cp apache-maven-3.5.4-bin.tar.gz /opt
cd /opt
tar -xvf apache-maven-3.5.4-bin.tar.gz
sudo vi /etc/environment
MAVEN_OPTS="-Xms8192m -Djava.net.preferIPv4Stack=true"
# restart the terminal
ubuntu@ip-172-31-78-76:~$ mvn -version
Apache Maven 3.5.4 (1edded0938998edf8bf061f1ceb3cfdeccf443fe; 2018-06-17T18:33:14Z)
Maven home: /opt/apache-maven-3.5.4
Java version: 1.8.0_171, vendor: Oracle Corporation, runtime: /usr/lib/jvm/java-8-openjdk-amd64/jre
sudo vi ~/.ssh/config
Host *
StrictHostKeyChecking no
UserKnownHostsFile=/dev/null
# scp onap gerrit cert into VM # a couple options on copying the ssh key
# from another machine - stream the key over ssh
obrien:obrienlabs amdocs$ cat ~/.ssh/onap_rsa | ssh ubuntu@ons.onap.info 'cat >> .ssh/id_rsa && chmod 400 ~/.ssh/id_rsa'
# test your gerrit access
sudo git config --global --add gitreview.username michaelobrien
sudo git config --global user.email frank.obrien@amdocs.com
sudo git config --global user.name "Michael OBrien"
sudo git config --global gitreview.remote origin
sudo mkdir log-326-rancher-ver
cd log-326-rancher-ver/
sudo git clone ssh://michaelobrien@gerrit.onap.org:29418/logging-analytics
cd logging-analytics/
sudo vi deploy/rancher/oom_rancher_setup.sh
sudo git add deploy/rancher/oom_rancher_setup.sh .
# setup git-review
sudo apt-get install git-review
sudo echo "Key copied"
Key copied
sudo chown ubuntu:ubuntu ~/.ssh/onap_rsa
# or
# scp onap gerrit cert into VM from host macbook
obrien:obrienlabs amdocs$ scp ~/.ssh/onap_rsa amdocs@192.168.211.129:~/
move to root
sudo su -
root@obriensystemsu0:~# cp /home/amdocs/onap_rsa .
ls /home/amdocs/.m2
cp onap_rsa ~/.ssh/id_rsa
chmod 400 ~/.ssh/id_rsa
# move from root to ubuntu - if using non-root user
sudo chown ubuntu:ubuntu ~/.ssh/onap_rsa
# test your gerrit access
sudo git config --global gitreview.remote origin
# upload a patch
sudo git commit -am "update rancher version to 1.6.18"
# 2nd line should be "Issue-ID: LOG-326"
sudo git commit -s --amend
sudo git review
Your change was committed before the commit hook was installed.
Amending the commit to add a gerrit change id.
remote: Processing changes: new: 1, refs: 1, done
remote: New Changes:
remote: https://gerrit.onap.org/r/55299 update rancher version to 1.6.18
remote:
To ssh://michaelobrien@gerrit.onap.org:29418/logging-analytics
* [new branch] HEAD -> refs/publish/master
# see
https://gerrit.onap.org/r/#/c/55299/
if you get a corrupted filesystem run "fsck -y /dev/sda1" |
...
https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands
...
Use a different kubectl context
Code Block |
---|
language | bash |
---|
|
#kubectl automatically via cd.sh in LOG-326
# get the dev.yaml and set any pods you want up to true as well as fill out the openstack parameters
sudo wget https://git.onap.org/oom/plain/kubernetes/onap/resources/environments/dev.yaml
sudo cp logging-analytics/deploy/cd.sh .
# or
# manually
cd oom/kubernetes/
sudo make clean
sudo make all
sudo make onap
sudo helm install local/onap -n onap --namespace onap -f onap/resources/environments/disable-allcharts.yaml --set log.enabled=true
# adding another (so)
sudo helm upgrade local/onap --namespace onap -f onap/resources/environments/disable-allcharts.yaml --set so.enabled=true --set log.enabled=true |
Get the nodeport of a particular service
https://kubernetes.io/docs/reference/kubectl/jsonpath/
Code Block |
---|
|
# human readable list
kubectl get services --all-namespaces | grep robot
# machine readable number - via JSONPath
kubectl get --namespace onap -o jsonpath="{.spec.ports[0].nodePort}" services robot |
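As a usage sketch (assuming the robot service is reachable on the node at the returned nodePort, and that you run this on a cluster node), the JSONPath value can be captured in a shell variable and reused:
Code Block |
---|
|
# capture the nodePort and reuse it - the 127.0.0.1 node address is an assumption, adjust to your environment
ROBOT_PORT=$(kubectl get --namespace onap -o jsonpath="{.spec.ports[0].nodePort}" services robot)
echo "robot nodePort: $ROBOT_PORT"
curl -s "http://127.0.0.1:$ROBOT_PORT" | head |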
Test DNS URLS in the kubernetes ONAP namespace
Code Block |
---|
|
test urls in the robot container
wget http://pomba-sdcctxbuilder.onap:9530/sdccontextbuilder/health
wget http://pomba-networkdiscoveryctxbuilder.onap:9530/ndcontextbuilder/health |
...
Code Block |
---|
language | bash |
---|
theme | Midnight |
---|
|
# override global docker pull policy for a single component
# set in oom/kubernetes/onap/values.yaml
# use global.pullPolicy in your -f yaml or a --set
|
Exec into a container of a pod with multiple containers
Code Block |
---|
|
# for
onap logdemonode-logdemonode-5c8bffb468-dhzcc 2/2 Running 0 1m
# use
kubectl exec -it logdemonode-logdemonode-5c8bffb468-dhzcc -n onap -c logdemonode bash |
Push a file into a Kubernetes container/pod
Code Block |
---|
|
# copy files from the vm to the robot container - to avoid building a new robot image
root@ubuntu:~/_dev/62405_logback/testsuite/robot/testsuites# kubectl cp health-check.robot onap-robot-7c84f54558-f8mw7: -n onap
root@ubuntu:~/_dev/62405_logback/testsuite/robot/testsuites# kubectl cp ../resources/pomba_interface.robot onap-robot-7c84f54558-f8mw7: -n onap
move the files in the robot container to the proper dir
root@onap-robot-7c84f54558-f8mw7:/# cp health-check.robot /var/opt/OpenECOMP_ETE/robot/testsuites/
root@onap-robot-7c84f54558-f8mw7:/# ls
bin boot dev etc health-check.robot home lib lib64 media mnt opt pomba_interface.robot proc root run sbin share srv sys tmp usr var
root@onap-robot-7c84f54558-f8mw7:/# cp pomba_interface.robot /var/opt/OpenECOMP_ETE/robot/resources/
retest health
root@ubuntu:~/_dev/62405_logback/oom/kubernetes/robot# ./ete-k8s.sh onap health
and directly in the robot container
wget http://pomba-sdcctxbuilder.onap:9530/sdccontextbuilder/health
wget http://pomba-networkdiscoveryctxbuilder.onap:9530/ndcontextbuilder/health |
Restarting a container
Restarting a pod
If you change configuration like the logback.xml in a pod or would like restart an entire pod like the log and portal pods
Code Block |
---|
language | bash |
---|
theme | Midnight |
---|
|
cd oom/kubernetes
# do a make if anything is modified in your charts
sudo make all
#sudo make onap
ubuntu@ip-172-31-19-23:~/oom/kubernetes$ sudo helm upgrade -i onap local/onap --namespace onap --set log.enabled=false
# wait and check in another terminal for all containers to terminate
ubuntu@ip-172-31-19-23:~$ kubectl --kubeconfig ~/.kube/config2 get pods --all-namespaces |
Adding user kubectl accounts
Normally you don't use the admin account directly when working with particular namespaces. The steps below show how to create a namespaced service account, extract its token, and add the appropriate role bindings.
Code Block |
---|
|
# TODO: create a script out of this
# create a namespace
# https://kubernetes.io/docs/tasks/administer-cluster/namespaces-walkthrough/#create-new-namespaces
vi mobrien_namespace.yaml
{
"kind": "Namespace",
"apiVersion": "v1",
"metadata": {
"name": "mobrien",
"labels": {
"name": "mobrien"
}
}
}
kubectl create -f mobrien_namespace.yaml
# or
kubectl --kubeconfig ~/.kube/admin create ns mobrien
namespace "mobrien" created
# service account
kubectl --kubeconfig ~/.kube/admin --namespace=mobrien create sa mobrien
serviceaccount "mobrien" created
# rolebinding mobrien
kubectl --kubeconfig ~/.kube/admin --namespace=mobrien create rolebinding mobrien-mobrien-privilegedpsp --clusterrole=privilegedpsp --serviceaccount=mobrien:mobrien
rolebinding "mobrien-mobrien-privilegedpsp" created
# rolebinding default
kubectl --kubeconfig ~/.kube/admin --namespace=mobrien create rolebinding mobrien-default-privilegedpsp --clusterrole=privilegedpsp --serviceaccount=mobrien:default
rolebinding "mobrien-default-privilegedpsp" created
# rolebinding admin
kubectl --kubeconfig ~/.kube/admin --namespace=mobrien create rolebinding mobrien-mobrien-admin --clusterrole=admin --serviceaccount=mobrien:mobrien
rolebinding "mobrien-mobrien-admin" created
# rolebinding persistent-volume-role
kubectl --kubeconfig ~/.kube/admin --namespace=mobrien create clusterrolebinding mobrien-mobrien-persistent-volume-role --clusterrole=persistent-volume-role --serviceaccount=mobrien:mobrien
clusterrolebinding "mobrien-mobrien-persistent-volume-role" created
# rolebinding default-persistent-volume-role
kubectl --kubeconfig ~/.kube/admin --namespace=mobrien create clusterrolebinding mobrien-default-persistent-volume-role --clusterrole=persistent-volume-role --serviceaccount=mobrien:default
clusterrolebinding "mobrien-default-persistent-volume-role" created
# rolebinding helm-pod-list
kubectl --kubeconfig ~/.kube/admin --namespace=mobrien create clusterrolebinding mobrien-mobrien-helm-pod-list --clusterrole=helm-pod-list --serviceaccount=mobrien:mobrien
clusterrolebinding "mobrien-mobrien-helm-pod-list" created
# rolebinding default-helm-pod-list
kubectl --kubeconfig ~/.kube/admin --namespace=mobrien create clusterrolebinding mobrien-default-helm-pod-list --clusterrole=helm-pod-list --serviceaccount=mobrien:default
clusterrolebinding "mobrien-default-helm-pod-list" created
# get the serviceAccount and extract the token to place into a config yaml
kubectl --kubeconfig ~/.kube/admin --namespace=mobrien get sa
NAME SECRETS AGE
default 1 20m
mobrien 1 18m
kubectl --kubeconfig ~/.kube/admin --namespace=mobrien describe serviceaccount mobrien
Name: mobrien
Namespace: mobrien
Labels: <none>
Annotations: <none>
Image pull secrets: <none>
Mountable secrets: mobrien-token-v9z5j
Tokens: mobrien-token-v9z5j
TOKEN=$(kubectl --kubeconfig ~/.kube/admin --namespace=mobrien describe secrets "$(kubectl --kubeconfig ~/.kube/admin --namespace=mobrien describe serviceaccount mobrien | grep -i Tokens | awk '{print $2}')" | grep token: | awk '{print $2}')
echo $TOKEN
eyJO....b3VudC
# put this in your ~/.kube/config and edit the namespace
|
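A minimal sketch of wiring the extracted token into your kubeconfig - the cluster name "my-cluster" below is an assumption, use the cluster name already present in your admin kubeconfig:
Code Block |
---|
|
# hypothetical sketch - register the extracted $TOKEN as a kubectl user and context
kubectl config set-credentials mobrien --token="$TOKEN"
kubectl config set-context mobrien --cluster=my-cluster --user=mobrien --namespace=mobrien
kubectl config use-context mobrien |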
see also https://stackoverflow.com/questions/44948483/create-user-in-kubernetes-for-kubectl
Helm on Rancher unauthorized
Cycle the RBAC to Github off/on if you get any security issue running helm commands
Code Block |
---|
|
ubuntu@a-ons1-master:~$ watch kubectl get pods --all-namespaces | grep onap-log
onap          onap-log-elasticsearch-7557486bc4-5mng9   0/1   CrashLoopBackOff   9   29m
onap          onap-log-kibana-fc88b6b79-nt7sd           1/1   Running            0   35m
ubuntu@a-ons1-master:~$ sudo helm list
Error: Unauthorized
ubuntu@a-ons1-master:~$ sudo helm list
NAME         REVISION   UPDATED                    STATUS     CHART         NAMESPACE
onap         4          Thu Mar 7 13:03:29 2019    DEPLOYED   onap-3.0.0    onap
onap-dmaap   1          Thu Mar 7 13:03:32 2019    DEPLOYED   dmaap-3.0.0   onap |
Working with JSONPath
https://kubernetes.io/docs/reference/kubectl/jsonpath/
Fortunately we can script most of what we need to query from the state of our kubernetes deployment using JSONPath. Optionally we can pipe the output through jq for additional processing, as in the sketch below.
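An optional sketch of that jq post-processing (assumes jq is installed on the workstation) - the same non-Running filter used later in this section, expressed as a jq query:
Code Block |
---|
|
# list pods that are not in the Running phase - namespace, name and phase per line
kubectl get pods --all-namespaces -o json \
  | jq -r '.items[] | select(.status.phase != "Running") | "\(.metadata.namespace) \(.metadata.name) \(.status.phase)"' |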
Get the full json output to design JSONPath queries
Jira Legacy |
---|
server | System Jira |
---|
serverId | 4733707d-2057-3a0f-ae5e-4fd8aff50176 |
---|
key | LOG-914 |
---|
|
Code Block |
---|
|
kubectl get pods --all-namespaces -o json
# we are looking to shut down a rogue pod that is not responding to the normal deletion commands - but it contains a generated name
onap          onap-portal-portal-sdk-7c49c97955-smbws    0/2   Terminating   0   2d
ubuntu@onap-oom-obrien-rancher-e0:~$ kubectl get pods --field-selector=status.phase!=Running --all-namespaces
NAMESPACE     NAME                                        READY   STATUS        RESTARTS   AGE
onap          onap-log-logstash-ftxfz                     1/1     Terminating   0          4h
onap          onap-log-logstash-gl59m                     1/1     Terminating   0          4h
onap          onap-log-logstash-nxsf8                     1/1     Terminating   0          4h
onap          onap-log-logstash-w8q8m                     1/1     Terminating   0          4h
onap          onap-portal-portal-sdk-7c49c97955-smbws     0/2     Terminating   0          2d
sudo helm upgrade -i onap local/onap --namespace onap --set portal.enabled=false
sudo vi portal/charts/portal-sdk/resources/config/deliveries/properties/ONAPPORTALSDK/logback.xml
sudo make portal
sudo make onap
# in the json output the container name is under "spec": {"containers": [{"name": "portal-sdk", ...
kubectl get pods --namespace onap -o jsonpath="{.items[*].spec.containers[0].name}"
portal-sdk
# so combining the two queries
kubectl get pods --field-selector=status.phase!=Running --all-namespaces -o jsonpath="{.items[*].metadata.name}"
onap-portal-portal-sdk-7c49c97955-smbws
# and wrapping it with a delete command
export POD_NAME=$(kubectl get pods --field-selector=status.phase!=Running --all-namespaces -o jsonpath="{.items[*].metadata.name}")
echo "$POD_NAME"
kubectl delete pods $POD_NAME --grace-period=0 --force -n onap
ubuntu@onap-oom-obrien-rancher-e0:~$ sudo ./term.sh
onap-portal-portal-sdk-7c49c97955-smbws
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
pod "onap-portal-portal-sdk-7c49c97955-smbws" force deleted
|
Installing a pod
Code Block |
---|
language | bash |
---|
theme | Midnight |
---|
|
# automatically via cd.sh in LOG-326
# get the dev.yaml and set any pods you want up to true as well as fill out the openstack parameters
sudo wget https://git.onap.org/oom/plain/kubernetes/onap/resources/environments/dev.yaml
sudo cp logging-analytics/deploy/cd.sh .
# or
# manually
cd oom/kubernetes/
sudo make clean
sudo make all
sudo make onap
sudo helm install local/onap -n onap --namespace onap -f onap/resources/environments/disable-allcharts.yaml --set log.enabled=true
# adding another (so)
sudo helm upgrade local/onap --namespace onap -f onap/resources/environments/disable-allcharts.yaml --set so.enabled=true --set log.enabled=true |
Get the nodeport of a particular service
Code Block |
---|
|
# human readable list
kubectl get services --all-namespaces | grep robot
# machine readable number - via JSONPath
kubectl get --namespace onap -o jsonpath="{.spec.ports[0].nodePort}" services robot |
Test DNS URLS in the kubernetes ONAP namespace
Code Block |
---|
|
test urls in the robot container
wget http://pomba-sdcctxbuilder.onap:9530/sdccontextbuilder/health
wget http://pomba-networkdiscoveryctxbuilder.onap:9530/ndcontextbuilder/health |
Override global policy
Code Block |
---|
language | bash |
---|
theme | Midnight |
---|
|
# override global docker pull policy for a single component
# set in oom/kubernetes/onap/values.yaml
# use global.pullPolicy in your -f yaml or a --set
|
Exec into a container of a pod with multiple containers
Code Block |
---|
|
# for
onap          logdemonode-logdemonode-5c8bffb468-dhzcc   2/2   Running   0   1m
# use
kubectl exec -it logdemonode-logdemonode-5c8bffb468-dhzcc -n onap -c logdemonode bash |
Push a file into a Kubernetes container/pod
Code Block |
---|
|
# copy files from the vm to the robot container - to avoid building a new robot image
root@ubuntu:~/_dev/62405_logback/testsuite/robot/testsuites# kubectl cp health-check.robot onap-robot-7c84f54558-f8mw7: -n onap
root@ubuntu:~/_dev/62405_logback/testsuite/robot/testsuites# kubectl cp ../resources/pomba_interface.robot onap-robot-7c84f54558-f8mw7: -n onap
move the files in the robot container to the proper dir
root@onap-robot-7c84f54558-f8mw7:/# cp health-check.robot /var/opt/OpenECOMP_ETE/robot/testsuites/
root@onap-robot-7c84f54558-f8mw7:/# ls
bin boot dev etc health-check.robot home lib lib64 media mnt opt pomba_interface.robot proc root run sbin share srv sys tmp usr var
root@onap-robot-7c84f54558-f8mw7:/# cp pomba_interface.robot /var/opt/OpenECOMP_ETE/robot/resources/
retest health
root@ubuntu:~/_dev/62405_logback/oom/kubernetes/robot# ./ete-k8s.sh onap health
and directly in the robot container
wget http://pomba-sdcctxbuilder.onap:9530/sdccontextbuilder/health
wget http://pomba-networkdiscoveryctxbuilder.onap:9530/ndcontextbuilder/health |
Restarting a container
Restarting a pod
If you change configuration like the logback.xml in a pod or would like restart an entire pod like the log and portal pods
Code Block |
---|
language | bash |
---|
theme | Midnight |
---|
|
cd oom/kubernetes
# do a make if anything is modified in your charts
sudo make all
#sudo make onap
ubuntu@ip-172-31-19-23:~/oom/kubernetes$ sudo helm upgrade -i onap local/onap --namespace onap --set log.enabled=false
# wait and check in another terminal for all containers to terminate
ubuntu@ip-172-31-19-23:~$ kubectl get pods --all-namespaces | grep onap-log
onap          onap-log-logstash-c5z4d                    1/1   Terminating   0   4h
onap          onap-log-logstash-ftxfz                    1/1   Terminating   0   4h
onap          onap-log-logstash-gl59m                    1/1   Terminating   0   4h
onap          onap-log-logstash-nxsf8                    1/1   Terminating   0   4h
onap          onap-log-logstash-w8q8m                    1/1   Terminating   0   4h
sudo helm upgrade -i onap local/onap --namespace onap --set portal.enabled=false
sudo vi portal/charts/portal-sdk/resources/config/deliveries/properties/ONAPPORTALSDK/logback.xml
sudo make portal
sudo make onap
sudo helm upgrade -i onap local/onap --namespace onap --set log.enabled=true
sudo helm upgrade -i onap local/onap --namespace onap --set portal.enabled=true
ubuntu@ip-172-31-19-23:~$ kubectl get pods --all-namespaces | grep onap-log
onap          onap-log-elasticsearch-7557486bc4-2jd65    0/1   Init:0/1      0   31s
onap          onap-log-kibana-fc88b6b79-5xqg4            0/1   Init:0/1      0   31s
onap          onap-log-logstash-5vq82                    0/1   Init:0/1      0   31s
onap          onap-log-logstash-gvr9z                    0/1   Init:0/1      0   31s
onap          onap-log-logstash-qqzq5                    0/1   Init:0/1      0   31s
onap          onap-log-logstash-vbp2x                    0/1   Init:0/1      0   31s
onap          onap-log-logstash-wr9rd                    0/1   Init:0/1      0   31s
ubuntu@ip-172-31-19-23:~$ kubectl get pods --all-namespaces | grep onap-portal
onap          onap-portal-app-8486dc7ff8-nbps7           0/2   Init:0/1      0   9m
onap          onap-portal-cassandra-8588fbd698-4wthv     1/1   Running       0   9m
onap          onap-portal-db-7d6b95cd94-9x4kf            0/1   Running       0   9m
onap          onap-portal-db-config-dpqkq                0/2   Init:0/1      0   9m
onap          onap-portal-sdk-77cd558c98-5255r           0/2   Init:0/1      0   9m
onap          onap-portal-widget-6469f4bc56-g8s62        0/1   Init:0/1      0   9m
onap          onap-portal-zookeeper-5d8c598c4c-czpnz     1/1   Running       0   9m |
Kubernetes inter pod communication - using DNS service addresses
Try to use the service name (with or without the namespace) - not the service IP address for inter namespace communication (nodeports or ingress is only required outside the namespace)
For example log-ls:5044 or log-ls.onap:5044
Code Block |
---|
|
# example curl call between AAI and SDC
amdocs@obriensystemsu0:~$ kubectl exec -it -n onap onap-aai-aai-graphadmin-7bd5fc9bd-l4v4z bash
Defaulting container name to aai-graphadmin.
root@aai-graphadmin:/opt/app/aai-graphadmin# curl http://sdc-fe:8181
<HTML><HEAD><TITLE>Error 404 - Not Found</TITLE><BODY><H2>Error 404 - Not Found.</H2>
</ul><hr><a href="http://eclipse.org/jetty"><img border=0 src="/favicon.ico"/></a> <a href="http://eclipse.org/jetty">Powered by Jetty:// 9.4.12.v20180830</a><hr/> |
Downgrade docker if required
Code Block |
---|
sudo apt-get autoremove -y docker-engine |
Change max-pods from default 110 pod limit
Rancher ships with a 110 pod limit - you can override this on the kubernetes template for 1.10
https://lists.onap.org/g/onap-discuss/topic/oom_110_kubernetes_pod/25213556?p=,,,20,0,0,0::recentpostdate%2Fsticky,,,20,2,0,25213556
Manual procedure: change the kubernetes template (1pt2) before using it to create an environment (1a7)
add --max-pods=500 to the "Additional Kubelet Flags" box on the v1.10.13 version of the kubernetes template from the "Manage Environments" dropdown on the left of the 8880 rancher console.
View file |
---|
name | 20180905_rancher_increase_110_pod_limit.mp4 |
---|
height | 250 |
---|
|
Jira Legacy |
---|
server | System Jira |
---|
serverId | 4733707d-2057-3a0f-ae5e-4fd8aff50176 |
---|
key | OOM-1137 |
---|
|
Image Added
Image Added
Or capture the output of the REST PUT call - and add around line 111 of the script https://git.onap.org/logging-analytics/tree/deploy/rancher/oom_rancher_setup.sh#n111
Image Added
Automated - ongoing
Code Block |
---|
|
ubuntu@ip-172-31-27-183:~$ curl 'http://127.0.0.1:8880/v2-beta/projecttemplates/1pt2' --data-binary '{"id":"1pt2","type":"projectTemplate","baseType":"projectTemplate","name":"Kubernetes","state":"active","accountId":null,"created":"2018-09-05T14:12:24Z","createdTS":1536156744000,"data":{"fields":{"stacks":[{"name":"healthcheck","templateId":"library:infra*healthcheck"},{"answers":{"CONSTRAINT_TYPE":"none","CLOUD_PROVIDER":"rancher","AZURE_CLOUD":"AzurePublicCloud","AZURE_TENANT_ID":"","AZURE_CLIENT_ID":"","AZURE_CLIENT_SECRET":"","AZURE_SEC_GROUP":"","RBAC":false,"REGISTRY":"","BASE_IMAGE_NAMESPACE":"","POD_INFRA_CONTAINER_IMAGE":"rancher/pause-amd64:3.0","HTTP_PROXY":"","NO_PROXY":"rancher.internal,cluster.local,rancher-metadata,rancher-kubernetes-auth,kubernetes,169.254.169.254,169.254.169.250,10.42.0.0/16,10.43.0.0/16","ENABLE_ADDONS":true,"ENABLE_RANCHER_INGRESS_CONTROLLER":true,"RANCHER_LB_SEPARATOR":"rancherlb","DNS_REPLICAS":"1","ADDITIONAL_KUBELET_FLAGS":"","FAIL_ON_SWAP":"false","ADDONS_LOG_VERBOSITY_LEVEL":"2","AUDIT_LOGS":false,"ADMISSION_CONTROLLERS":"NamespaceLifecycle,LimitRanger,ServiceAccount,PersistentVolumeLabel,DefaultStorageClass,DefaultTolerationSeconds,ResourceQuota","SERVICE_CLUSTER_CIDR":"10.43.0.0/16","DNS_CLUSTER_IP":"10.43.0.10","KUBEAPI_CLUSTER_IP":"10.43.0.1","KUBERNETES_CIPHER_SUITES":"TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305","DASHBOARD_CPU_LIMIT":"100m","DASHBOARD_MEMORY_LIMIT":"300Mi","INFLUXDB_HOST_PATH":"","EMBEDDED_BACKUPS":true,"BACKUP_PERIOD":"15m0s","BACKUP_RETENTION":"24h","ETCD_HEARTBEAT_INTERVAL":"500","ETCD_ELECTION_TIMEOUT":"5000"},"name":"kubernetes","templateVersionId":"library:infra*k8s:47"},{"type":"catalogTemplate","name":"network-services","templateId":"library:infra*network-services"},{"type":"catalogTemplate","name":"ipsec","templateId":"library:infra*ipsec"}],"transitioning":"no","transitioningMessage":null,"transitioningProgress":null,"uuid":null}' --compressed
{"id":"9107b9ce-0b61-4c22-bc52-f147babb0ba7","type":"error","links":{},"actions":{},"status":405,"code":"Method not allowed","message":"Method not allowed","detail":null,"baseType":"error"} |
Results
Single AWS 244G 32vCore VM with 110 pod limit workaround - 164 pods (including both secondary DCAEGEN2 orchestrations at 30 and 55 min) - most of the remaining 8 container failures are known/in-progress issues.
Code Block |
---|
|
ubuntu@ip-172-31-20-218:~$ free
total used free shared buff/cache available
Mem: 251754696 111586672 45000724 193628 95167300 137158588
ubuntu@ip-172-31-20-218:~$ kubectl get pods --all-namespaces | grep onap | wc -l
164
ubuntu@ip-172-31-20-218:~$ kubectl get pods --all-namespaces | grep onap | grep -E '1/1|2/2' | wc -l
155
ubuntu@ip-172-31-20-218:~$ kubectl get pods --all-namespaces | grep -E '0/|1/2' | wc -l
8
ubuntu@ip-172-31-20-218:~$ kubectl get pods --all-namespaces | grep -E '0/|1/2'
onap dep-dcae-ves-collector-59d4ff58f7-94rpq 1/2 Running 0 4m
onap onap-aai-champ-68ff644d85-rv7tr 0/1 Running 0 59m
onap onap-aai-gizmo-856f86d664-q5pvg 1/2 CrashLoopBackOff 10 59m
onap onap-oof-85864d6586-zcsz5 0/1 ImagePullBackOff 0 59m
onap onap-pomba-kibana-d76b6dd4c-sfbl6 0/1 Init:CrashLoopBackOff 8 59m
onap onap-pomba-networkdiscovery-85d76975b7-mfk92 1/2 CrashLoopBackOff 11 59m
onap onap-pomba-networkdiscoveryctxbuilder-c89786dfc-qnlx9 1/2 CrashLoopBackOff 10 59m
onap onap-vid-84c88db589-8cpgr 1/2 CrashLoopBackOff 9 59m |
Operations
Get failed/pending containers
Code Block |
---|
theme | Midnight |
---|
linenumbers | true |
---|
|
kubectl get pods --all-namespaces | grep -E "0/|1/2" | wc -l |
Code Block |
---|
kubectl cluster-info
# get pods/containers
kubectl get pods --all-namespaces
# get port mappings
kubectl get services --all-namespaces -o wide
NAMESPACE NAME READY STATUS RESTARTS AGE
default nginx-1389790254-lgkz3 1/1 Running 1 5d
kube-system heapster-4285517626-x080g 1/1 Running 1 6d
kube-system kube-dns-638003847-tst97 3/3 Running 3 6d
kube-system kubernetes-dashboard-716739405-fnn3g 1/1 Running 2 6d
kube-system monitoring-grafana-2360823841-hr824 1/1 Running 1 6d
kube-system monitoring-influxdb-2323019309-k7h1t 1/1 Running 1 6d
kube-system tiller-deploy-737598192-x9wh5 1/1 Running 1 6d
# ssh into a pod
kubectl -n default exec -it nginx-1389790254-lgkz3 /bin/bash
# get logs
kubectl -n default logs -f nginx-1389790254-lgkz3 |
Exec
kubectl -n onap-aai exec -it aai-resources-1039856271-d9bvq bash
Bounce/Fix a failed container
Periodically one of the higher containers in a dependency tree will not restart in time to pick up its running child containers - usually this is the kibana container.
Fix this (or any container) by deleting the pod in question - kubernetes will bring another one up. A one-liner sketch follows the block below.
Code Block |
---|
|
root@a-onap-auto-20180412-ref:~# kubectl get services --all-namespaces | grep log
onap dev-vfc-catalog ClusterIP 10.43.210.8 <none> 8806/TCP 5d
onap log-es NodePort 10.43.77.87 <none> 9200:30254/TCP 5d
onap log-es-tcp ClusterIP 10.43.159.93 <none> 9300/TCP 5d
onap log-kibana NodePort 10.43.41.102 <none> 5601:30253/TCP 5d
onap log-ls NodePort 10.43.180.165 <none> 5044:30255/TCP 5d
onap log-ls-http ClusterIP 10.43.13.180 <none> 9600/TCP 5d
root@a-onap-auto-20180412-ref:~# kubectl get pods --all-namespaces | grep log
onap dev-log-elasticsearch-66cdc4f855-wmpkz 1/1 Running 0 5d
onap dev-log-kibana-5b6f86bcb4-drpzq 0/1 Running 1076 5d
onap dev-log-logstash-6d9fdccdb6-ngq2f 1/1 Running 0 5d
onap dev-vfc-catalog-7d89bc8b9d-vxk74 2/2 Running 0 5d
root@a-onap-auto-20180412-ref:~# kubectl delete pod dev-log-kibana-5b6f86bcb4-drpzq -n onap
pod "dev-log-kibana-5b6f86bcb4-drpzq" deleted
root@a-onap-auto-20180412-ref:~# kubectl get pods --all-namespaces | grep log
onap dev-log-elasticsearch-66cdc4f855-wmpkz 1/1 Running 0 5d
onap dev-log-kibana-5b6f86bcb4-drpzq 0/1 Terminating 1076 5d
onap dev-log-kibana-5b6f86bcb4-gpn2m 0/1 Pending 0 12s
onap dev-log-logstash-6d9fdccdb6-ngq2f 1/1 Running 0 5d
onap dev-vfc-catalog-7d89bc8b9d-vxk74 2/2 Running 0 5d |
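A compact one-liner sketch of the same bounce (assumes the grep matches exactly one kibana pod in the onap namespace):
Code Block |
---|
|
# look up the generated kibana pod name and delete it - kubernetes will recreate it
kubectl delete pod -n onap $(kubectl get pods -n onap | grep log-kibana | awk '{print $1}') |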
A helm delete, a kubectl namespace delete, or a helm purge may not remove everything (for example because of hanging PVs) - use:
Code Block |
---|
|
#after a kubectl delete namespace onap
sudo helm delete --purge onap
kubectl delete pods <pod> --grace-period=0 --force -n onap |
Reboot VMs hosting a Deployment
aka https://lists.onap.org/g/onap-discuss/topic/procedure_to_shut_down_and/29540879?p=,,,20,0,0,0::recentpostdate%2Fsticky,,,20,2,0,29540879
in progress
Code Block |
---|
|
ubuntu@a-ld0:~$ kubectl get pods --all-namespaces | wc -l
234
# master 20190125
ubuntu@a-ld0:~$ kubectl scale --replicas=0 deployments --all -n onap
deployment.extensions/onap-aaf-aaf-cm scaled
deployment.extensions/onap-aaf-aaf-cs scaled
deployment.extensions/onap-aaf-aaf-fs scaled
deployment.extensions/onap-aaf-aaf-gui scaled
deployment.extensions/onap-aaf-aaf-hello scaled
deployment.extensions/onap-aaf-aaf-locate scaled
deployment.extensions/onap-aaf-aaf-oauth scaled
deployment.extensions/onap-aaf-aaf-service scaled
deployment.extensions/onap-aaf-aaf-sms scaled
deployment.extensions/onap-aai-aai scaled
deployment.extensions/onap-aai-aai-babel scaled
deployment.extensions/onap-aai-aai-champ scaled
deployment.extensions/onap-aai-aai-data-router scaled
deployment.extensions/onap-aai-aai-elasticsearch scaled
deployment.extensions/onap-aai-aai-gizmo scaled
deployment.extensions/onap-aai-aai-graphadmin scaled
deployment.extensions/onap-aai-aai-modelloader scaled
deployment.extensions/onap-aai-aai-resources scaled
deployment.extensions/onap-aai-aai-search-data scaled
deployment.extensions/onap-aai-aai-sparky-be scaled
deployment.extensions/onap-aai-aai-spike scaled
deployment.extensions/onap-aai-aai-traversal scaled
deployment.extensions/onap-appc-appc-ansible-server scaled
deployment.extensions/onap-appc-appc-cdt scaled
deployment.extensions/onap-appc-appc-dgbuilder scaled
deployment.extensions/onap-clamp-clamp scaled
deployment.extensions/onap-clamp-clamp-dash-es scaled
deployment.extensions/onap-clamp-clamp-dash-kibana scaled
deployment.extensions/onap-clamp-clamp-dash-logstash scaled
deployment.extensions/onap-clamp-clampdb scaled
deployment.extensions/onap-cli-cli scaled
deployment.extensions/onap-consul-consul scaled
deployment.extensions/onap-contrib-netbox-app scaled
deployment.extensions/onap-contrib-netbox-nginx scaled
deployment.extensions/onap-contrib-netbox-postgres scaled
deployment.extensions/onap-dcaegen2-dcae-bootstrap scaled
deployment.extensions/onap-dcaegen2-dcae-cloudify-manager scaled
deployment.extensions/onap-dcaegen2-dcae-healthcheck scaled
deployment.extensions/onap-dcaegen2-dcae-pgpool scaled
deployment.extensions/onap-dmaap-dbc-pgpool scaled
deployment.extensions/onap-dmaap-dmaap-bus-controller scaled
deployment.extensions/onap-dmaap-dmaap-dr-db scaled
deployment.extensions/onap-dmaap-dmaap-dr-node scaled
deployment.extensions/onap-dmaap-dmaap-dr-prov scaled
deployment.extensions/onap-esr-esr-gui scaled
deployment.extensions/onap-esr-esr-server scaled
deployment.extensions/onap-log-log-elasticsearch scaled
deployment.extensions/onap-log-log-kibana scaled
deployment.extensions/onap-log-log-logstash scaled
deployment.extensions/onap-msb-kube2msb scaled
deployment.extensions/onap-msb-msb-consul scaled
deployment.extensions/onap-msb-msb-discovery scaled
deployment.extensions/onap-msb-msb-eag scaled
deployment.extensions/onap-msb-msb-iag scaled
deployment.extensions/onap-multicloud-multicloud scaled
deployment.extensions/onap-multicloud-multicloud-azure scaled
deployment.extensions/onap-multicloud-multicloud-ocata scaled
deployment.extensions/onap-multicloud-multicloud-pike scaled
deployment.extensions/onap-multicloud-multicloud-vio scaled
deployment.extensions/onap-multicloud-multicloud-windriver scaled
deployment.extensions/onap-oof-music-tomcat scaled
deployment.extensions/onap-oof-oof scaled
deployment.extensions/onap-oof-oof-cmso-service scaled
deployment.extensions/onap-oof-oof-has-api scaled
deployment.extensions/onap-oof-oof-has-controller scaled
deployment.extensions/onap-oof-oof-has-data scaled
deployment.extensions/onap-oof-oof-has-reservation scaled
deployment.extensions/onap-oof-oof-has-solver scaled
deployment.extensions/onap-policy-brmsgw scaled
deployment.extensions/onap-policy-nexus scaled
deployment.extensions/onap-policy-pap scaled
deployment.extensions/onap-policy-policy-distribution scaled
deployment.extensions/onap-policy-policydb scaled
deployment.extensions/onap-pomba-pomba-aaictxbuilder scaled
deployment.extensions/onap-pomba-pomba-contextaggregator scaled
deployment.extensions/onap-pomba-pomba-data-router scaled
deployment.extensions/onap-pomba-pomba-elasticsearch scaled
deployment.extensions/onap-pomba-pomba-kibana scaled
deployment.extensions/onap-pomba-pomba-networkdiscovery scaled
deployment.extensions/onap-pomba-pomba-networkdiscoveryctxbuilder scaled
deployment.extensions/onap-pomba-pomba-sdcctxbuilder scaled
deployment.extensions/onap-pomba-pomba-sdncctxbuilder scaled
deployment.extensions/onap-pomba-pomba-search-data scaled
deployment.extensions/onap-pomba-pomba-servicedecomposition scaled
deployment.extensions/onap-pomba-pomba-validation-service scaled
deployment.extensions/onap-portal-portal-app scaled
deployment.extensions/onap-portal-portal-cassandra scaled
deployment.extensions/onap-portal-portal-db scaled
deployment.extensions/onap-portal-portal-sdk scaled
deployment.extensions/onap-portal-portal-widget scaled
deployment.extensions/onap-portal-portal-zookeeper scaled
deployment.extensions/onap-robot-robot scaled
deployment.extensions/onap-sdc-sdc-be scaled
deployment.extensions/onap-sdc-sdc-cs scaled
deployment.extensions/onap-sdc-sdc-dcae-be scaled
deployment.extensions/onap-sdc-sdc-dcae-dt scaled
deployment.extensions/onap-sdc-sdc-dcae-fe scaled
deployment.extensions/onap-sdc-sdc-dcae-tosca-lab scaled
deployment.extensions/onap-sdc-sdc-es scaled
deployment.extensions/onap-sdc-sdc-fe scaled
deployment.extensions/onap-sdc-sdc-kb scaled
deployment.extensions/onap-sdc-sdc-onboarding-be scaled
deployment.extensions/onap-sdc-sdc-wfd-be scaled
deployment.extensions/onap-sdc-sdc-wfd-fe scaled
deployment.extensions/onap-sdnc-controller-blueprints scaled
deployment.extensions/onap-sdnc-network-name-gen scaled
deployment.extensions/onap-sdnc-sdnc-ansible-server scaled
deployment.extensions/onap-sdnc-sdnc-dgbuilder scaled
deployment.extensions/onap-sdnc-sdnc-dmaap-listener scaled
deployment.extensions/onap-sdnc-sdnc-portal scaled
deployment.extensions/onap-sdnc-sdnc-ueb-listener scaled
deployment.extensions/onap-sniro-emulator-sniro-emulator scaled
deployment.extensions/onap-so-so scaled
deployment.extensions/onap-so-so-bpmn-infra scaled
deployment.extensions/onap-so-so-catalog-db-adapter scaled
deployment.extensions/onap-so-so-mariadb scaled
deployment.extensions/onap-so-so-monitoring scaled
deployment.extensions/onap-so-so-openstack-adapter scaled
deployment.extensions/onap-so-so-request-db-adapter scaled
deployment.extensions/onap-so-so-sdc-controller scaled
deployment.extensions/onap-so-so-sdnc-adapter scaled
deployment.extensions/onap-so-so-vfc-adapter scaled
deployment.extensions/onap-uui-uui scaled
deployment.extensions/onap-uui-uui-server scaled
deployment.extensions/onap-vfc-vfc-catalog scaled
deployment.extensions/onap-vfc-vfc-db scaled
deployment.extensions/onap-vfc-vfc-ems-driver scaled
deployment.extensions/onap-vfc-vfc-generic-vnfm-driver scaled
deployment.extensions/onap-vfc-vfc-huawei-vnfm-driver scaled
deployment.extensions/onap-vfc-vfc-juju-vnfm-driver scaled
deployment.extensions/onap-vfc-vfc-multivim-proxy scaled
deployment.extensions/onap-vfc-vfc-nokia-v2vnfm-driver scaled
deployment.extensions/onap-vfc-vfc-nokia-vnfm-driver scaled
deployment.extensions/onap-vfc-vfc-nslcm scaled
deployment.extensions/onap-vfc-vfc-resmgr scaled
deployment.extensions/onap-vfc-vfc-vnflcm scaled
deployment.extensions/onap-vfc-vfc-vnfmgr scaled
deployment.extensions/onap-vfc-vfc-vnfres scaled
deployment.extensions/onap-vfc-vfc-workflow scaled
deployment.extensions/onap-vfc-vfc-workflow-engine scaled
deployment.extensions/onap-vfc-vfc-zte-sdnc-driver scaled
deployment.extensions/onap-vfc-vfc-zte-vnfm-driver scaled
deployment.extensions/onap-vid-vid scaled
deployment.extensions/onap-vnfsdk-vnfsdk scaled
deployment.extensions/onap-vnfsdk-vnfsdk-pgpool scaled
deployment.extensions/onap-vvp-vvp scaled
deployment.extensions/onap-vvp-vvp-ci-uwsgi scaled
deployment.extensions/onap-vvp-vvp-cms-uwsgi scaled
deployment.extensions/onap-vvp-vvp-em-uwsgi scaled
deployment.extensions/onap-vvp-vvp-ext-haproxy scaled
deployment.extensions/onap-vvp-vvp-gitlab scaled
deployment.extensions/onap-vvp-vvp-imagescanner scaled
deployment.extensions/onap-vvp-vvp-int-haproxy scaled
deployment.extensions/onap-vvp-vvp-jenkins scaled
deployment.extensions/onap-vvp-vvp-postgres scaled
deployment.extensions/onap-vvp-vvp-redis scaled
ubuntu@a-ld0:~$ kubectl scale --replicas=0 statefulsets --all -n onap
statefulset.apps/onap-aaf-aaf-sms-quorumclient scaled
statefulset.apps/onap-aaf-aaf-sms-vault scaled
statefulset.apps/onap-aai-aai-cassandra scaled
statefulset.apps/onap-appc-appc scaled
statefulset.apps/onap-appc-appc-db scaled
statefulset.apps/onap-consul-consul-server scaled
statefulset.apps/onap-dcaegen2-dcae-db scaled
statefulset.apps/onap-dcaegen2-dcae-redis scaled
statefulset.apps/onap-dmaap-dbc-pg scaled
statefulset.apps/onap-dmaap-message-router scaled
statefulset.apps/onap-dmaap-message-router-kafka scaled
statefulset.apps/onap-dmaap-message-router-zookeeper scaled
statefulset.apps/onap-oof-cmso-db scaled
statefulset.apps/onap-oof-music-cassandra scaled
statefulset.apps/onap-oof-zookeeper scaled
statefulset.apps/onap-policy-drools scaled
statefulset.apps/onap-policy-pdp scaled
statefulset.apps/onap-policy-policy-apex-pdp scaled
statefulset.apps/onap-sdnc-controller-blueprints-db scaled
statefulset.apps/onap-sdnc-nengdb scaled
statefulset.apps/onap-sdnc-sdnc scaled
statefulset.apps/onap-sdnc-sdnc-db scaled
statefulset.apps/onap-vid-vid-mariadb-galera scaled
statefulset.apps/onap-vnfsdk-vnfsdk-postgres scaled
ubuntu@a-ld0:~$ kubectl get pods --all-namespaces | grep Terminating | wc -l
179
# 4 min later
ubuntu@a-ld0:~$ kubectl get pods --all-namespaces | grep Terminating | wc -l
118
ubuntu@a-ld0:~$ kubectl get pods --all-namespaces | wc -l
135
# completed/failed jobs are left
ubuntu@a-ld0:~$ kubectl get pods --all-namespaces | wc -l
27
ubuntu@a-ld0:~$ kubectl get pods --all-namespaces | grep Terminating | wc -l
0
ubuntu@a-ld0:~$ kubectl get pods --all-namespaces
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system heapster-7b48b696fc-99cd6 1/1 Running 0 2d
kube-system kube-dns-6655f78c68-k4dh4 3/3 Running 0 2d
kube-system kubernetes-dashboard-6f54f7c4b-fhqmf 1/1 Running 0 2d
kube-system monitoring-grafana-7877679464-cscg4 1/1 Running 0 2d
kube-system monitoring-influxdb-64664c6cf5-wmw8w 1/1 Running 0 2d
kube-system tiller-deploy-78db58d887-9qlwh 1/1 Running 0 2d
onap onap-aaf-aaf-sms-preload-k7mx6 0/1 Completed 0 2d
onap onap-aaf-aaf-sshsm-distcenter-lk5st 0/1 Completed 0 2d
onap onap-aaf-aaf-sshsm-testca-lg2g6 0/1 Completed 0 2d
onap onap-aai-aai-graphadmin-create-db-schema-7qhcr 0/1 Completed 0 2d
onap onap-aai-aai-traversal-update-query-data-n6dt6 0/1 Init:0/1 289 2d
onap onap-contrib-netbox-app-provisioning-7mb4f 0/1 Completed 0 2d
onap onap-contrib-netbox-app-provisioning-wbvpv 0/1 Error 0 2d
onap onap-oof-music-cassandra-job-config-wvwgv 0/1 Completed 0 2d
onap onap-oof-oof-has-healthcheck-s44jv 0/1 Completed 0 2d
onap onap-oof-oof-has-onboard-kcfb6 0/1 Completed 0 2d
onap onap-portal-portal-db-config-vt848 0/2 Completed 0 2d
onap          onap-sdc-sdc-be-config-backend-cktdp               0/1       Completed    0   2d
onap          onap-sdc-sdc-cs-config-cassandra-t5lt7             0/1       Completed    0   2d
onap          onap-sdc-sdc-dcae-be-tools-8pkqz                   0/1       Completed    0   2d
onap          onap-sdc-sdc-dcae-be-tools-lrcwk                   0/1       Init:Error   0   2d
onap          onap-sdc-sdc-es-config-elasticsearch-9zrdw         0/1       Completed    0   2d
onap          onap-sdc-sdc-onboarding-be-cassandra-init-8klpv    0/1       Completed    0   2d
onap          onap-sdc-sdc-wfd-be-workflow-init-b4j4v            0/1       Completed    0   2d
onap          onap-vid-vid-galera-config-d4srr                   0/1       Completed    0   2d
onap          onap-vnfsdk-vnfsdk-init-postgres-bm668             0/1       Completed    0   2d
# deployments are still there
# reboot server
ubuntu@a-ld0:~$ sudo helm list
NAME                  REVISION   UPDATED                    STATUS     CHART                  NAMESPACE
onap                  28         Thu Jan 24 18:48:42 2019   DEPLOYED   onap-3.0.0             onap
onap-aaf              23         Thu Jan 24 18:48:45 2019   DEPLOYED   aaf-3.0.0              onap
onap-aai              21         Thu Jan 24 18:48:51 2019   DEPLOYED   aai-3.0.0              onap
onap-appc             7          Thu Jan 24 18:49:02 2019   DEPLOYED   appc-3.0.0             onap
onap-clamp            6          Thu Jan 24 18:49:06 2019   DEPLOYED   clamp-3.0.0            onap
onap-cli              5          Thu Jan 24 18:49:09 2019   DEPLOYED   cli-3.0.0              onap
onap-consul           27         Thu Jan 24 18:49:11 2019   DEPLOYED   consul-3.0.0           onap
onap-contrib          2          Thu Jan 24 18:49:14 2019   DEPLOYED   contrib-3.0.0          onap
onap-dcaegen2         24         Thu Jan 24 18:49:18 2019   DEPLOYED   dcaegen2-3.0.0         onap
onap-dmaap            25         Thu Jan 24 18:49:22 2019   DEPLOYED   dmaap-3.0.0            onap
onap-esr              20         Thu Jan 24 18:49:27 2019   DEPLOYED   esr-3.0.0              onap
onap-log              11         Thu Jan 24 18:49:31 2019   DEPLOYED   log-3.0.0              onap
onap-msb              26         Thu Jan 24 18:49:34 2019   DEPLOYED   msb-3.0.0              onap
onap-multicloud       19         Thu Jan 24 18:49:37 2019   DEPLOYED   multicloud-3.0.0       onap
onap-oof              18         Thu Jan 24 18:49:44 2019   DEPLOYED   oof-3.0.0              onap
onap-policy           13         Thu Jan 24 18:49:52 2019   DEPLOYED   policy-3.0.0           onap
onap-pomba            4          Thu Jan 24 18:49:56 2019   DEPLOYED   pomba-3.0.0            onap
onap-portal           12         Thu Jan 24 18:50:03 2019   DEPLOYED   portal-3.0.0           onap
onap-robot            22         Thu Jan 24 18:50:08 2019   DEPLOYED   robot-3.0.0            onap
onap-sdc              16         Thu Jan 24 18:50:11 2019   DEPLOYED   sdc-3.0.0              onap
onap-sdnc             15         Thu Jan 24 18:50:17 2019   DEPLOYED   sdnc-3.0.0             onap
onap-sniro-emulator   1          Thu Jan 24 18:50:21 2019   DEPLOYED   sniro-emulator-3.0.0   onap
onap-so               17         Thu Jan 24 18:50:24 2019   DEPLOYED   so-3.0.0               onap
onap-uui              9          Thu Jan 24 18:50:30 2019   DEPLOYED   uui-3.0.0              onap
onap-vfc              10         Thu Jan 24 18:50:33 2019   DEPLOYED   vfc-3.0.0              onap
onap-vid              14         Thu Jan 24 18:50:38 2019   DEPLOYED   vid-3.0.0              onap
onap-vnfsdk           8          Thu Jan 24 18:50:41 2019   DEPLOYED   vnfsdk-3.0.0           onap
onap-vvp              3          Thu Jan 24 18:50:44 2019   DEPLOYED   vvp-3.0.0              onap
sudo reboot now
ubuntu@a-ld0:~$ sudo docker ps
CONTAINER ID   IMAGE                    COMMAND                  CREATED      STATUS          PORTS                              NAMES
f61dc9902248   rancher/agent:v1.2.11    "/run.sh run"            2 days ago   Up 30 seconds                                      rancher-agent
01f40fa3a4ed   rancher/server:v1.6.25   "/usr/bin/entry /u..."   2 days ago   Up 30 seconds   3306/tcp, 0.0.0.0:8880->8080/tcp   rancher_server
# back up
ubuntu@a-ld0:~$ kubectl get pods --all-namespaces
NAMESPACE     NAME                                     READY     STATUS      RESTARTS   AGE
kube-system   heapster-7b48b696fc-99cd6                0/1       Error       0          2d
kube-system   kube-dns-6655f78c68-k4dh4                0/3       Error       0          2d
kube-system   kubernetes-dashboard-6f54f7c4b-fhqmf     0/1       Error       0          2d
kube-system   monitoring-grafana-7877679464-cscg4      0/1       Completed   0          2d
kube-system   monitoring-influxdb-64664c6cf5-wmw8w     0/1       Completed   0          2d
kube-system   tiller-deploy-78db58d887-9qlwh           1/1       Running     0          2d
onap          onap-aaf-aaf-sms-preload-k7mx6           0/1       Completed   0          2d
onap          onap-aaf-aaf-sshsm-distcenter-lk5st      0/1       Completed   0          2d
....
# note not all replicas were actually 1 - some were 2,3,7
kubectl scale --replicas=1 deployments --all -n onap
kubectl scale --replicas=1 statefulsets --all -n onap
# 6m
ubuntu@a-ld0:~$ kubectl get pods --all-namespaces | grep -E '0/|1/2|1/3|2/3' | wc -l
199
# 20m
ubuntu@a-ld0:~$ kubectl get pods --all-namespaces | grep -E '0/|1/2|1/3|2/3' | wc -l
180
# 60 min
ubuntu@a-ld0:~$ kubectl get pods --all-namespaces | grep -E '0/|1/2|1/3|2/3' | wc -l
42 |
...
Remove a Deployment
https://git.onap.org/logging-analytics/tree/deploy/cd.sh#n57
Code Block |
---|
|
kubectl delete namespace onap
sudo helm delete --purge onap
kubectl delete pv --all
kubectl delete pvc --all
kubectl delete secrets --all
kubectl delete clusterrolebinding --all |
Rotate Logs
find the large directories first
du --max-depth=1 | sort -nr
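A minimal cleanup sketch once the large directories are found - the /dockerdata-nfs path and the 1G threshold are assumptions, adjust them to whatever du reports:
Code Block |
---|
|
# truncate oversized log files in place rather than deleting them
find /dockerdata-nfs -name "*.log" -size +1G -exec truncate -s 0 {} \; |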
...
Make sure the robot container is deployed - you may run directly from the kubernetes folder outside of the container - see https://git.onap.org/logging-analytics/tree/deploy/cd.sh#n297
Code Block |
---|
|
# make sure the robot container is up via --set robot.enabled=true
cd oom/kubernetes/robot
./ete-k8s.sh $ENVIRON health |
...