Why does the ingress-nginx-controller pod always establish an IPv4 TCP connection to the endpoint?

4/6/2021

We have brought up a k8s cluster on 2 VMs with the dual-stack feature enabled. Let's call them nodeA and nodeB: nodeA is the master node and nodeB is the worker node.

The following output shows the cluster configuration:

# ps -eaf | grep kubeapi
root     2593936 2593910  4 Mar16 ?        1-00:16:14 kube-apiserver --advertise-address=10.4.0.85 
--allow-privileged=true --authorization-mode=Node,RBAC --client-ca-file=/etc/kubernetes/pki/ca.crt 
--enable-admission-plugins=NodeRestriction --enable-bootstrap-token-auth=true --etcd- 
cafile=/etc/kubernetes/pki/etcd/ca.crt --etcd-certfile=/etc/kubernetes/pki/apiserver-etcd- 
client.crt --etcd-keyfile=/etc/kubernetes/pki/apiserver-etcd-client.key --etcd- 
servers=https://127.0.0.1:2379 --feature-gates=IPv6DualStack=true --insecure-port=0 --kubelet- 
client-certificate=/etc/kubernetes/pki/apiserver-kubelet-client.crt --kubelet-client- 
key=/etc/kubernetes/pki/apiserver-kubelet-client.key --kubelet-preferred-address- 
types=InternalIP,ExternalIP,Hostname --proxy-client-cert-file=/etc/kubernetes/pki/front-proxy- 
client.crt --proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client.key --requestheader- 
allowed-names=front-proxy-client --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy- 
ca.crt --requestheader-extra-headers-prefix=X-Remote-Extra- --requestheader-group-headers=X-Remote- 
Group --requestheader-username-headers=X-Remote-User --secure-port=6443 --service-account- 
issuer=https://kubernetes.default.svc.cluster.local --service-account-key- 
file=/etc/kubernetes/pki/sa.pub --service-account-signing-key-file=/etc/kubernetes/pki/sa.key -- 
service-cluster-ip-range=10.244.1.0/24,2001:db8:1234:5678:8:2::/112 --tls-cert- 
file=/etc/kubernetes/pki/apiserver.crt --tls-private-key-file=/etc/kubernetes/pki/apiserver.key

We have created a ClusterIP service, apple-service, and an ingress-nginx-controller NodePort service, as shown below:

# kubectl describe svc apple-service
Name:              apple-service
Namespace:         default
Labels:            <none>
Annotations:       <none>
Selector:          app=apple
Type:              ClusterIP
IP Family Policy:  PreferDualStack
IP Families:       IPv4,IPv6
IP:                10.244.1.104
IPs:               10.244.1.104,2001:db8:1234:5678:8:2:0:6294
Port:              <unset>  5678/TCP
TargetPort:        5678/TCP
Endpoints:         10.244.2.150:5678,10.244.2.151:5678
Session Affinity:  None
Events:            <none>

# kubectl describe svc ingress-nginx-controller -n ingress-nginx
Name:                     ingress-nginx-controller
Namespace:                ingress-nginx
Labels:                   app.kubernetes.io/component=controller
                          app.kubernetes.io/instance=ingress-nginx
                          app.kubernetes.io/managed-by=Helm
                          app.kubernetes.io/name=ingress-nginx
                          app.kubernetes.io/version=0.44.0
                      helm.sh/chart=ingress-nginx-3.23.0
Annotations:              <none>
Selector:                 
app.kubernetes.io/component=controller,app.kubernetes.io/instance=ingress- 
nginx,app.kubernetes.io/name=ingress-nginx
Type:                     NodePort
IP Family Policy:         PreferDualStack
IP Families:              IPv4,IPv6
IP:                       10.244.1.4
IPs:                      10.244.1.4,2001:db8:1234:5678:8:2:0:6033
Port:                     http  80/TCP
TargetPort:               http/TCP
NodePort:                 http  31003/TCP
Endpoints:                10.244.2.144:80
Port:                     https  443/TCP
TargetPort:               https/TCP
NodePort:                 https  31801/TCP
Endpoints:                10.244.2.144:443
Session Affinity:         None
External Traffic Policy:  Cluster
Events:                   <none>


# kubectl describe ep apple-service
Name:         apple-service
Namespace:    default
Labels:       <none>
Annotations:  endpoints.kubernetes.io/last-change-trigger-time: 2021-04-01T12:53:37Z
Subsets:
Addresses:          10.244.2.150,10.244.2.151
 NotReadyAddresses:  <none>
 Ports:
  Name     Port  Protocol
  ----     ----  --------
  <unset>  5678  TCP

Events:  <none>

We have created the following ingress resource:

# kubectl describe ingress
Name:             example-ingress
Namespace:        default
Address:          10.11.0.58
Default backend:  default-http-backend:80 (<error: endpoints "default-http-backend" not found>)
Rules:
  Host        Path  Backends
  ----        ----  --------
  *
          /apple   apple-service:5678 (10.244.2.150:5678,10.244.2.151:5678)
Annotations:  nginx.ingress.kubernetes.io/rewrite-target: /
Events:       <none>

We have created 2 pods behind the ClusterIP service, both of which have dual-stack addresses.

# kubectl describe pod apple-app-1
Name:         apple-app-1
Namespace:    default
Priority:     0
Node:         ccd-focal-clus1-2/10.11.0.58
Start Time:   Thu, 01 Apr 2021 12:38:56 +0000
Labels:       app=apple
Annotations:  cni.projectcalico.org/podIP: 10.244.2.150/32
          cni.projectcalico.org/podIPs: 10.244.2.150/32,2001:db8:1234:5678:8:3:0:3295/128
Status:       Running
IP:           10.244.2.150
IPs:
 IP:  10.244.2.150
 IP:  2001:db8:1234:5678:8:3:0:3295
 ...

# kubectl describe pod apple-app-2
Name:         apple-app-2
Namespace:    default
Priority:     0
Node:         ccd-focal-clus1-2/10.11.0.58
Start Time:   Thu, 01 Apr 2021 12:53:34 +0000
Labels:       app=apple
Annotations:  cni.projectcalico.org/podIP: 10.244.2.151/32
          cni.projectcalico.org/podIPs: 10.244.2.151/32,2001:db8:1234:5678:8:3:0:3296/128
Status:       Running
IP:           10.244.2.151
IPs:
  IP:  10.244.2.151
  IP:  2001:db8:1234:5678:8:3:0:3296

The ingress-nginx-controller pod details are as follows:

# kubectl describe pods ingress-nginx-controller-67897c9494-s4fkw -n ingress-nginx
Name:         ingress-nginx-controller-67897c9494-s4fkw
Namespace:    ingress-nginx
Priority:     0
Node:         ccd-focal-clus1-2/10.11.0.58
Start Time:   Wed, 31 Mar 2021 14:53:49 +0000
Labels:       app.kubernetes.io/component=controller
          app.kubernetes.io/instance=ingress-nginx
          app.kubernetes.io/name=ingress-nginx
          pod-template-hash=67897c9494
Annotations:  cni.projectcalico.org/podIP: 10.244.2.144/32
          cni.projectcalico.org/podIPs: 10.244.2.144/32,2001:db8:1234:5678:8:3:0:328f/128
Status:       Running
IP:           10.244.2.144
IPs:
 IP:           10.244.2.144
 IP:           2001:db8:1234:5678:8:3:0:328f

nodeA (master) ifconfig output as follows:

ens3: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
    inet 10.11.0.137  netmask 255.255.255.0  broadcast 10.11.0.255
    inet6 2001:db8:100:c1::287  prefixlen 128  scopeid 0x0<global>
    inet6 fe80::f816:3eff:fed5:1b32  prefixlen 64  scopeid 0x20<link>
    ether fa:16:3e:d5:1b:32  txqueuelen 1000  (Ethernet)
    RX packets 350743  bytes 51111859 (51.1 MB)
    RX errors 0  dropped 0  overruns 0  frame 0
    TX packets 4874839  bytes 386319524 (386.3 MB)
    TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

ens4: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
    inet 10.4.0.85  netmask 255.255.255.0  broadcast 10.4.0.255
    inet6 2001:db8:100:a1::1a6  prefixlen 128  scopeid 0x0<global>
    inet6 fe80::f816:3eff:fe53:adc5  prefixlen 64  scopeid 0x20<link>
    ether fa:16:3e:53:ad:c5  txqueuelen 1000  (Ethernet)
    RX packets 6942419  bytes 1695386713 (1.6 GB)
    RX errors 0  dropped 0  overruns 0  frame 0
    TX packets 8330231  bytes 2487914325 (2.4 GB)
    TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

nodeB (worker) ifconfig output as follows:

ens3: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
    inet 10.11.0.58  netmask 255.255.255.0  broadcast 10.11.0.255
    inet6 2001:db8:100:c1::12a  prefixlen 128  scopeid 0x0<global>
    inet6 fe80::f816:3eff:fe6e:35da  prefixlen 64  scopeid 0x20<link>
    ether fa:16:3e:6e:35:da  txqueuelen 1000  (Ethernet)
    RX packets 134737  bytes 15951676 (15.9 MB)
    RX errors 0  dropped 0  overruns 0  frame 0
    TX packets 153842  bytes 35255446 (35.2 MB)
    TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

ens4: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
    inet 10.4.0.45  netmask 255.255.255.0  broadcast 10.4.0.255
    inet6 fe80::f816:3eff:fe15:91ba  prefixlen 64  scopeid 0x20<link>
    inet6 2001:db8:100:a1::9  prefixlen 128  scopeid 0x0<global>
    ether fa:16:3e:15:91:ba  txqueuelen 1000  (Ethernet)
    RX packets 6918097  bytes 3530410872 (3.5 GB)
    RX errors 0  dropped 0  overruns 0  frame 0
    TX packets 5392656  bytes 638310030 (638.3 MB)
    TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

When we execute "curl -LO http://[2001:db8:100:c1::12a]:31003/apple", we see that an IPv6 TCP connection is established with the ingress-nginx-controller address 2001:db8:1234:5678:8:3:0:328f. However, from the ingress-nginx-controller pod to the backend pod, an IPv4 TCP connection is established, i.e. 10.244.2.144 --> 10.244.2.151 (apple-app-2).

The picture below shows the tcpdump output taken on the ingress-nginx-controller pod.

enter image description here

-- Hanamantagoud
kubernetes
kubernetes-ingress
nginx

1 Answer

4/23/2021

As described in the "Validate IPv4/IPv6 dual-stack" documentation, the order of the entries in the .spec.ipFamilies array matters.

Kubernetes will assign both IPv4 and IPv6 addresses (as this cluster has dual-stack enabled) and select the .spec.clusterIP from the list of .spec.clusterIPs based on the address family of the first element in the .spec.ipFamilies array.

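You specified IPv4 as the first array element in .spec.ipFamilies, therefore Kubernetes assigned a cluster IP for this Service from the IPv4 range. The Service's Endpoints, which the ingress lists as its backends, are likewise IPv4-only (10.244.2.150:5678, 10.244.2.151:5678), so the ingress-nginx-controller connects to the pods over IPv4.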

# kubectl describe svc apple-service
Name:              apple-service
...
IP Family Policy:  PreferDualStack
IP Families:       IPv4,IPv6
IP:                10.244.1.104

Changing the order of ipFamilies will solve this problem:

spec:
  ipFamilies:
  - IPv6
  - IPv4

I've created a simple example to illustrate how it works.

I have a single app-1 Deployment exposed using the ClusterIP service:

# kubectl get deploy,pod,svc
NAME                    READY   UP-TO-DATE   AVAILABLE   AGE
deployment.apps/app-1   1/1     1            1           52m

NAME                         READY   STATUS    RESTARTS   AGE
pod/app-1-5d9ccdb595-nrljs   1/1     Running   0          52m

NAME                 TYPE        CLUSTER-IP            EXTERNAL-IP   PORT(S)   AGE
service/app-1        ClusterIP   2001:db8:42:1::f4a0   <none>        80/TCP    19m

# kubectl describe ing
Name:             example-ingress
Namespace:        default
Address:          10.0.0.5
Default backend:  default-http-backend:80 (<error: endpoints "default-http-backend" not found>)
Rules:
  Host        Path  Backends
  ----        ----  --------
  *           
              /app-1   app-1:80 ([2001:db8:42:cd:2fba:8d83:9906:4d8f]:80)

# kubectl get svc ingress-nginx-controller -n ingress-nginx
NAME                       TYPE       CLUSTER-IP     EXTERNAL-IP   PORT(S)                      AGE
ingress-nginx-controller   NodePort   10.96.60.120   <none>        80:31072/TCP,443:30408/TCP   133m

Let's take a look at the manifest of the app-1 Service:
NOTE: IPv6 is the first element in the .spec.ipFamilies array.

# cat svc.yml 
apiVersion: v1
kind: Service
metadata:
  labels:
    app: app-1
  name: app-1
  namespace: default
spec:
  ipFamilies:
  - IPv6    
  - IPv4
  ipFamilyPolicy: PreferDualStack
  ports:
  - port: 80
    protocol: TCP
    targetPort: 80
  selector:
    app: app-1
  sessionAffinity: None
  type: ClusterIP

We can check if it works as expected:
NOTE: It doesn't matter if we run curl public_ipv4_address:31072/app-1 or curl [public_ipv6_address]:31072/app-1.

# kubectl exec -it app-1-5d9ccdb595-nrljs -- bash
root@app-1-5d9ccdb595-nrljs:/# tcpdump -n 'port 80'
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on eth0, link-type EN10MB (Ethernet), capture size 262144 bytes
11:02:06.918674 IP6 2001:db8:42:cd:2fba:8d83:9906:4d8e.54406 > 2001:db8:42:cd:2fba:8d83:9906:4d8f.80: Flags [S], seq 3544899375, win 64860, options [mss 1380,sackOK,TS val 2211355791 ecr 0,nop,wscale 7], length 0
11:02:06.918705 IP6 2001:db8:42:cd:2fba:8d83:9906:4d8f.80 > 2001:db8:42:cd:2fba:8d83:9906:4d8e.54406: Flags [S.], seq 1030071611, ack 3544899376, win 64296, options [mss 1380,sackOK,TS val 1923799370 ecr 2211355791,nop,wscale 7], length 0
11:02:06.918736 IP6 2001:db8:42:cd:2fba:8d83:9906:4d8e.54406 > 2001:db8:42:cd:2fba:8d83:9906:4d8f.80: Flags [.], ack 1, win 507, options [nop,nop,TS val 2211355791 ecr 1923799370], length 0
11:02:06.918787 IP6 2001:db8:42:cd:2fba:8d83:9906:4d8e.54406 > 2001:db8:42:cd:2fba:8d83:9906:4d8f.80: Flags [P.], seq 1:353, ack 1, win 507, options [nop,nop,TS val 2211355791 ecr 1923799370], length 352: HTTP: GET / HTTP/1.1
11:02:06.918794 IP6 2001:db8:42:cd:2fba:8d83:9906:4d8f.80 > 2001:db8:42:cd:2fba:8d83:9906:4d8e.54406: Flags [.], ack 353, win 502, options [nop,nop,TS val 1923799370 ecr 2211355791], length 0
11:02:06.919421 IP6 2001:db8:42:cd:2fba:8d83:9906:4d8f.80 > 2001:db8:42:cd:2fba:8d83:9906:4d8e.54406: Flags [P.], seq 1:240, ack 353, win 502, options [nop,nop,TS val 1923799371 ecr 2211355791], length 239: HTTP: HTTP/1.1 200 OK
11:02:06.919455 IP6 2001:db8:42:cd:2fba:8d83:9906:4d8e.54406 > 2001:db8:42:cd:2fba:8d83:9906:4d8f.80: Flags [.], ack 240, win 506, options [nop,nop,TS val 2211355792 ecr 1923799371], length 0
-- matt_j
Source: StackOverflow