Spark submit fails on Kubernetes (EKS) with "invalid null input: name"

7/5/2020

I am trying to run the sample SparkPi Docker image on EKS. My Spark version is 3.0.
I created the spark service account and role binding. When I submit the job, I get the error below:

2020-07-05T12:19:40.862635502Z Exception in thread "main" java.io.IOException: failure to login
2020-07-05T12:19:40.862756537Z 	at org.apache.hadoop.security.UserGroupInformation.loginUserFromSubject(UserGroupInformation.java:841)
2020-07-05T12:19:40.862772672Z 	at org.apache.hadoop.security.UserGroupInformation.getLoginUser(UserGroupInformation.java:777)
2020-07-05T12:19:40.862777401Z 	at org.apache.hadoop.security.UserGroupInformation.getCurrentUser(UserGroupInformation.java:650)
2020-07-05T12:19:40.862788327Z 	at org.apache.spark.util.Utils$.$anonfun$getCurrentUserName$1(Utils.scala:2412)
2020-07-05T12:19:40.862792294Z 	at scala.Option.getOrElse(Option.scala:189)
2020-07-05T12:19:40.8628321Z 	at org.apache.spark.util.Utils$.getCurrentUserName(Utils.scala:2412)
2020-07-05T12:19:40.862836906Z 	at org.apache.spark.deploy.k8s.features.BasicDriverFeatureStep.configurePod(BasicDriverFeatureStep.scala:119)
2020-07-05T12:19:40.862907673Z 	at org.apache.spark.deploy.k8s.submit.KubernetesDriverBuilder.$anonfun$buildFromFeatures$3(KubernetesDriverBuilder.scala:59)
2020-07-05T12:19:40.862917119Z 	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
2020-07-05T12:19:40.86294845Z 	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
2020-07-05T12:19:40.862964245Z 	at scala.collection.immutable.List.foldLeft(List.scala:89)
2020-07-05T12:19:40.862979665Z 	at org.apache.spark.deploy.k8s.submit.KubernetesDriverBuilder.buildFromFeatures(KubernetesDriverBuilder.scala:58)
2020-07-05T12:19:40.863055425Z 	at org.apache.spark.deploy.k8s.submit.Client.run(KubernetesClientApplication.scala:98)
2020-07-05T12:19:40.863060434Z 	at org.apache.spark.deploy.k8s.submit.KubernetesClientApplication.$anonfun$run$4(KubernetesClientApplication.scala:221)
2020-07-05T12:19:40.863096062Z 	at org.apache.spark.deploy.k8s.submit.KubernetesClientApplication.$anonfun$run$4$adapted(KubernetesClientApplication.scala:215)
2020-07-05T12:19:40.863103831Z 	at org.apache.spark.util.Utils$.tryWithResource(Utils.scala:2539)
2020-07-05T12:19:40.863163804Z 	at org.apache.spark.deploy.k8s.submit.KubernetesClientApplication.run(KubernetesClientApplication.scala:215)
2020-07-05T12:19:40.863168546Z 	at org.apache.spark.deploy.k8s.submit.KubernetesClientApplication.start(KubernetesClientApplication.scala:188)
2020-07-05T12:19:40.863194449Z 	at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:928)
2020-07-05T12:19:40.863218817Z 	at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
2020-07-05T12:19:40.863246594Z 	at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
2020-07-05T12:19:40.863252341Z 	at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
2020-07-05T12:19:40.863277236Z 	at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1007)
2020-07-05T12:19:40.863314173Z 	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1016)
2020-07-05T12:19:40.863319847Z 	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
2020-07-05T12:19:40.863653699Z Caused by: javax.security.auth.login.LoginException: java.lang.NullPointerException: invalid null input: name
2020-07-05T12:19:40.863660447Z 	at com.sun.security.auth.UnixPrincipal.<init>(UnixPrincipal.java:71)
2020-07-05T12:19:40.863663683Z 	at com.sun.security.auth.module.UnixLoginModule.login(UnixLoginModule.java:133)
2020-07-05T12:19:40.863667173Z 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
2020-07-05T12:19:40.863670199Z 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
2020-07-05T12:19:40.863673467Z 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
2020-07-05T12:19:40.86367674Z 	at java.lang.reflect.Method.invoke(Method.java:498)
2020-07-05T12:19:40.863680205Z 	at javax.security.auth.login.LoginContext.invoke(LoginContext.java:755)
2020-07-05T12:19:40.863683401Z 	at javax.security.auth.login.LoginContext.access$000(LoginContext.java:195)
2020-07-05T12:19:40.86368671Z 	at javax.security.auth.login.LoginContext$4.run(LoginContext.java:682)
2020-07-05T12:19:40.863689794Z 	at javax.security.auth.login.LoginContext$4.run(LoginContext.java:680)
2020-07-05T12:19:40.863693081Z 	at java.security.AccessController.doPrivileged(Native Method)
2020-07-05T12:19:40.863696183Z 	at javax.security.auth.login.LoginContext.invokePriv(LoginContext.java:680)
2020-07-05T12:19:40.863698579Z 	at javax.security.auth.login.LoginContext.login(LoginContext.java:587)
2020-07-05T12:19:40.863700844Z 	at org.apache.hadoop.security.UserGroupInformation.loginUserFromSubject(UserGroupInformation.java:815)
2020-07-05T12:19:40.863703393Z 	at org.apache.hadoop.security.UserGroupInformation.getLoginUser(UserGroupInformation.java:777)
2020-07-05T12:19:40.86370659Z 	at org.apache.hadoop.security.UserGroupInformation.getCurrentUser(UserGroupInformation.java:650)
2020-07-05T12:19:40.863709809Z 	at org.apache.spark.util.Utils$.$anonfun$getCurrentUserName$1(Utils.scala:2412)
2020-07-05T12:19:40.863712847Z 	at scala.Option.getOrElse(Option.scala:189)
2020-07-05T12:19:40.863716102Z 	at org.apache.spark.util.Utils$.getCurrentUserName(Utils.scala:2412)
2020-07-05T12:19:40.863719273Z 	at org.apache.spark.deploy.k8s.features.BasicDriverFeatureStep.configurePod(BasicDriverFeatureStep.scala:119)
2020-07-05T12:19:40.86372651Z 	at org.apache.spark.deploy.k8s.submit.KubernetesDriverBuilder.$anonfun$buildFromFeatures$3(KubernetesDriverBuilder.scala:59)
2020-07-05T12:19:40.863728947Z 	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
2020-07-05T12:19:40.863731207Z 	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
2020-07-05T12:19:40.863733458Z 	at scala.collection.immutable.List.foldLeft(List.scala:89)
2020-07-05T12:19:40.863736237Z 	at org.apache.spark.deploy.k8s.submit.KubernetesDriverBuilder.buildFromFeatures(KubernetesDriverBuilder.scala:58)
2020-07-05T12:19:40.863738769Z 	at org.apache.spark.deploy.k8s.submit.Client.run(KubernetesClientApplication.scala:98)
2020-07-05T12:19:40.863742105Z 	at org.apache.spark.deploy.k8s.submit.KubernetesClientApplication.$anonfun$run$4(KubernetesClientApplication.scala:221)
2020-07-05T12:19:40.863745486Z 	at org.apache.spark.deploy.k8s.submit.KubernetesClientApplication.$anonfun$run$4$adapted(KubernetesClientApplication.scala:215)
2020-07-05T12:19:40.863749154Z 	at org.apache.spark.util.Utils$.tryWithResource(Utils.scala:2539)
2020-07-05T12:19:40.863752601Z 	at org.apache.spark.deploy.k8s.submit.KubernetesClientApplication.run(KubernetesClientApplication.scala:215)
2020-07-05T12:19:40.863756118Z 	at org.apache.spark.deploy.k8s.submit.KubernetesClientApplication.start(KubernetesClientApplication.scala:188)
2020-07-05T12:19:40.863759673Z 	at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:928)
2020-07-05T12:19:40.863762774Z 	at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
2020-07-05T12:19:40.863765929Z 	at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
2020-07-05T12:19:40.86376906Z 	at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
2020-07-05T12:19:40.863792673Z 	at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1007)
2020-07-05T12:19:40.863797161Z 	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1016)
2020-07-05T12:19:40.863799703Z 	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
2020-07-05T12:19:40.863802085Z 
2020-07-05T12:19:40.863804184Z 	at javax.security.auth.login.LoginContext.invoke(LoginContext.java:856)
2020-07-05T12:19:40.863806454Z 	at javax.security.auth.login.LoginContext.access$000(LoginContext.java:195)
2020-07-05T12:19:40.863808705Z 	at javax.security.auth.login.LoginContext$4.run(LoginContext.java:682)
2020-07-05T12:19:40.863811134Z 	at javax.security.auth.login.LoginContext$4.run(LoginContext.java:680)
2020-07-05T12:19:40.863815328Z 	at java.security.AccessController.doPrivileged(Native Method)
2020-07-05T12:19:40.863817575Z 	at javax.security.auth.login.LoginContext.invokePriv(LoginContext.java:680)
2020-07-05T12:19:40.863819856Z 	at javax.security.auth.login.LoginContext.login(LoginContext.java:587)
2020-07-05T12:19:40.863829171Z 	at org.apache.hadoop.security.UserGroupInformation.loginUserFromSubject(UserGroupInformation.java:815)
2020-07-05T12:19:40.86385963Z 	... 24 more

My deployments are:

apiVersion: v1
kind: Namespace
metadata:
  name: helios
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: spark
  namespace: helios
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: spark-role-binding
  namespace: helios
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: edit
subjects:
  - kind: ServiceAccount
    name: spark
    namespace: helios
---
apiVersion: batch/v1
kind: Job
metadata:
  name: spark-pi
  namespace: helios
spec:
  template:
    spec:
      containers:
        - name: spark-pi
          image: <registry>/spark-pi-3.0
          command: [
            "/bin/sh",
            "-c",
            "/opt/spark/bin/spark-submit \
            --master k8s://https://<EKS_API_SERVER> \
            --deploy-mode cluster \
            --name spark-pi \
            --class org.apache.spark.examples.SparkPi \
            --conf spark.kubernetes.namespace=helios \
            --conf spark.executor.instances=2 \
            --conf spark.executor.memory=2G \
            --conf spark.executor.cores=2 \
            --conf spark.kubernetes.container.image=<registry>/spark-pi-3.0 \
            --conf spark.kubernetes.container.image.pullPolicy=Always \
            --conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
            --conf spark.jars.ivy=/tmp/.ivy \
            local:///opt/spark/examples/jars/spark-examples_2.11-2.4.0.jar"
          ]
      serviceAccountName: spark
      restartPolicy: Never

The Docker image was created using the out-of-the-box Dockerfile provided with the Spark distribution:

docker build -t spark:latest -f kubernetes/dockerfiles/spark/Dockerfile .

What am I doing wrong here? Please help.

SOLUTION
It finally worked after I commented out the line below in the Dockerfile:

USER ${spark_uid}

The container now runs as root, but at least it works.
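
For reference, this amounts to disabling the final instruction in kubernetes/dockerfiles/spark/Dockerfile. A sketch of the relevant tail (the surrounding lines may differ slightly between Spark versions):

ENTRYPOINT [ "/opt/entrypoint.sh" ]
# Specify the User that the actual main process will run as
# USER ${spark_uid}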

-- NumeroUno
amazon-eks
apache-spark
docker
kubernetes

6 Answers

4/9/2021

I had the same problem. I solved it by adding the following to the submit container:

export SPARK_USER=spark3

without commenting out the USER ${spark_uid} line.
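
This works because, in this code path, Spark's Utils.getCurrentUserName reads the SPARK_USER environment variable first and only falls back to Hadoop's UserGroupInformation lookup (which needs an /etc/passwd entry for the UID) when it is unset. A minimal sketch of setting it in the submit container's pod spec (the value spark3 is illustrative):

      containers:
        - name: spark-pi
          image: <registry>/spark-pi-3.0
          env:
            - name: SPARK_USER
              value: spark3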

-- nub 228
Source: StackOverflow

3/16/2021

I had the same problem. I solved it by changing the k8s job.

Hadoop fails to find a username for the user. You can see the problem by running whoami in the container, which yields whoami: cannot find name for user ID 185. The Spark image's entrypoint.sh contains code that adds the user to /etc/passwd, which sets a username. However, command bypasses entrypoint.sh, so you should use args instead, like so:

      containers:
        - name: spark-pi
          image: <registry>/spark-pi-3.0
          args: [
            "/bin/sh",
            "-c",
            "/opt/spark/bin/spark-submit \
            --master k8s://https://10.100.0.1:443 \
            --deploy-mode cluster ..."
          ]
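
For context, the passwd-patching logic in entrypoint.sh has roughly this shape (paraphrased; the exact lines vary between Spark versions):

myuid=$(id -u)
mygid=$(id -g)
# If the current UID has no passwd entry, add one so username lookups succeed
uidentry=$(getent passwd $myuid)
if [ -z "$uidentry" ] && [ -w /etc/passwd ]; then
    echo "$myuid:x:$myuid:$mygid:anonymous uid:$SPARK_HOME:/bin/false" >> /etc/passwd
fi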
-- Kieran
Source: StackOverflow

7/5/2020

It seems like you are missing the ServiceAccount/AWS role credentials that your job needs to connect to the EKS cluster.

I recommend you set up fine-grained IAM roles for service accounts.

Basically, you would have something like this (after you set up the roles in AWS):

apiVersion: v1
kind: ServiceAccount
metadata:
  annotations:
    eks.amazonaws.com/role-arn: arn:aws:iam::123456789012:role/my-serviceaccount-Role1
  name: spark
  namespace: helios

Then your job would look something like this:

apiVersion: batch/v1
kind: Job
metadata:
  name: spark-pi
  namespace: helios
spec:
  template:
    spec:
      containers:
        - name: spark-pi
          image: <registry>/spark-pi-3.0
          command: [
            "/bin/sh",
            "-c",
            "/opt/spark/bin/spark-submit \
            --master k8s://https://<EKS_API_SERVER> \
            --deploy-mode cluster \
            --name spark-pi \
            --class org.apache.spark.examples.SparkPi \
            --conf spark.kubernetes.namespace=helios \
            --conf spark.executor.instances=2 \
            --conf spark.executor.memory=2G \
            --conf spark.executor.cores=2 \
            --conf spark.kubernetes.container.image=<registry>/spark-pi-3.0 \
            --conf spark.kubernetes.container.image.pullPolicy=Always \
            --conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
            --conf spark.jars.ivy=/tmp/.ivy \
            local:///opt/spark/examples/jars/spark-examples_2.11-2.4.0.jar" ]
          env: 
          - name: AWS_ROLE_ARN
            value: arn:aws:iam::123456789012:role/my-serviceaccount-Role1
          - name: AWS_WEB_IDENTITY_TOKEN_FILE
            value: /var/run/secrets/eks.amazonaws.com/serviceaccount/token
          volumeMounts:
          - mountPath: /var/run/secrets/eks.amazonaws.com/serviceaccount
            name: aws-iam-token
            readOnly: true
      serviceAccountName: spark
      restartPolicy: Never
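
If you use eksctl, the annotated service account can be created in one step. A sketch, with the cluster name and policy ARN as placeholders:

eksctl create iamserviceaccount \
  --cluster <cluster-name> \
  --namespace helios \
  --name spark \
  --attach-policy-arn arn:aws:iam::123456789012:policy/<your-policy> \
  --approve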
-- Rico
Source: StackOverflow

7/12/2020

It finally worked after I commented out the line below in the Dockerfile:

USER ${spark_uid}

The container now runs as root, but at least it works.

-- NumeroUno
Source: StackOverflow

2/3/2022

I ran into the same issue and was able to resolve it by specifying runAsUser in the pod spec, without having to modify the Spark Docker image:

securityContext:
  runAsUser: 65534
  runAsGroup: 65534
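
UID 65534 conventionally maps to the nobody user, which already has an /etc/passwd entry in most base images, so the username lookup succeeds. A sketch of where this goes in the Job manifest (placement assumed from the Job in the question):

spec:
  template:
    spec:
      securityContext:
        runAsUser: 65534
        runAsGroup: 65534
      containers:
        - name: spark-pi
          image: <registry>/spark-pi-3.0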
-- vikasn
Source: StackOverflow

1/31/2022

I had the same issue and fixed it by adding a RUN echo ... >> /etc/passwd line, which gives the Spark UID a passwd entry, near the end of the Spark Dockerfile:

RUN echo '1000:x:1000:0:anonymous uid:/opt/spark:/bin/false' >> /etc/passwd
ENTRYPOINT [ "/opt/entrypoint.sh" ]
# Specify the User that the actual main process will run as
USER ${spark_uid}

The full Dockerfile then looks like this:

cat spark-3.2.0-bin-hadoop3.2/kubernetes/dockerfiles/spark/Dockerfile

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
ARG ROOT_CONTAINER=ubuntu:focal

FROM ${ROOT_CONTAINER}

ARG openjdk_version="8"
ARG spark_uid=1000

# Before building the docker image, first build and make a Spark distribution following
# the instructions in http://spark.apache.org/docs/latest/building-spark.html.
# If this docker file is being used in the context of building your images from a Spark
# distribution, the docker build command should be invoked from the top level directory
# of the Spark distribution. E.g.:
# docker build -t spark:latest -f kubernetes/dockerfiles/spark/Dockerfile .
RUN apt-get update --yes && \
    apt-get install --yes --no-install-recommends \
    "openjdk-${openjdk_version}-jre-headless" \
    ca-certificates-java 

RUN apt-get install --yes software-properties-common 
RUN add-apt-repository ppa:deadsnakes/ppa
RUN apt-get update && apt-get install -y \
        python3.7 \
        python3-pip \
        python3-distutils \
        python3-setuptools
RUN pip install pyspark==3.2.0

RUN set -ex && \
    sed -i 's/http:\/\/deb.\(.*\)/https:\/\/deb.\1/g' /etc/apt/sources.list && \
    apt-get update && \
    ln -s /lib /lib64 && \
    export DEBIAN_FRONTEND=noninteractive && \
    apt install -y -qq bash tini libc6 libpam-modules krb5-user libnss3 procps  && \
    mkdir -p /opt/spark && \
    mkdir -p /opt/spark/examples && \
    mkdir -p /opt/spark/work-dir && \
    mkdir -p /etc/metrics/conf/ && \
    mkdir -p /opt/hadoop/ && \
    touch /opt/spark/RELEASE && \
    rm /bin/sh && \
    ln -sv /bin/bash /bin/sh && \
    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
    chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
    apt-get clean && rm -rf /var/lib/apt/lists/* && \
    rm -rf /var/cache/apt/*

COPY jars /opt/spark/jars
COPY bin /opt/spark/bin
COPY sbin /opt/spark/sbin
COPY kubernetes/dockerfiles/spark/entrypoint.sh /opt/
COPY kubernetes/dockerfiles/spark/decom.sh /opt/
COPY examples /opt/spark/examples
COPY kubernetes/tests /opt/spark/tests
COPY data /opt/spark/data
COPY conf/prometheus.yaml /etc/metrics/conf/


ENV SPARK_HOME /opt/spark
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
WORKDIR /opt/spark/work-dir
RUN chmod g+w /opt/spark/work-dir
RUN chmod a+x /opt/decom.sh

RUN  mkdir -p /opt/spark/logs && \
     chown -R 1000:1000 /opt/spark/logs

RUN echo '1000:x:1000:0:anonymous uid:/opt/spark:/bin/false' >> /etc/passwd
RUN cat /etc/passwd
ENTRYPOINT [ "/opt/entrypoint.sh" ]
# Specify the User that the actual main process will run as
USER ${spark_uid}

Build the Spark Docker image:

sudo ./bin/docker-image-tool.sh -r <my_docker_repo>/spark-3.2.0-bin-hadoop3.2-gcs -t <tag_number> build
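
Then push the images to your registry so EKS can pull them (the push subcommand takes the same flags):

sudo ./bin/docker-image-tool.sh -r <my_docker_repo>/spark-3.2.0-bin-hadoop3.2-gcs -t <tag_number> push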

-- Manju N
Source: StackOverflow