Uploaded image for project: 'Jenkins'
  1. Jenkins
  2. JENKINS-54988

Jenkins queue is locked (not able to start any build) because KubernetesSlave._terminate is stuck

    XMLWordPrintable

    Details

    • Similar Issues:

      Description

      Hi all, apparently this call can hang forever, and while it hangs it holds the Jenkins queue in a locked state, so no build is able to start:
      https://github.com/jenkinsci/kubernetes-plugin/blob/master/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesSlave.java#L262

      Here is a stack trace from a thread dump, and a screenshot:

      "jenkins.util.Timer [#6] / waiting for JNLP4-connect connection from 10.77.102.75/10.77.102.75:57072 id=77127042" daemon prio=5 TIMED_WAITING
      	java.lang.Object.wait(Native Method)
      	hudson.remoting.Request.call(Request.java:177)
      	hudson.remoting.Channel.call(Channel.java:954)
      	org.csanchez.jenkins.plugins.kubernetes.KubernetesSlave._terminate(KubernetesSlave.java:236)
      	hudson.slaves.AbstractCloudSlave.terminate(AbstractCloudSlave.java:67)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:59)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:43)
      	hudson.slaves.SlaveComputer$4.run(SlaveComputer.java:843)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.slaves.SlaveComputer.setNode(SlaveComputer.java:840)
      	hudson.model.AbstractCIBase.updateComputer(AbstractCIBase.java:121)
      	hudson.model.AbstractCIBase.access$000(AbstractCIBase.java:46)
      	hudson.model.AbstractCIBase$2.run(AbstractCIBase.java:207)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.model.AbstractCIBase.updateComputerList(AbstractCIBase.java:190)
      	jenkins.model.Jenkins.updateComputerList(Jenkins.java:1552)
      	jenkins.model.Nodes$6.run(Nodes.java:261)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	jenkins.model.Nodes.removeNode(Nodes.java:252)
      	jenkins.model.Jenkins.removeNode(Jenkins.java:2066)
      	hudson.slaves.AbstractCloudSlave.terminate(AbstractCloudSlave.java:70)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:59)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:43)
      	hudson.slaves.SlaveComputer$4.run(SlaveComputer.java:843)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.slaves.SlaveComputer.setNode(SlaveComputer.java:840)
      	hudson.model.AbstractCIBase.updateComputer(AbstractCIBase.java:121)
      	hudson.model.AbstractCIBase.access$000(AbstractCIBase.java:46)
      	hudson.model.AbstractCIBase$2.run(AbstractCIBase.java:207)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.model.AbstractCIBase.updateComputerList(AbstractCIBase.java:190)
      	jenkins.model.Jenkins.updateComputerList(Jenkins.java:1552)
      	jenkins.model.Nodes$6.run(Nodes.java:261)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	jenkins.model.Nodes.removeNode(Nodes.java:252)
      	jenkins.model.Jenkins.removeNode(Jenkins.java:2066)
      	hudson.slaves.AbstractCloudSlave.terminate(AbstractCloudSlave.java:70)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:59)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:43)
      	hudson.slaves.SlaveComputer$4.run(SlaveComputer.java:843)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.slaves.SlaveComputer.setNode(SlaveComputer.java:840)
      	hudson.model.AbstractCIBase.updateComputer(AbstractCIBase.java:121)
      	hudson.model.AbstractCIBase.access$000(AbstractCIBase.java:46)
      	hudson.model.AbstractCIBase$2.run(AbstractCIBase.java:207)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.model.AbstractCIBase.updateComputerList(AbstractCIBase.java:190)
      	jenkins.model.Jenkins.updateComputerList(Jenkins.java:1552)
      	jenkins.model.Nodes$6.run(Nodes.java:261)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	jenkins.model.Nodes.removeNode(Nodes.java:252)
      	jenkins.model.Jenkins.removeNode(Jenkins.java:2066)
      	hudson.slaves.AbstractCloudSlave.terminate(AbstractCloudSlave.java:70)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:59)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:43)
      	hudson.slaves.SlaveComputer$4.run(SlaveComputer.java:843)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.slaves.SlaveComputer.setNode(SlaveComputer.java:840)
      	hudson.model.AbstractCIBase.updateComputer(AbstractCIBase.java:121)
      	hudson.model.AbstractCIBase.access$000(AbstractCIBase.java:46)
      	hudson.model.AbstractCIBase$2.run(AbstractCIBase.java:207)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.model.AbstractCIBase.updateComputerList(AbstractCIBase.java:190)
      	jenkins.model.Jenkins.updateComputerList(Jenkins.java:1552)
      	jenkins.model.Nodes$6.run(Nodes.java:261)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	jenkins.model.Nodes.removeNode(Nodes.java:252)
      	jenkins.model.Jenkins.removeNode(Jenkins.java:2066)
      	hudson.slaves.AbstractCloudSlave.terminate(AbstractCloudSlave.java:70)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:59)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:43)
      	hudson.slaves.SlaveComputer$4.run(SlaveComputer.java:843)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.slaves.SlaveComputer.setNode(SlaveComputer.java:840)
      	hudson.model.AbstractCIBase.updateComputer(AbstractCIBase.java:121)
      	hudson.model.AbstractCIBase.access$000(AbstractCIBase.java:46)
      	hudson.model.AbstractCIBase$2.run(AbstractCIBase.java:207)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.model.AbstractCIBase.updateComputerList(AbstractCIBase.java:190)
      	jenkins.model.Jenkins.updateComputerList(Jenkins.java:1552)
      	jenkins.model.Nodes$6.run(Nodes.java:261)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	jenkins.model.Nodes.removeNode(Nodes.java:252)
      	jenkins.model.Jenkins.removeNode(Jenkins.java:2066)
      	hudson.slaves.AbstractCloudSlave.terminate(AbstractCloudSlave.java:70)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:59)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:43)
      	hudson.slaves.SlaveComputer$4.run(SlaveComputer.java:843)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.slaves.SlaveComputer.setNode(SlaveComputer.java:840)
      	hudson.model.AbstractCIBase.updateComputer(AbstractCIBase.java:121)
      	hudson.model.AbstractCIBase.access$000(AbstractCIBase.java:46)
      	hudson.model.AbstractCIBase$2.run(AbstractCIBase.java:207)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.model.AbstractCIBase.updateComputerList(AbstractCIBase.java:190)
      	jenkins.model.Jenkins.updateComputerList(Jenkins.java:1552)
      	jenkins.model.Nodes$6.run(Nodes.java:261)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	jenkins.model.Nodes.removeNode(Nodes.java:252)
      	jenkins.model.Jenkins.removeNode(Jenkins.java:2066)
      	hudson.slaves.AbstractCloudSlave.terminate(AbstractCloudSlave.java:70)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:59)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:43)
      	hudson.slaves.SlaveComputer$4.run(SlaveComputer.java:843)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.slaves.SlaveComputer.setNode(SlaveComputer.java:840)
      	hudson.model.AbstractCIBase.updateComputer(AbstractCIBase.java:121)
      	hudson.model.AbstractCIBase.access$000(AbstractCIBase.java:46)
      	hudson.model.AbstractCIBase$2.run(AbstractCIBase.java:207)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.model.AbstractCIBase.updateComputerList(AbstractCIBase.java:190)
      	jenkins.model.Jenkins.updateComputerList(Jenkins.java:1552)
      	jenkins.model.Nodes$6.run(Nodes.java:261)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	jenkins.model.Nodes.removeNode(Nodes.java:252)
      	jenkins.model.Jenkins.removeNode(Jenkins.java:2066)
      	hudson.slaves.AbstractCloudSlave.terminate(AbstractCloudSlave.java:70)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:59)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:43)
      	hudson.slaves.SlaveComputer$4.run(SlaveComputer.java:843)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.slaves.SlaveComputer.setNode(SlaveComputer.java:840)
      	hudson.model.AbstractCIBase.updateComputer(AbstractCIBase.java:121)
      	hudson.model.AbstractCIBase.access$000(AbstractCIBase.java:46)
      	hudson.model.AbstractCIBase$2.run(AbstractCIBase.java:207)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.model.AbstractCIBase.updateComputerList(AbstractCIBase.java:190)
      	jenkins.model.Jenkins.updateComputerList(Jenkins.java:1552)
      	jenkins.model.Nodes$6.run(Nodes.java:261)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	jenkins.model.Nodes.removeNode(Nodes.java:252)
      	jenkins.model.Jenkins.removeNode(Jenkins.java:2066)
      	hudson.slaves.AbstractCloudSlave.terminate(AbstractCloudSlave.java:70)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:59)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:43)
      	hudson.slaves.SlaveComputer$4.run(SlaveComputer.java:843)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.slaves.SlaveComputer.setNode(SlaveComputer.java:840)
      	hudson.model.AbstractCIBase.updateComputer(AbstractCIBase.java:121)
      	hudson.model.AbstractCIBase.access$000(AbstractCIBase.java:46)
      	hudson.model.AbstractCIBase$2.run(AbstractCIBase.java:207)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.model.AbstractCIBase.updateComputerList(AbstractCIBase.java:190)
      	jenkins.model.Jenkins.updateComputerList(Jenkins.java:1552)
      	jenkins.model.Nodes$6.run(Nodes.java:261)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	jenkins.model.Nodes.removeNode(Nodes.java:252)
      	jenkins.model.Jenkins.removeNode(Jenkins.java:2066)
      	hudson.slaves.AbstractCloudSlave.terminate(AbstractCloudSlave.java:70)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:59)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:43)
      	hudson.slaves.SlaveComputer$4.run(SlaveComputer.java:843)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.slaves.SlaveComputer.setNode(SlaveComputer.java:840)
      	hudson.model.AbstractCIBase.updateComputer(AbstractCIBase.java:121)
      	hudson.model.AbstractCIBase.access$000(AbstractCIBase.java:46)
      	hudson.model.AbstractCIBase$2.run(AbstractCIBase.java:207)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.model.AbstractCIBase.updateComputerList(AbstractCIBase.java:190)
      	jenkins.model.Jenkins.updateComputerList(Jenkins.java:1552)
      	jenkins.model.Nodes$6.run(Nodes.java:261)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	jenkins.model.Nodes.removeNode(Nodes.java:252)
      	jenkins.model.Jenkins.removeNode(Jenkins.java:2066)
      	hudson.slaves.AbstractCloudSlave.terminate(AbstractCloudSlave.java:70)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:59)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:43)
      	hudson.slaves.SlaveComputer$4.run(SlaveComputer.java:843)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.slaves.SlaveComputer.setNode(SlaveComputer.java:840)
      	hudson.model.AbstractCIBase.updateComputer(AbstractCIBase.java:121)
      	hudson.model.AbstractCIBase.access$000(AbstractCIBase.java:46)
      	hudson.model.AbstractCIBase$2.run(AbstractCIBase.java:207)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.model.AbstractCIBase.updateComputerList(AbstractCIBase.java:190)
      	jenkins.model.Jenkins.updateComputerList(Jenkins.java:1552)
      	jenkins.model.Nodes$6.run(Nodes.java:261)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	jenkins.model.Nodes.removeNode(Nodes.java:252)
      	jenkins.model.Jenkins.removeNode(Jenkins.java:2066)
      	hudson.slaves.AbstractCloudSlave.terminate(AbstractCloudSlave.java:70)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:59)
      	hudson.slaves.CloudRetentionStrategy.check(CloudRetentionStrategy.java:43)
      	hudson.slaves.ComputerRetentionWork$1.run(ComputerRetentionWork.java:72)
      	hudson.model.Queue._withLock(Queue.java:1380)
      	hudson.model.Queue.withLock(Queue.java:1257)
      	hudson.slaves.ComputerRetentionWork.doRun(ComputerRetentionWork.java:63)
      	hudson.triggers.SafeTimerTask.run(SafeTimerTask.java:72)
      	jenkins.security.ImpersonatingScheduledExecutorService$1.run(ImpersonatingScheduledExecutorService.java:58)
      	java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
      	java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
      	java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
      	java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
      	java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
      	java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
      	java.lang.Thread.run(Thread.java:748)
      

        Attachments

          Issue Links

            Activity

            Show
            jglick Jesse Glick added a comment - That is https://github.com/jenkinsci/kubernetes-plugin/blob/4ce4fabd7c0f07964b98ebd55e7a334640595a31/src/main/java/org/csanchez/jenkins/plugins/kubernetes/KubernetesSlave.java#L263 (permalink). Probably using callAsync instead of the blocking Channel.call would suffice here.

              People

              Assignee:
              Unassigned Unassigned
              Reporter:
              iceiceice Alexey Grigorov
              Votes:
              0 Vote for this issue
              Watchers:
              2 Start watching this issue

                Dates

                Created:
                Updated:
                Resolved: