-
Type:
Bug
-
Resolution: Fixed
-
Priority:
Major
-
Component/s: ec2-plugin
-
None
-
Environment: Jenkins ver. 2.138.2 + EC2 1.39
Our Jenkins master (0 executors) occasionally crashes. Looking at the health-check logs, there appears to be a thread deadlock involving the ec2 plugin:
Starting health checks at Mon Oct 29 05:32:10 UTC 2018
Health check results at Mon Oct 29 05:32:10 UTC 2018:
* disk-space: Result{isHealthy=true, timestamp=2018-10-29T05:32:10.037Z}
* plugins: Result{isHealthy=true, message=No failed plugins, timestamp=2018-10-29T05:32:10.038Z}
* temporary-space: Result{isHealthy=true, timestamp=2018-10-29T05:32:10.038Z}
* thread-deadlock: Result{isHealthy=false, message=[Computer.threadPoolForRemoting [#9788] locked on java.util.concurrent.locks.ReentrantLock$NonfairSync@20e93cc3 (owned by jenkins.util.Timer [#7]):
at sun.misc.Unsafe.park(Native Method)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
at java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:870)
at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1199)
at java.util.concurrent.locks.ReentrantLock$NonfairSync.lock(ReentrantLock.java:209)
at java.util.concurrent.locks.ReentrantLock.lock(ReentrantLock.java:285)
at hudson.slaves.NodeProvisioner.update(NodeProvisioner.java:190)
at hudson.slaves.NodeProvisioner.access$000(NodeProvisioner.java:61)
at hudson.slaves.NodeProvisioner$1.run(NodeProvisioner.java:176)
at jenkins.util.ContextResettingExecutorService$1.run(ContextResettingExecutorService.java:28)
at jenkins.security.ImpersonatingExecutorService$1.run(ImpersonatingExecutorService.java:59)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
, jenkins.util.Timer [#9] locked on hudson.plugins.ec2.AmazonEC2Cloud@3fdbeb4b (owned by jenkins.util.Timer [#7]):
at hudson.plugins.ec2.EC2Cloud.connect(EC2Cloud.java:638)
at hudson.plugins.ec2.EC2SpotSlave.getSpotRequest(EC2SpotSlave.java:114)
at hudson.plugins.ec2.EC2SpotSlave.getInstanceId(EC2SpotSlave.java:155)
at hudson.plugins.ec2.EC2Computer._describeInstanceOnce(EC2Computer.java:173)
at hudson.plugins.ec2.EC2Computer._describeInstance(EC2Computer.java:157)
at hudson.plugins.ec2.EC2Computer.describeInstance(EC2Computer.java:115)
at hudson.plugins.ec2.EC2Computer.getUptime(EC2Computer.java:141)
at hudson.plugins.ec2.EC2RetentionStrategy.internalCheck(EC2RetentionStrategy.java:104)
at hudson.plugins.ec2.EC2RetentionStrategy.check(EC2RetentionStrategy.java:85)
at hudson.plugins.ec2.EC2RetentionStrategy.check(EC2RetentionStrategy.java:43)
at hudson.slaves.ComputerRetentionWork$1.run(ComputerRetentionWork.java:72)
at hudson.model.Queue._withLock(Queue.java:1380)
at hudson.model.Queue.withLock(Queue.java:1257)
at hudson.slaves.ComputerRetentionWork.doRun(ComputerRetentionWork.java:63)
at hudson.triggers.SafeTimerTask.run(SafeTimerTask.java:72)
at jenkins.security.ImpersonatingScheduledExecutorService$1.run(ImpersonatingScheduledExecutorService.java:58)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
, jenkins.util.Timer [#7] locked on java.util.concurrent.locks.ReentrantLock$NonfairSync@4f27ad4d (owned by jenkins.util.Timer [#9]):
at sun.misc.Unsafe.park(Native Method)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
at java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:870)
at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1199)
at java.util.concurrent.locks.ReentrantLock$NonfairSync.lock(ReentrantLock.java:209)
at java.util.concurrent.locks.ReentrantLock.lock(ReentrantLock.java:285)
at hudson.model.Queue._withLock(Queue.java:1437)
at hudson.model.Queue.withLock(Queue.java:1300)
at jenkins.model.Nodes.updateNode(Nodes.java:193)
at jenkins.model.Jenkins.updateNode(Jenkins.java:2080)
at hudson.model.Node.save(Node.java:140)
at hudson.util.PersistedList.onModified(PersistedList.java:173)
at hudson.util.PersistedList.replaceBy(PersistedList.java:85)
at hudson.model.Slave.<init>(Slave.java:198)
at hudson.plugins.ec2.EC2AbstractSlave.<init>(EC2AbstractSlave.java:134)
at hudson.plugins.ec2.EC2SpotSlave.<init>(EC2SpotSlave.java:43)
at hudson.plugins.ec2.EC2SpotSlave.<init>(EC2SpotSlave.java:36)
at hudson.plugins.ec2.SlaveTemplate.newSpotSlave(SlaveTemplate.java:914)
at hudson.plugins.ec2.SlaveTemplate.provisionSpot(SlaveTemplate.java:893)
at hudson.plugins.ec2.SlaveTemplate.provision(SlaveTemplate.java:404)
at hudson.plugins.ec2.EC2Cloud.getNewOrExistingAvailableSlave(EC2Cloud.java:534)
at hudson.plugins.ec2.EC2Cloud.provision(EC2Cloud.java:551)
at hudson.slaves.NodeProvisioner$StandardStrategyImpl.apply(NodeProvisioner.java:715)
at hudson.slaves.NodeProvisioner.update(NodeProvisioner.java:320)
at hudson.slaves.NodeProvisioner.access$000(NodeProvisioner.java:61)
at hudson.slaves.NodeProvisioner$NodeProvisionerInvoker.doRun(NodeProvisioner.java:807)
at hudson.triggers.SafeTimerTask.run(SafeTimerTask.java:72)
at jenkins.security.ImpersonatingScheduledExecutorService$1.run(ImpersonatingScheduledExecutorService.java:58)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
], timestamp=2018-10-29T05:32:10.085Z}
(Sorry, the formatting looks bad, so I uploaded it here as well: https://pastebin.co.za/5677303661068288)
- relates to
-
JENKINS-55189 Thread deadlock when spinning up hosts with ec2-plugin
-
- Resolved
-
-
JENKINS-54187 EC2 Plugin deadlock leaving Jenkins unresponsive
-
- Closed
-