Skip to content

Commit f6c9e89

Browse files
authored
Merge pull request #408 from Vlatombe/resuming-node-fails-if-not-assigned
Build fails to resume if controller crashes before queue is saved
2 parents 6ae8123 + 680ce20 commit f6c9e89

File tree

3 files changed

+180
-7
lines changed

3 files changed

+180
-7
lines changed

pom.xml

+11-5
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
<parent>
2929
<groupId>org.jenkins-ci.plugins</groupId>
3030
<artifactId>plugin</artifactId>
31-
<version>4.88</version>
31+
<version>5.3</version>
3232
<relativePath/>
3333
</parent>
3434
<groupId>org.jenkins-ci.plugins.workflow</groupId>
@@ -67,8 +67,9 @@
6767
<properties>
6868
<changelist>999999-SNAPSHOT</changelist>
6969
<!-- TODO Until in plugin-pom -->
70-
<jenkins-test-harness.version>2182.v0138ccb_c0b_cb_</jenkins-test-harness.version>
71-
<jenkins.version>2.440.1</jenkins.version>
70+
<jenkins-test-harness.version>2357.vf2a_982b_b_910f</jenkins-test-harness.version>
71+
<jenkins.baseline>2.479</jenkins.baseline>
72+
<jenkins.version>${jenkins.baseline}.1</jenkins.version>
7273
<useBeta>true</useBeta>
7374
<gitHubRepo>jenkinsci/${project.artifactId}-plugin</gitHubRepo>
7475
<hpi.compatibleSinceVersion>2.40</hpi.compatibleSinceVersion>
@@ -77,8 +78,8 @@
7778
<dependencies>
7879
<dependency>
7980
<groupId>io.jenkins.tools.bom</groupId>
80-
<artifactId>bom-2.440.x</artifactId>
81-
<version>2907.vcb_35d6f2f7de</version>
81+
<artifactId>bom-${jenkins.baseline}.x</artifactId>
82+
<version>3482.vc10d4f6da_28a_</version>
8283
<scope>import</scope>
8384
<type>pom</type>
8485
</dependency>
@@ -116,6 +117,11 @@
116117
<artifactId>workflow-basic-steps</artifactId>
117118
<scope>test</scope>
118119
</dependency>
120+
<dependency>
121+
<groupId>org.jenkins-ci.plugins</groupId>
122+
<artifactId>pipeline-input-step</artifactId>
123+
<scope>test</scope>
124+
</dependency>
119125
<dependency>
120126
<groupId>org.jenkins-ci.plugins</groupId>
121127
<artifactId>pipeline-stage-step</artifactId>

src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java

+20-2
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,26 @@ public void stop(@NonNull Throwable cause) throws Exception {
215215
@Override public void onResume() {
216216
try {
217217
if (state == null) {
218-
Run<?, ?> run = getContext().get(Run.class);
219-
LOGGER.fine(() -> "No ExecutorStepDynamicContext found for node block in " + run + "; perhaps loading from a historical build record, hoping for the best");
218+
var flowNode = getContext().get(FlowNode.class);
219+
LOGGER.fine(() -> "node block " + getContext() + " not yet scheduled, checking for an existing queue item");
220+
if (flowNode == null) {
221+
LOGGER.fine(() -> "No FlowNode found for node block " + getContext() + "; can't recover" );
222+
} else {
223+
var action = flowNode.getAction(QueueItemActionImpl.class);
224+
if (action == null) {
225+
LOGGER.fine(() -> "No QueueItemAction found for node block " + getContext() + "; can't recover");
226+
} else {
227+
LOGGER.fine(() -> "QueueItemAction with id=" + action.id + " found for node block " + getContext());
228+
var queueItem = action.itemInQueue();
229+
if (queueItem == null) {
230+
LOGGER.fine(() -> "Could not find queue item " + action.id + ", rescheduling it");
231+
flowNode.removeActions(QueueItemActionImpl.class);
232+
start();
233+
} else {
234+
LOGGER.fine(() -> "Found Queue.Item " + queueItem + " for node block " + getContext() + "; should be fine");
235+
}
236+
}
237+
}
220238
return;
221239
}
222240
state.resume(getContext());
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
/*
2+
* The MIT License
3+
*
4+
* Copyright (c) 2024, CloudBees, Inc.
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in
14+
* all copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+
* THE SOFTWARE.
23+
*/
24+
25+
package org.jenkinsci.plugins.workflow.support.steps;
26+
27+
import static org.awaitility.Awaitility.await;
28+
import static org.hamcrest.Matchers.allOf;
29+
import static org.hamcrest.Matchers.arrayWithSize;
30+
import static org.hamcrest.Matchers.hasItem;
31+
import static org.hamcrest.Matchers.hasSize;
32+
import static org.hamcrest.Matchers.is;
33+
import static org.hamcrest.Matchers.iterableWithSize;
34+
import static org.junit.Assert.assertTrue;
35+
36+
import java.io.IOException;
37+
import java.time.Duration;
38+
import java.util.List;
39+
import java.util.concurrent.TimeoutException;
40+
import java.util.logging.Level;
41+
import java.util.logging.Logger;
42+
import org.hamcrest.Description;
43+
import org.hamcrest.TypeSafeMatcher;
44+
import org.jenkinsci.plugins.workflow.cps.CpsFlowDefinition;
45+
import org.jenkinsci.plugins.workflow.flow.FlowExecutionList;
46+
import org.jenkinsci.plugins.workflow.graphanalysis.DepthFirstScanner;
47+
import org.jenkinsci.plugins.workflow.graphanalysis.NodeStepTypePredicate;
48+
import org.jenkinsci.plugins.workflow.job.WorkflowJob;
49+
import org.jenkinsci.plugins.workflow.support.steps.input.InputAction;
50+
import org.junit.Rule;
51+
import org.junit.Test;
52+
import org.jvnet.hudson.test.InboundAgentRule;
53+
import org.jvnet.hudson.test.JenkinsRule;
54+
import org.jvnet.hudson.test.PrefixedOutputStream;
55+
import org.jvnet.hudson.test.RealJenkinsRule;
56+
import org.jvnet.hudson.test.TailLog;
57+
58+
public class ExecutorStepExecutionRJRTest {
59+
private static final Logger LOGGER = Logger.getLogger(ExecutorStepExecutionRJRTest.class.getName());
60+
61+
@Rule public RealJenkinsRule rjr = new RealJenkinsRule().withColor(PrefixedOutputStream.Color.GREEN).withPackageLogger(ExecutorStepExecution.class, Level.FINE).withPackageLogger(FlowExecutionList.class, Level.FINE);
62+
63+
@Rule
64+
public InboundAgentRule iar = new InboundAgentRule();
65+
66+
@Test public void restartWhileWaitingForANode() throws Throwable {
67+
rjr.startJenkins();
68+
try (var tailLog = new TailLog(rjr, "p", 1)) {
69+
iar.createAgent(rjr, InboundAgentRule.Options.newBuilder().name("J").label("mib").color(PrefixedOutputStream.Color.YELLOW).webSocket().build());
70+
rjr.runRemotely(ExecutorStepExecutionRJRTest::setupJobAndStart);
71+
rjr.stopJenkinsForcibly();
72+
rjr.startJenkins();
73+
rjr.runRemotely(ExecutorStepExecutionRJRTest::resumeCompleteBranch1ThenBranch2);
74+
}
75+
}
76+
77+
private static void resumeCompleteBranch1ThenBranch2(JenkinsRule r) throws Throwable {
78+
var p = r.jenkins.getItemByFullName("p", WorkflowJob.class);
79+
var b = p.getBuildByNumber(1);
80+
await("Waiting for agent J to reconnect").atMost(Duration.ofSeconds(30)).until(() -> r.jenkins.getComputer("J").isOnline());
81+
var actions = await().until(() -> b.getActions(InputAction.class), allOf(iterableWithSize(1), hasItem(new InputActionWithId("Branch1"))));
82+
proceed(actions, "Branch1", p.getName() + "#" + b.number);
83+
// This is quicker than waitForMessage that can wait for up to 10 minutes
84+
actions = await().until(() -> b.getActions(InputAction.class), allOf(iterableWithSize(1), hasItem(new InputActionWithId("Branch2"))));
85+
r.waitForMessage("Complete branch 2 ?", b);
86+
proceed(actions, "Branch2", p.getName() + "#" + b.number);
87+
r.waitForCompletion(b);
88+
}
89+
90+
private static class InputActionWithId extends TypeSafeMatcher<InputAction> {
91+
private final String inputId;
92+
93+
private InputActionWithId(String inputId) {
94+
this.inputId = inputId;
95+
}
96+
97+
98+
@Override
99+
protected boolean matchesSafely(InputAction inputAction) {
100+
try {
101+
return inputAction.getExecutions().stream().anyMatch(execution -> inputId.equals(execution.getId()));
102+
} catch (InterruptedException | TimeoutException e) {
103+
return false;
104+
}
105+
}
106+
107+
@Override
108+
public void describeTo(Description description) {
109+
description.appendText("has input with id ").appendValue(inputId);
110+
}
111+
}
112+
113+
private static void proceed(List<InputAction> actions, String inputId, String name) throws InterruptedException, TimeoutException, IOException {
114+
for (var action : actions) {
115+
if (action.getExecutions().isEmpty()) {
116+
continue;
117+
}
118+
var inputStepExecution = action.getExecutions().get(0);
119+
if (inputId.equals(inputStepExecution.getId())) {
120+
LOGGER.info(() -> "proceeding " + name);
121+
inputStepExecution.proceed(null);
122+
break;
123+
}
124+
}
125+
}
126+
127+
private static void setupJobAndStart(JenkinsRule r) throws Exception {
128+
var p = r.createProject(WorkflowJob.class, "p");
129+
p.setDefinition(new CpsFlowDefinition("""
130+
parallel 'Branch 1': {
131+
node('mib') {
132+
input id: 'Branch1', message: 'Complete branch 1 ?'
133+
}
134+
}, 'Branch 2': {
135+
sleep 1
136+
node('mib') {
137+
input id:'Branch2', message: 'Complete branch 2 ?'
138+
}
139+
}
140+
""", true));
141+
var b = p.scheduleBuild2(0).waitForStart();
142+
r.waitForMessage("Complete branch 1 ?", b);
143+
assertTrue(b.isBuilding());
144+
await().until(() -> r.jenkins.getQueue().getItems(), arrayWithSize(1));
145+
LOGGER.info("Node steps: " + new DepthFirstScanner().filteredNodes(b.getExecution(), new NodeStepTypePredicate("node")));
146+
// "Branch 1" step start + "Branch 1" body start + "Branch 2" step start
147+
await().until(() -> new DepthFirstScanner().filteredNodes(b.getExecution(), new NodeStepTypePredicate("node")), hasSize(3));
148+
}
149+
}

0 commit comments

Comments
 (0)