Enable test retry in distributed testing (#5702)

* TM-88 only write the test to the file if it passes; otherwise retry

* TM-88 1 failing test for testing purposes

* TM-88 fail an integration test instead of unit test

* TM-88 include failed test for retry

* TM-88 new logic for retrying failed tests

* TM-88 refactored to use the Retry class when dealing with test failures, and copy results even after pod failure

* TM-88 store remaining pods as a set to prevent duplicates, limit retries to prevent a crashed build, and add a bit more logging

* TM-88 atomic int instead of int array

* TM-88 moving atomic int outside retry loops

* TM-88 removing second retry as it is not needed

* TM-88 small fix to final copy

* TM-88 while loop to prevent destruction of results on test retry

* TM-88 removing shutdown hook

* TM-88 reverting to previous implementation

* TM-88 copying xml files before retry

* TM-88 removing fail
This commit is contained in:
Razvan Codreanu 2019-11-15 15:07:23 +00:00 committed by Stefano Franz
parent b48a714aaa
commit deed4e9763
3 changed files with 32 additions and 12 deletions

View File

@ -7,6 +7,8 @@ import org.gradle.api.Plugin
import org.gradle.api.Project import org.gradle.api.Project
import org.gradle.api.Task import org.gradle.api.Task
import org.gradle.api.tasks.testing.Test import org.gradle.api.tasks.testing.Test
import org.gradle.api.tasks.testing.TestResult
import org.gradle.internal.impldep.junit.framework.TestFailure
import java.util.stream.Collectors import java.util.stream.Collectors
@ -252,8 +254,10 @@ class DistributedTesting implements Plugin<Project> {
} }
afterTest { desc, result -> afterTest { desc, result ->
executedTestsFile.withWriterAppend { writer -> if (result.getResultType() == TestResult.ResultType.SUCCESS ) {
writer.writeLine(desc.getClassName() + "." + desc.getName()) executedTestsFile.withWriterAppend { writer ->
writer.writeLine(desc.getClassName() + "." + desc.getName())
}
} }
} }
} }

View File

@ -18,7 +18,6 @@ import io.fabric8.kubernetes.client.dsl.PodResource;
import io.fabric8.kubernetes.client.utils.Serialization; import io.fabric8.kubernetes.client.utils.Serialization;
import net.corda.testing.retry.Retry; import net.corda.testing.retry.Retry;
import okhttp3.Response; import okhttp3.Response;
import org.apache.commons.compress.utils.IOUtils;
import org.gradle.api.DefaultTask; import org.gradle.api.DefaultTask;
import org.gradle.api.tasks.TaskAction; import org.gradle.api.tasks.TaskAction;
import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.NotNull;
@ -27,8 +26,6 @@ import java.io.BufferedReader;
import java.io.BufferedWriter; import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.File; import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter; import java.io.FileWriter;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
@ -36,23 +33,25 @@ import java.io.InputStreamReader;
import java.io.PipedInputStream; import java.io.PipedInputStream;
import java.io.PipedOutputStream; import java.io.PipedOutputStream;
import java.math.BigInteger; import java.math.BigInteger;
import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Optional; import java.util.Optional;
import java.util.Queue; import java.util.Queue;
import java.util.Random; import java.util.Random;
import java.util.Set;
import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
import java.util.concurrent.Future; import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.IntStream; import java.util.stream.IntStream;
@ -76,7 +75,7 @@ public class KubesTest extends DefaultTask {
Integer memoryGbPerFork = 6; Integer memoryGbPerFork = 6;
public volatile List<File> testOutput = Collections.emptyList(); public volatile List<File> testOutput = Collections.emptyList();
public volatile List<KubePodResult> containerResults = Collections.emptyList(); public volatile List<KubePodResult> containerResults = Collections.emptyList();
private final List<String> remainingPods = Collections.synchronizedList(new ArrayList()); private final Set<String> remainingPods = Collections.synchronizedSet(new HashSet());
public static String NAMESPACE = "thisisatest"; public static String NAMESPACE = "thisisatest";
int k8sTimeout = 50 * 1_000; int k8sTimeout = 50 * 1_000;
@ -214,7 +213,7 @@ public class KubesTest extends DefaultTask {
}); });
int podNumber = podIdx + 1; int podNumber = podIdx + 1;
final AtomicInteger testRetries = new AtomicInteger(0);
try { try {
// pods might die, so we retry // pods might die, so we retry
return Retry.fixed(numberOfRetries).run(() -> { return Retry.fixed(numberOfRetries).run(() -> {
@ -248,11 +247,20 @@ public class KubesTest extends DefaultTask {
if (!podLogsDirectory.exists()) { if (!podLogsDirectory.exists()) {
podLogsDirectory.mkdirs(); podLogsDirectory.mkdirs();
} }
File podOutput = executeBuild(namespace, numberOfPods, podIdx, podName, podLogsDirectory, printOutput, stdOutOs, stdOutIs, errChannelStream, waiter);
File podOutput = executeBuild(namespace, numberOfPods, podIdx, podName, podLogsDirectory, printOutput, stdOutOs, stdOutIs, errChannelStream, waiter);
int resCode = waiter.join(); int resCode = waiter.join();
getProject().getLogger().lifecycle("build has ended on on pod " + podName + " (" + podNumber + "/" + numberOfPods + ") with result " + resCode + " , gathering results"); getProject().getLogger().lifecycle("build has ended on on pod " + podName + " (" + podNumber + "/" + numberOfPods + ") with result " + resCode + " , gathering results");
Collection<File> binaryResults = downloadTestXmlFromPod(namespace, createdPod); Collection<File> binaryResults;
//we don't retry on the final attempt as this will crash the build and some pods might not get to finish
if (resCode != 0 && testRetries.getAndIncrement() < numberOfRetries - 1) {
downloadTestXmlFromPod(namespace, createdPod);
getProject().getLogger().lifecycle("There are test failures in this pod. Retrying failed tests!!!");
throw new RuntimeException("There are test failures in this pod");
} else {
binaryResults = downloadTestXmlFromPod(namespace, createdPod);
}
getLogger().lifecycle("removing pod " + podName + " (" + podNumber + "/" + numberOfPods + ") after completed build"); getLogger().lifecycle("removing pod " + podName + " (" + podNumber + "/" + numberOfPods + ") after completed build");
try (KubernetesClient client = getKubernetesClient()) { try (KubernetesClient client = getKubernetesClient()) {
@ -267,6 +275,8 @@ public class KubesTest extends DefaultTask {
return new KubePodResult(podIdx, resCode, podOutput, binaryResults); return new KubePodResult(podIdx, resCode, podOutput, binaryResults);
}); });
} catch (Retry.RetryException e) { } catch (Retry.RetryException e) {
Pod pod = getKubernetesClient().pods().inNamespace(namespace).create(buildPodRequest(podName, pvc));
downloadTestXmlFromPod(namespace, pod);
throw new RuntimeException("Failed to build in pod " + podName + " (" + podNumber + "/" + numberOfPods + ") in " + numberOfRetries + " attempts", e); throw new RuntimeException("Failed to build in pod " + podName + " (" + podNumber + "/" + numberOfPods + ") in " + numberOfRetries + " attempts", e);
} }
} }

View File

@ -2,10 +2,14 @@ package net.corda.testing.driver
import net.corda.core.concurrent.CordaFuture import net.corda.core.concurrent.CordaFuture
import net.corda.core.identity.CordaX500Name import net.corda.core.identity.CordaX500Name
import net.corda.core.internal.* import net.corda.core.internal.CertRole
import net.corda.core.internal.concurrent.fork import net.corda.core.internal.concurrent.fork
import net.corda.core.internal.concurrent.openFuture import net.corda.core.internal.concurrent.openFuture
import net.corda.core.internal.concurrent.transpose import net.corda.core.internal.concurrent.transpose
import net.corda.core.internal.div
import net.corda.core.internal.isRegularFile
import net.corda.core.internal.list
import net.corda.core.internal.readLines
import net.corda.core.utilities.getOrThrow import net.corda.core.utilities.getOrThrow
import net.corda.node.internal.NodeStartup import net.corda.node.internal.NodeStartup
import net.corda.testing.common.internal.ProjectStructure.projectRootDir import net.corda.testing.common.internal.ProjectStructure.projectRootDir
@ -15,7 +19,9 @@ import net.corda.testing.core.DUMMY_BANK_B_NAME
import net.corda.testing.http.HttpApi import net.corda.testing.http.HttpApi
import net.corda.testing.node.internal.addressMustBeBound import net.corda.testing.node.internal.addressMustBeBound
import net.corda.testing.node.internal.addressMustNotBeBound import net.corda.testing.node.internal.addressMustNotBeBound
import org.assertj.core.api.Assertions.* import org.assertj.core.api.Assertions.assertThat
import org.assertj.core.api.Assertions.assertThatCode
import org.assertj.core.api.Assertions.assertThatIllegalArgumentException
import org.json.simple.JSONObject import org.json.simple.JSONObject
import org.junit.Test import org.junit.Test
import java.util.* import java.util.*