CORDA-4034 ENT-5679 CORDA-3845 Correct instability of linear builds (#6722)

* CORDA-4034 Reduce forkEvery to 15 to attempt to mitigate memory leak.
* ENT-5679 Disable test which triggers OOM
* Run tests on two Jenkins agents
* Fixed processing JUnit test results by Allure
* Add timeouts to VaultObserverExceptionTest
* Revert "CORDA-3845: Update BC, log4j, slf4j (#6699)" to eliminate introduced memory leaks

Co-authored-by: Waldemar Zurowski <waldemar.zurowski@r3.com>
This commit is contained in:
Ross Nicoll 2020-09-18 00:23:16 +01:00 committed by GitHub
parent cc14c8e53a
commit 2064a572ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 249 additions and 82 deletions

View File

@ -52,10 +52,15 @@ String COMMON_GRADLE_PARAMS = [
pipeline {
agent { label 'standard' }
/*
* List options in alphabetical order
*/
options {
timestamps()
timeout(time: 6, unit: 'HOURS')
buildDiscarder(logRotator(daysToKeepStr: '14', artifactDaysToKeepStr: '14'))
parallelsAlwaysFailFast()
timeout(time: 6, unit: 'HOURS')
timestamps()
}
parameters {
@ -85,6 +90,11 @@ pipeline {
}
}
stage('Stash') {
steps {
stash name: 'compiled', useDefaultExcludes: false
}
}
stage('Sonatype Check') {
steps {
script {
@ -104,53 +114,110 @@ pipeline {
}
}
stage('Unit Test') {
steps {
sh script: [
'./gradlew',
COMMON_GRADLE_PARAMS,
'test'
].join(' ')
}
}
stage('All Tests') {
parallel {
stage('Another agent') {
agent {
label 'standard'
}
options {
skipDefaultCheckout true
}
post {
always {
archiveArtifacts artifacts: '**/*.log', fingerprint: false
archiveArtifacts artifacts: '**/build/reports/tests/**', fingerprint: false
junit testResults: '**/build/test-results/**/*.xml', keepLongStdio: true
/*
* Copy all JUnit results files into a single top level directory.
* This is necessary to stop the allure plugin from hitting out
* of memory errors due to being passed many directories with
* long paths.
*
* File names are pre-pended with a prefix when
* copied to avoid collisions between files where the same test
* classes have run on multiple agents.
*/
fileOperations([fileCopyOperation(
includes: '**/build/test-results/**/*.xml',
targetLocation: 'allure-input',
flattenFiles: true,
renameFiles: true,
sourceCaptureExpression: '.*/([^/]+)$',
targetNameExpression: 'other-agent-$1')])
stash name: 'allure-input', includes: 'allure-input/**', useDefaultExcludes: false
}
cleanup {
deleteDir() /* clean up our workspace */
}
}
stages {
stage('Unstash') {
steps {
unstash 'compiled'
}
}
stage('Recompile') {
steps {
sh script: [
'./gradlew',
COMMON_GRADLE_PARAMS,
'jar'
].join(' ')
}
}
stage('Unit Test') {
steps {
sh script: [
'./gradlew',
COMMON_GRADLE_PARAMS,
'test'
].join(' ')
}
}
stage('Smoke Test') {
steps {
sh script: [
'./gradlew',
COMMON_GRADLE_PARAMS,
'smokeTest'
].join(' ')
}
}
stage('Slow Integration Test') {
steps {
sh script: [
'./gradlew',
COMMON_GRADLE_PARAMS,
'slowIntegrationTest'
].join(' ')
}
}
}
}
stage('Same agent') {
stages {
stage('Integration Test') {
steps {
sh script: [
'./gradlew',
COMMON_GRADLE_PARAMS,
'integrationTest'
].join(' ')
}
}
stage('Integration Test') {
steps {
sh script: [
'./gradlew',
COMMON_GRADLE_PARAMS,
'integrationTest'
].join(' ')
}
}
stage('Smoke Test') {
steps {
sh script: [
'./gradlew',
COMMON_GRADLE_PARAMS,
'smokeTest'
].join(' ')
}
}
stage('Slow Integration Test') {
steps {
sh script: [
'./gradlew',
COMMON_GRADLE_PARAMS,
'slowIntegrationTest'
].join(' ')
}
}
stage('Deploy Node') {
steps {
sh script: [
'./gradlew',
COMMON_GRADLE_PARAMS,
'deployNode'
].join(' ')
stage('Deploy Node') {
steps {
sh script: [
'./gradlew',
COMMON_GRADLE_PARAMS,
'deployNode'
].join(' ')
}
}
}
}
}
}
@ -209,6 +276,7 @@ pipeline {
archiveArtifacts artifacts: '**/*.log', fingerprint: false
archiveArtifacts artifacts: '**/build/reports/tests/**', fingerprint: false
junit testResults: '**/build/test-results/**/*.xml', keepLongStdio: true
unstash 'allure-input'
script {
try {
@ -218,17 +286,17 @@ pipeline {
* of memory errors due to being passed many directories with
* long paths.
*
* File names are pre-pended with the pod number when
* File names are pre-pended with a prefix when
* copied to avoid collisions between files where the same test
* classes have run on multiple pods.
* classes have run on multiple agents.
*/
fileOperations([fileCopyOperation(
includes: '**/test-results-xml/**/test-runs/test-reports/**',
includes: '**/build/test-results/**/*.xml',
targetLocation: 'allure-input',
flattenFiles: true,
renameFiles: true,
sourceCaptureExpression: '.*test-results-xml/.*-([\\d]+)/.*/([^/]+)$',
targetNameExpression: '$1-$2')])
sourceCaptureExpression: '.*/([^/]+)$',
targetNameExpression: 'same-agent-$1')])
allure includeProperties: false,
jdk: '',
results: [[path: '**/allure-input']]

View File

@ -67,8 +67,8 @@ buildscript {
ext.jersey_version = '2.25'
ext.servlet_version = '4.0.1'
ext.assertj_version = '3.12.2'
ext.slf4j_version = '1.7.30'
ext.log4j_version = '2.12.1'
ext.slf4j_version = '1.7.26'
ext.log4j_version = '2.11.2'
ext.bouncycastle_version = constants.getProperty("bouncycastleVersion")
ext.guava_version = constants.getProperty("guavaVersion")
ext.caffeine_version = constants.getProperty("caffeineVersion")
@ -309,7 +309,7 @@ allprojects {
}
tasks.withType(Test) {
forkEvery = 20
forkEvery = 15
ignoreFailures = project.hasProperty('tests.ignoreFailures') ? project.property('tests.ignoreFailures').toBoolean() : false
failFast = project.hasProperty('tests.failFast') ? project.property('tests.failFast').toBoolean() : false

View File

@ -0,0 +1,28 @@
package net.corda.common.logging
import org.apache.logging.log4j.core.Core
import org.apache.logging.log4j.core.LogEvent
import org.apache.logging.log4j.core.appender.rewrite.RewritePolicy
import org.apache.logging.log4j.core.config.plugins.Plugin
import org.apache.logging.log4j.core.config.plugins.PluginFactory
import org.apache.logging.log4j.core.impl.Log4jLogEvent
@Plugin(name = "ErrorCodeRewritePolicy", category = Core.CATEGORY_NAME, elementType = "rewritePolicy", printObject = false)
class ErrorCodeRewritePolicy : RewritePolicy {
override fun rewrite(source: LogEvent): LogEvent? {
val newMessage = source.message?.withErrorCodeFor(source.thrown, source.level)
return if (newMessage == source.message) {
source
} else {
Log4jLogEvent.Builder(source).setMessage(newMessage).build()
}
}
companion object {
@JvmStatic
@PluginFactory
fun createPolicy(): ErrorCodeRewritePolicy {
return ErrorCodeRewritePolicy()
}
}
}

View File

@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="info" shutdownHook="disable">
<Configuration status="info" packages="net.corda.common.logging" shutdownHook="disable">
<Properties>
<Property name="log-path">${sys:log-path:-logs}</Property>
@ -172,17 +172,21 @@
<Rewrite name="Console-ErrorCode-Selector">
<AppenderRef ref="Console-Selector"/>
<ErrorCodeRewritePolicy/>
</Rewrite>
<Rewrite name="Console-ErrorCode-Appender-Println">
<AppenderRef ref="Console-Appender-Println"/>
<ErrorCodeRewritePolicy/>
</Rewrite>
<Rewrite name="RollingFile-ErrorCode-Appender">
<AppenderRef ref="RollingFile-Appender"/>
<ErrorCodeRewritePolicy/>
</Rewrite>
<Rewrite name="Diagnostic-RollingFile-ErrorCode-Appender">
<AppenderRef ref="Diagnostic-RollingFile-Appender"/>
<ErrorCodeRewritePolicy/>
</Rewrite>
</Appenders>

View File

@ -22,8 +22,8 @@ quasarVersion11=0.8.0
# Specify a classifier for Java 11 built artifacts
jdkClassifier11=jdk11
proguardVersion=6.1.1
bouncycastleVersion=1.66
classgraphVersion=4.8.78
bouncycastleVersion=1.61
classgraphVersion=4.8.41
disruptorVersion=3.4.2
typesafeConfigVersion=1.3.4
jsr305Version=3.0.2

View File

@ -0,0 +1,62 @@
package net.corda.node.logging
import net.corda.core.flows.FlowException
import net.corda.core.flows.FlowLogic
import net.corda.core.flows.InitiatingFlow
import net.corda.core.flows.StartableByRPC
import net.corda.core.internal.div
import net.corda.core.messaging.FlowHandle
import net.corda.core.messaging.startFlow
import net.corda.core.utilities.getOrThrow
import net.corda.testing.driver.DriverParameters
import net.corda.testing.driver.NodeHandle
import net.corda.testing.driver.driver
import org.assertj.core.api.Assertions.assertThat
import org.junit.Test
import java.io.File
class ErrorCodeLoggingTests {
@Test
fun `log entries with a throwable and ERROR or WARN get an error code appended`() {
driver(DriverParameters(notarySpecs = emptyList())) {
val node = startNode(startInSameProcess = false).getOrThrow()
node.rpc.startFlow(::MyFlow).waitForCompletion()
val logFile = node.logFile()
val linesWithErrorCode = logFile.useLines { lines -> lines.filter { line -> line.contains("[errorCode=") }.filter { line -> line.contains("moreInformationAt=https://errors.corda.net/") }.toList() }
assertThat(linesWithErrorCode).isNotEmpty
}
}
// This is used to detect broken logging which can be caused by loggers being initialized
// before the initLogging() call is made
@Test
fun `When logging is set to error level, there are no other levels logged after node startup`() {
driver(DriverParameters(notarySpecs = emptyList())) {
val node = startNode(startInSameProcess = false, logLevelOverride = "ERROR").getOrThrow()
node.rpc.startFlow(::MyFlow).waitForCompletion()
val logFile = node.logFile()
// An exception thrown in a flow will log at the "INFO" level.
assertThat(logFile.length()).isEqualTo(0)
}
}
@StartableByRPC
@InitiatingFlow
class MyFlow : FlowLogic<String>() {
override fun call(): String {
throw IllegalArgumentException("Mwahahahah")
}
}
}
private fun FlowHandle<*>.waitForCompletion() {
try {
returnValue.getOrThrow()
} catch (e: Exception) {
// This is expected to throw an exception, using getOrThrow() just to wait until done.
}
}
fun NodeHandle.logFile(): File = (baseDirectory / "logs").toFile().walk().filter { it.name.startsWith("node-") && it.extension == "log" }.single()

View File

@ -22,6 +22,7 @@ import net.corda.testing.driver.driver
import net.corda.testing.node.User
import net.corda.testing.node.internal.DUMMY_CONTRACTS_CORDAPP
import net.corda.testing.node.internal.enclosedCordapp
import org.junit.Ignore
import org.junit.Test
import kotlin.test.assertEquals
@ -29,6 +30,7 @@ import kotlin.test.assertEquals
* Check that we can add lots of large attachments to a transaction and that it works OK, e.g. does not hit the
* transaction size limit (which should only consider the hashes).
*/
@Ignore("ENT-5679: This test triggers OOM errors")
class LargeTransactionsTest {
private companion object {
val BOB = TestIdentity(BOB_NAME, 80).party

View File

@ -29,7 +29,7 @@ import kotlin.test.assertFailsWith
class VaultObserverExceptionTest {
companion object {
val waitForFlowDuration = 45.seconds
val log = contextLogger()
private fun testCordapps() = listOf(
@ -49,7 +49,7 @@ class VaultObserverExceptionTest {
* Causing an SqlException via a syntax error in a vault observer causes the flow to hit the
* DatabsaseEndocrinologist in the FlowHospital and being kept for overnight observation
*/
@Test
@Test(timeout=300_000)
fun unhandledSqlExceptionFromVaultObserverGetsHospitatlised() {
val testControlFuture = openFuture<Boolean>().toCompletableFuture()
@ -73,7 +73,7 @@ class VaultObserverExceptionTest {
"Syntax Error in Custom SQL",
CreateStateFlow.errorTargetsToNum(CreateStateFlow.ErrorTarget.ServiceSqlSyntaxError)
).returnValue.then { testControlFuture.complete(false) }
val foundExpectedException = testControlFuture.getOrThrow(30.seconds)
val foundExpectedException = testControlFuture.getOrThrow(waitForFlowDuration)
Assert.assertTrue(foundExpectedException)
}
@ -83,7 +83,7 @@ class VaultObserverExceptionTest {
* Throwing a random (non-SQL releated) exception from a vault observer causes the flow to be
* aborted when unhandled in user code
*/
@Test
@Test(timeout=300_000)
fun otherExceptionsFromVaultObserverBringFlowDown() {
driver(DriverParameters(
startNodesInProcess = true,
@ -95,7 +95,7 @@ class VaultObserverExceptionTest {
::Initiator,
"InvalidParameterException",
CreateStateFlow.errorTargetsToNum(CreateStateFlow.ErrorTarget.ServiceThrowInvalidParameter)
).returnValue.getOrThrow(30.seconds)
).returnValue.getOrThrow(waitForFlowDuration)
}
}
}
@ -104,7 +104,7 @@ class VaultObserverExceptionTest {
* A random exception from a VaultObserver will bring the Rx Observer down, but can be handled in the flow
* triggering the observer, and the flow will continue successfully (for some values of success)
*/
@Test
@Test(timeout=300_000)
fun otherExceptionsFromVaultObserverCanBeSuppressedInFlow() {
driver(DriverParameters(
startNodesInProcess = true,
@ -114,7 +114,7 @@ class VaultObserverExceptionTest {
aliceNode.rpc.startFlow(::Initiator, "InvalidParameterException", CreateStateFlow.errorTargetsToNum(
CreateStateFlow.ErrorTarget.ServiceThrowInvalidParameter,
CreateStateFlow.ErrorTarget.FlowSwallowErrors))
.returnValue.getOrThrow(30.seconds)
.returnValue.getOrThrow(waitForFlowDuration)
}
}
@ -123,7 +123,7 @@ class VaultObserverExceptionTest {
* If the state we are trying to persist triggers a persistence exception, the flow hospital will retry the flow
* and keep it in for observation if errors persist.
*/
@Test
@Test(timeout=300_000)
fun persistenceExceptionOnCommitGetsRetriedAndThenGetsKeptForObservation() {
var admitted = 0
var observation = 0
@ -152,7 +152,7 @@ class VaultObserverExceptionTest {
* If we have a state causing a database error lined up for persistence, calling jdbConnection() in
* the vault observer will trigger a flush that throws. This will be kept in for observation.
*/
@Test
@Test(timeout=300_000)
fun persistenceExceptionOnFlushGetsRetriedAndThenGetsKeptForObservation() {
var counter = 0
StaffedFlowHospital.DatabaseEndocrinologist.customConditions.add {
@ -179,7 +179,7 @@ class VaultObserverExceptionTest {
aliceNode.rpc.startFlow(::Initiator, "EntityManager", errorTargetsToNum(
CreateStateFlow.ErrorTarget.ServiceValidUpdate,
CreateStateFlow.ErrorTarget.TxInvalidState))
.returnValue.getOrThrow(30.seconds)
.returnValue.getOrThrow(waitForFlowDuration)
}
}
Assert.assertTrue("Flow has not been to hospital", counter > 0)
@ -193,7 +193,7 @@ class VaultObserverExceptionTest {
* does not change the outcome - the first exception in the service will bring the service down and will
* be caught by the flow, but the state machine will error the flow anyway as Corda code threw.
*/
@Test
@Test(timeout=300_000)
fun persistenceExceptionOnFlushInVaultObserverCannotBeSuppressedInFlow() {
var counter = 0
StaffedFlowHospital.DatabaseEndocrinologist.customConditions.add {
@ -220,7 +220,7 @@ class VaultObserverExceptionTest {
CreateStateFlow.ErrorTarget.TxInvalidState,
CreateStateFlow.ErrorTarget.FlowSwallowErrors))
val flowResult = flowHandle.returnValue
assertFailsWith<TimeoutException>("PersistenceException") { flowResult.getOrThrow(30.seconds) }
assertFailsWith<TimeoutException>("PersistenceException") { flowResult.getOrThrow(waitForFlowDuration) }
Assert.assertTrue("Flow has not been to hospital", counter > 0)
}
}
@ -231,7 +231,7 @@ class VaultObserverExceptionTest {
* Trying to catch and suppress that exception inside the service does protect the service, but the new
* interceptor will fail the flow anyway. The flow will be kept in for observation if errors persist.
*/
@Test
@Test(timeout=300_000)
fun persistenceExceptionOnFlushInVaultObserverCannotBeSuppressedInService() {
var counter = 0
StaffedFlowHospital.DatabaseEndocrinologist.customConditions.add {
@ -257,7 +257,7 @@ class VaultObserverExceptionTest {
CreateStateFlow.ErrorTarget.TxInvalidState,
CreateStateFlow.ErrorTarget.ServiceSwallowErrors))
val flowResult = flowHandle.returnValue
assertFailsWith<TimeoutException>("PersistenceException") { flowResult.getOrThrow(30.seconds) }
assertFailsWith<TimeoutException>("PersistenceException") { flowResult.getOrThrow(waitForFlowDuration) }
Assert.assertTrue("Flow has not been to hospital", counter > 0)
}
}
@ -267,7 +267,7 @@ class VaultObserverExceptionTest {
* therefore handling it in flow code is no good, and the error will be passed to the flow hospital via the
* interceptor.
*/
@Test
@Test(timeout=300_000)
fun syntaxErrorInUserCodeInServiceCannotBeSuppressedInFlow() {
val testControlFuture = openFuture<Boolean>()
StaffedFlowHospital.onFlowKeptForOvernightObservation.add { _, _ ->
@ -288,7 +288,7 @@ class VaultObserverExceptionTest {
log.info("Flow has finished")
testControlFuture.set(false)
}
Assert.assertTrue("Flow has not been kept in hospital", testControlFuture.getOrThrow(30.seconds))
Assert.assertTrue("Flow has not been kept in hospital", testControlFuture.getOrThrow(waitForFlowDuration))
}
}
@ -296,7 +296,7 @@ class VaultObserverExceptionTest {
* User code throwing a syntax error and catching suppressing that within the observer code is fine
* and should not have any impact on the rest of the flow
*/
@Test
@Test(timeout=300_000)
fun syntaxErrorInUserCodeInServiceCanBeSuppressedInService() {
driver(DriverParameters(
startNodesInProcess = true,
@ -307,7 +307,7 @@ class VaultObserverExceptionTest {
CreateStateFlow.ErrorTarget.ServiceSqlSyntaxError,
CreateStateFlow.ErrorTarget.ServiceSwallowErrors))
val flowResult = flowHandle.returnValue
flowResult.getOrThrow(30.seconds)
flowResult.getOrThrow(waitForFlowDuration)
}
}
}

View File

@ -26,7 +26,6 @@ import net.corda.testing.node.internal.genericDriver
import net.corda.testing.node.internal.getTimestampAsDirectoryName
import net.corda.testing.node.internal.newContext
import rx.Observable
import java.io.File
import java.nio.file.Path
import java.nio.file.Paths
import java.util.concurrent.atomic.AtomicInteger
@ -67,8 +66,6 @@ interface NodeHandle : AutoCloseable {
fun stop()
}
fun NodeHandle.logFile(): File = (baseDirectory / "logs").toFile().walk().filter { it.name.startsWith("node-") && it.extension == "log" }.single()
/** Interface which represents an out of process node and exposes its process handle. **/
@DoNotImplement
interface OutOfProcess : NodeHandle {

View File

@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="info">
<Configuration status="info" packages="net.corda.common.logging">
<Properties>
<Property name="log-path">${sys:log-path:-logs}</Property>
@ -63,14 +63,17 @@
<Rewrite name="Console-ErrorCode-Appender">
<AppenderRef ref="Console-Appender"/>
<ErrorCodeRewritePolicy/>
</Rewrite>
<Rewrite name="Console-ErrorCode-Appender-Println">
<AppenderRef ref="Console-Appender-Println"/>
<ErrorCodeRewritePolicy/>
</Rewrite>
<Rewrite name="RollingFile-ErrorCode-Appender">
<AppenderRef ref="RollingFile-Appender"/>
<ErrorCodeRewritePolicy/>
</Rewrite>
</Appenders>

View File

@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="info">
<Configuration status="info" packages="net.corda.common.logging">
<Properties>
<Property name="log-path">${sys:log-path:-logs}</Property>
@ -65,14 +65,17 @@
<Rewrite name="Console-ErrorCode-Appender">
<AppenderRef ref="Console-Appender"/>
<ErrorCodeRewritePolicy/>
</Rewrite>
<Rewrite name="Console-ErrorCode-Appender-Println">
<AppenderRef ref="Console-Appender-Println"/>
<ErrorCodeRewritePolicy/>
</Rewrite>
<Rewrite name="RollingFile-ErrorCode-Appender">
<AppenderRef ref="RollingFile-Appender"/>
<ErrorCodeRewritePolicy/>
</Rewrite>
</Appenders>