From 2064a572ba94c6777a4b9051218305def1b642b4 Mon Sep 17 00:00:00 2001 From: Ross Nicoll Date: Fri, 18 Sep 2020 00:23:16 +0100 Subject: [PATCH] CORDA-4034 ENT-5679 CORDA-3845 Correct instability of linear builds (#6722) * CORDA-4034 Reduce forkEvery to 15 to attempt to mitigate memory leak. * ENT-5679 Disable test which triggers OOM * Run tests on two Jenkins agents * Fixed processing JUnit test results by Allure * Add timeouts to VaultObserverExceptionTest * Revert "CORDA-3845: Update BC, log4j, slf4j (#6699)" to eliminate introduced memory leaks Co-authored-by: Waldemar Zurowski --- .ci/dev/regression/Jenkinsfile | 174 ++++++++++++------ build.gradle | 6 +- .../common/logging/ErrorCodeRewritePolicy.kt | 28 +++ config/dev/log4j2.xml | 6 +- constants.properties | 4 +- .../node/logging/ErrorCodeLoggingTests.kt | 62 +++++++ .../statemachine/LargeTransactionsTest.kt | 2 + .../vault/VaultObserverExceptionTest.kt | 36 ++-- .../kotlin/net/corda/testing/driver/Driver.kt | 3 - .../src/main/resources/log4j2-test.xml | 5 +- .../src/test/resources/log4j2-test.xml | 5 +- 11 files changed, 249 insertions(+), 82 deletions(-) create mode 100644 common/logging/src/main/kotlin/net/corda/common/logging/ErrorCodeRewritePolicy.kt create mode 100644 node/src/integration-test/kotlin/net/corda/node/logging/ErrorCodeLoggingTests.kt diff --git a/.ci/dev/regression/Jenkinsfile b/.ci/dev/regression/Jenkinsfile index b805ff0fd8..7efc7a3b72 100644 --- a/.ci/dev/regression/Jenkinsfile +++ b/.ci/dev/regression/Jenkinsfile @@ -52,10 +52,15 @@ String COMMON_GRADLE_PARAMS = [ pipeline { agent { label 'standard' } + + /* + * List options in alphabetical order + */ options { - timestamps() - timeout(time: 6, unit: 'HOURS') buildDiscarder(logRotator(daysToKeepStr: '14', artifactDaysToKeepStr: '14')) + parallelsAlwaysFailFast() + timeout(time: 6, unit: 'HOURS') + timestamps() } parameters { @@ -85,6 +90,11 @@ pipeline { } } + stage('Stash') { + steps { + stash name: 'compiled', useDefaultExcludes: false + } + } stage('Sonatype Check') { steps { script { @@ -104,53 +114,110 @@ pipeline { } } - stage('Unit Test') { - steps { - sh script: [ - './gradlew', - COMMON_GRADLE_PARAMS, - 'test' - ].join(' ') - } - } + stage('All Tests') { + parallel { + stage('Another agent') { + agent { + label 'standard' + } + options { + skipDefaultCheckout true + } + post { + always { + archiveArtifacts artifacts: '**/*.log', fingerprint: false + archiveArtifacts artifacts: '**/build/reports/tests/**', fingerprint: false + junit testResults: '**/build/test-results/**/*.xml', keepLongStdio: true + /* + * Copy all JUnit results files into a single top level directory. + * This is necessary to stop the allure plugin from hitting out + * of memory errors due to being passed many directories with + * long paths. + * + * File names are pre-pended with a prefix when + * copied to avoid collisions between files where the same test + * classes have run on multiple agents. + */ + fileOperations([fileCopyOperation( + includes: '**/build/test-results/**/*.xml', + targetLocation: 'allure-input', + flattenFiles: true, + renameFiles: true, + sourceCaptureExpression: '.*/([^/]+)$', + targetNameExpression: 'other-agent-$1')]) + stash name: 'allure-input', includes: 'allure-input/**', useDefaultExcludes: false + } + cleanup { + deleteDir() /* clean up our workspace */ + } + } + stages { + stage('Unstash') { + steps { + unstash 'compiled' + } + } + stage('Recompile') { + steps { + sh script: [ + './gradlew', + COMMON_GRADLE_PARAMS, + 'jar' + ].join(' ') + } + } + stage('Unit Test') { + steps { + sh script: [ + './gradlew', + COMMON_GRADLE_PARAMS, + 'test' + ].join(' ') + } + } + stage('Smoke Test') { + steps { + sh script: [ + './gradlew', + COMMON_GRADLE_PARAMS, + 'smokeTest' + ].join(' ') + } + } + stage('Slow Integration Test') { + steps { + sh script: [ + './gradlew', + COMMON_GRADLE_PARAMS, + 'slowIntegrationTest' + ].join(' ') + } + } + } + } + stage('Same agent') { + stages { + stage('Integration Test') { + steps { + sh script: [ + './gradlew', + COMMON_GRADLE_PARAMS, + 'integrationTest' + ].join(' ') + } + } - stage('Integration Test') { - steps { - sh script: [ - './gradlew', - COMMON_GRADLE_PARAMS, - 'integrationTest' - ].join(' ') - } - } - - stage('Smoke Test') { - steps { - sh script: [ - './gradlew', - COMMON_GRADLE_PARAMS, - 'smokeTest' - ].join(' ') - } - } - - stage('Slow Integration Test') { - steps { - sh script: [ - './gradlew', - COMMON_GRADLE_PARAMS, - 'slowIntegrationTest' - ].join(' ') - } - } - - stage('Deploy Node') { - steps { - sh script: [ - './gradlew', - COMMON_GRADLE_PARAMS, - 'deployNode' - ].join(' ') + stage('Deploy Node') { + steps { + sh script: [ + './gradlew', + COMMON_GRADLE_PARAMS, + 'deployNode' + ].join(' ') + } + } + } + } } } @@ -209,6 +276,7 @@ pipeline { archiveArtifacts artifacts: '**/*.log', fingerprint: false archiveArtifacts artifacts: '**/build/reports/tests/**', fingerprint: false junit testResults: '**/build/test-results/**/*.xml', keepLongStdio: true + unstash 'allure-input' script { try { @@ -218,17 +286,17 @@ pipeline { * of memory errors due to being passed many directories with * long paths. * - * File names are pre-pended with the pod number when + * File names are pre-pended with a prefix when * copied to avoid collisions between files where the same test - * classes have run on multiple pods. + * classes have run on multiple agents. */ fileOperations([fileCopyOperation( - includes: '**/test-results-xml/**/test-runs/test-reports/**', + includes: '**/build/test-results/**/*.xml', targetLocation: 'allure-input', flattenFiles: true, renameFiles: true, - sourceCaptureExpression: '.*test-results-xml/.*-([\\d]+)/.*/([^/]+)$', - targetNameExpression: '$1-$2')]) + sourceCaptureExpression: '.*/([^/]+)$', + targetNameExpression: 'same-agent-$1')]) allure includeProperties: false, jdk: '', results: [[path: '**/allure-input']] diff --git a/build.gradle b/build.gradle index 14c1b970ee..bc5933c7de 100644 --- a/build.gradle +++ b/build.gradle @@ -67,8 +67,8 @@ buildscript { ext.jersey_version = '2.25' ext.servlet_version = '4.0.1' ext.assertj_version = '3.12.2' - ext.slf4j_version = '1.7.30' - ext.log4j_version = '2.12.1' + ext.slf4j_version = '1.7.26' + ext.log4j_version = '2.11.2' ext.bouncycastle_version = constants.getProperty("bouncycastleVersion") ext.guava_version = constants.getProperty("guavaVersion") ext.caffeine_version = constants.getProperty("caffeineVersion") @@ -309,7 +309,7 @@ allprojects { } tasks.withType(Test) { - forkEvery = 20 + forkEvery = 15 ignoreFailures = project.hasProperty('tests.ignoreFailures') ? project.property('tests.ignoreFailures').toBoolean() : false failFast = project.hasProperty('tests.failFast') ? project.property('tests.failFast').toBoolean() : false diff --git a/common/logging/src/main/kotlin/net/corda/common/logging/ErrorCodeRewritePolicy.kt b/common/logging/src/main/kotlin/net/corda/common/logging/ErrorCodeRewritePolicy.kt new file mode 100644 index 0000000000..5f0e24bef9 --- /dev/null +++ b/common/logging/src/main/kotlin/net/corda/common/logging/ErrorCodeRewritePolicy.kt @@ -0,0 +1,28 @@ +package net.corda.common.logging + +import org.apache.logging.log4j.core.Core +import org.apache.logging.log4j.core.LogEvent +import org.apache.logging.log4j.core.appender.rewrite.RewritePolicy +import org.apache.logging.log4j.core.config.plugins.Plugin +import org.apache.logging.log4j.core.config.plugins.PluginFactory +import org.apache.logging.log4j.core.impl.Log4jLogEvent + +@Plugin(name = "ErrorCodeRewritePolicy", category = Core.CATEGORY_NAME, elementType = "rewritePolicy", printObject = false) +class ErrorCodeRewritePolicy : RewritePolicy { + override fun rewrite(source: LogEvent): LogEvent? { + val newMessage = source.message?.withErrorCodeFor(source.thrown, source.level) + return if (newMessage == source.message) { + source + } else { + Log4jLogEvent.Builder(source).setMessage(newMessage).build() + } + } + + companion object { + @JvmStatic + @PluginFactory + fun createPolicy(): ErrorCodeRewritePolicy { + return ErrorCodeRewritePolicy() + } + } +} \ No newline at end of file diff --git a/config/dev/log4j2.xml b/config/dev/log4j2.xml index 6593f84ae8..fe67301f84 100644 --- a/config/dev/log4j2.xml +++ b/config/dev/log4j2.xml @@ -1,5 +1,5 @@ - + ${sys:log-path:-logs} @@ -172,17 +172,21 @@ + + + + diff --git a/constants.properties b/constants.properties index 362ac6b643..6cf6942c4d 100644 --- a/constants.properties +++ b/constants.properties @@ -22,8 +22,8 @@ quasarVersion11=0.8.0 # Specify a classifier for Java 11 built artifacts jdkClassifier11=jdk11 proguardVersion=6.1.1 -bouncycastleVersion=1.66 -classgraphVersion=4.8.78 +bouncycastleVersion=1.61 +classgraphVersion=4.8.41 disruptorVersion=3.4.2 typesafeConfigVersion=1.3.4 jsr305Version=3.0.2 diff --git a/node/src/integration-test/kotlin/net/corda/node/logging/ErrorCodeLoggingTests.kt b/node/src/integration-test/kotlin/net/corda/node/logging/ErrorCodeLoggingTests.kt new file mode 100644 index 0000000000..e0532911f0 --- /dev/null +++ b/node/src/integration-test/kotlin/net/corda/node/logging/ErrorCodeLoggingTests.kt @@ -0,0 +1,62 @@ +package net.corda.node.logging + +import net.corda.core.flows.FlowException +import net.corda.core.flows.FlowLogic +import net.corda.core.flows.InitiatingFlow +import net.corda.core.flows.StartableByRPC +import net.corda.core.internal.div +import net.corda.core.messaging.FlowHandle +import net.corda.core.messaging.startFlow +import net.corda.core.utilities.getOrThrow +import net.corda.testing.driver.DriverParameters +import net.corda.testing.driver.NodeHandle +import net.corda.testing.driver.driver +import org.assertj.core.api.Assertions.assertThat +import org.junit.Test +import java.io.File + +class ErrorCodeLoggingTests { + @Test + fun `log entries with a throwable and ERROR or WARN get an error code appended`() { + driver(DriverParameters(notarySpecs = emptyList())) { + val node = startNode(startInSameProcess = false).getOrThrow() + node.rpc.startFlow(::MyFlow).waitForCompletion() + val logFile = node.logFile() + + val linesWithErrorCode = logFile.useLines { lines -> lines.filter { line -> line.contains("[errorCode=") }.filter { line -> line.contains("moreInformationAt=https://errors.corda.net/") }.toList() } + + assertThat(linesWithErrorCode).isNotEmpty + } + } + + // This is used to detect broken logging which can be caused by loggers being initialized + // before the initLogging() call is made + @Test + fun `When logging is set to error level, there are no other levels logged after node startup`() { + driver(DriverParameters(notarySpecs = emptyList())) { + val node = startNode(startInSameProcess = false, logLevelOverride = "ERROR").getOrThrow() + node.rpc.startFlow(::MyFlow).waitForCompletion() + val logFile = node.logFile() + // An exception thrown in a flow will log at the "INFO" level. + assertThat(logFile.length()).isEqualTo(0) + } + } + + @StartableByRPC + @InitiatingFlow + class MyFlow : FlowLogic() { + override fun call(): String { + throw IllegalArgumentException("Mwahahahah") + } + } +} + +private fun FlowHandle<*>.waitForCompletion() { + try { + returnValue.getOrThrow() + } catch (e: Exception) { + // This is expected to throw an exception, using getOrThrow() just to wait until done. + } +} + +fun NodeHandle.logFile(): File = (baseDirectory / "logs").toFile().walk().filter { it.name.startsWith("node-") && it.extension == "log" }.single() \ No newline at end of file diff --git a/node/src/integration-test/kotlin/net/corda/node/services/statemachine/LargeTransactionsTest.kt b/node/src/integration-test/kotlin/net/corda/node/services/statemachine/LargeTransactionsTest.kt index 532b6ca141..3e723c223d 100644 --- a/node/src/integration-test/kotlin/net/corda/node/services/statemachine/LargeTransactionsTest.kt +++ b/node/src/integration-test/kotlin/net/corda/node/services/statemachine/LargeTransactionsTest.kt @@ -22,6 +22,7 @@ import net.corda.testing.driver.driver import net.corda.testing.node.User import net.corda.testing.node.internal.DUMMY_CONTRACTS_CORDAPP import net.corda.testing.node.internal.enclosedCordapp +import org.junit.Ignore import org.junit.Test import kotlin.test.assertEquals @@ -29,6 +30,7 @@ import kotlin.test.assertEquals * Check that we can add lots of large attachments to a transaction and that it works OK, e.g. does not hit the * transaction size limit (which should only consider the hashes). */ +@Ignore("ENT-5679: This test triggers OOM errors") class LargeTransactionsTest { private companion object { val BOB = TestIdentity(BOB_NAME, 80).party diff --git a/node/src/integration-test/kotlin/net/corda/node/services/vault/VaultObserverExceptionTest.kt b/node/src/integration-test/kotlin/net/corda/node/services/vault/VaultObserverExceptionTest.kt index 06c3bc1acf..8b71e344e0 100644 --- a/node/src/integration-test/kotlin/net/corda/node/services/vault/VaultObserverExceptionTest.kt +++ b/node/src/integration-test/kotlin/net/corda/node/services/vault/VaultObserverExceptionTest.kt @@ -29,7 +29,7 @@ import kotlin.test.assertFailsWith class VaultObserverExceptionTest { companion object { - + val waitForFlowDuration = 45.seconds val log = contextLogger() private fun testCordapps() = listOf( @@ -49,7 +49,7 @@ class VaultObserverExceptionTest { * Causing an SqlException via a syntax error in a vault observer causes the flow to hit the * DatabsaseEndocrinologist in the FlowHospital and being kept for overnight observation */ - @Test + @Test(timeout=300_000) fun unhandledSqlExceptionFromVaultObserverGetsHospitatlised() { val testControlFuture = openFuture().toCompletableFuture() @@ -73,7 +73,7 @@ class VaultObserverExceptionTest { "Syntax Error in Custom SQL", CreateStateFlow.errorTargetsToNum(CreateStateFlow.ErrorTarget.ServiceSqlSyntaxError) ).returnValue.then { testControlFuture.complete(false) } - val foundExpectedException = testControlFuture.getOrThrow(30.seconds) + val foundExpectedException = testControlFuture.getOrThrow(waitForFlowDuration) Assert.assertTrue(foundExpectedException) } @@ -83,7 +83,7 @@ class VaultObserverExceptionTest { * Throwing a random (non-SQL releated) exception from a vault observer causes the flow to be * aborted when unhandled in user code */ - @Test + @Test(timeout=300_000) fun otherExceptionsFromVaultObserverBringFlowDown() { driver(DriverParameters( startNodesInProcess = true, @@ -95,7 +95,7 @@ class VaultObserverExceptionTest { ::Initiator, "InvalidParameterException", CreateStateFlow.errorTargetsToNum(CreateStateFlow.ErrorTarget.ServiceThrowInvalidParameter) - ).returnValue.getOrThrow(30.seconds) + ).returnValue.getOrThrow(waitForFlowDuration) } } } @@ -104,7 +104,7 @@ class VaultObserverExceptionTest { * A random exception from a VaultObserver will bring the Rx Observer down, but can be handled in the flow * triggering the observer, and the flow will continue successfully (for some values of success) */ - @Test + @Test(timeout=300_000) fun otherExceptionsFromVaultObserverCanBeSuppressedInFlow() { driver(DriverParameters( startNodesInProcess = true, @@ -114,7 +114,7 @@ class VaultObserverExceptionTest { aliceNode.rpc.startFlow(::Initiator, "InvalidParameterException", CreateStateFlow.errorTargetsToNum( CreateStateFlow.ErrorTarget.ServiceThrowInvalidParameter, CreateStateFlow.ErrorTarget.FlowSwallowErrors)) - .returnValue.getOrThrow(30.seconds) + .returnValue.getOrThrow(waitForFlowDuration) } } @@ -123,7 +123,7 @@ class VaultObserverExceptionTest { * If the state we are trying to persist triggers a persistence exception, the flow hospital will retry the flow * and keep it in for observation if errors persist. */ - @Test + @Test(timeout=300_000) fun persistenceExceptionOnCommitGetsRetriedAndThenGetsKeptForObservation() { var admitted = 0 var observation = 0 @@ -152,7 +152,7 @@ class VaultObserverExceptionTest { * If we have a state causing a database error lined up for persistence, calling jdbConnection() in * the vault observer will trigger a flush that throws. This will be kept in for observation. */ - @Test + @Test(timeout=300_000) fun persistenceExceptionOnFlushGetsRetriedAndThenGetsKeptForObservation() { var counter = 0 StaffedFlowHospital.DatabaseEndocrinologist.customConditions.add { @@ -179,7 +179,7 @@ class VaultObserverExceptionTest { aliceNode.rpc.startFlow(::Initiator, "EntityManager", errorTargetsToNum( CreateStateFlow.ErrorTarget.ServiceValidUpdate, CreateStateFlow.ErrorTarget.TxInvalidState)) - .returnValue.getOrThrow(30.seconds) + .returnValue.getOrThrow(waitForFlowDuration) } } Assert.assertTrue("Flow has not been to hospital", counter > 0) @@ -193,7 +193,7 @@ class VaultObserverExceptionTest { * does not change the outcome - the first exception in the service will bring the service down and will * be caught by the flow, but the state machine will error the flow anyway as Corda code threw. */ - @Test + @Test(timeout=300_000) fun persistenceExceptionOnFlushInVaultObserverCannotBeSuppressedInFlow() { var counter = 0 StaffedFlowHospital.DatabaseEndocrinologist.customConditions.add { @@ -220,7 +220,7 @@ class VaultObserverExceptionTest { CreateStateFlow.ErrorTarget.TxInvalidState, CreateStateFlow.ErrorTarget.FlowSwallowErrors)) val flowResult = flowHandle.returnValue - assertFailsWith("PersistenceException") { flowResult.getOrThrow(30.seconds) } + assertFailsWith("PersistenceException") { flowResult.getOrThrow(waitForFlowDuration) } Assert.assertTrue("Flow has not been to hospital", counter > 0) } } @@ -231,7 +231,7 @@ class VaultObserverExceptionTest { * Trying to catch and suppress that exception inside the service does protect the service, but the new * interceptor will fail the flow anyway. The flow will be kept in for observation if errors persist. */ - @Test + @Test(timeout=300_000) fun persistenceExceptionOnFlushInVaultObserverCannotBeSuppressedInService() { var counter = 0 StaffedFlowHospital.DatabaseEndocrinologist.customConditions.add { @@ -257,7 +257,7 @@ class VaultObserverExceptionTest { CreateStateFlow.ErrorTarget.TxInvalidState, CreateStateFlow.ErrorTarget.ServiceSwallowErrors)) val flowResult = flowHandle.returnValue - assertFailsWith("PersistenceException") { flowResult.getOrThrow(30.seconds) } + assertFailsWith("PersistenceException") { flowResult.getOrThrow(waitForFlowDuration) } Assert.assertTrue("Flow has not been to hospital", counter > 0) } } @@ -267,7 +267,7 @@ class VaultObserverExceptionTest { * therefore handling it in flow code is no good, and the error will be passed to the flow hospital via the * interceptor. */ - @Test + @Test(timeout=300_000) fun syntaxErrorInUserCodeInServiceCannotBeSuppressedInFlow() { val testControlFuture = openFuture() StaffedFlowHospital.onFlowKeptForOvernightObservation.add { _, _ -> @@ -288,7 +288,7 @@ class VaultObserverExceptionTest { log.info("Flow has finished") testControlFuture.set(false) } - Assert.assertTrue("Flow has not been kept in hospital", testControlFuture.getOrThrow(30.seconds)) + Assert.assertTrue("Flow has not been kept in hospital", testControlFuture.getOrThrow(waitForFlowDuration)) } } @@ -296,7 +296,7 @@ class VaultObserverExceptionTest { * User code throwing a syntax error and catching suppressing that within the observer code is fine * and should not have any impact on the rest of the flow */ - @Test + @Test(timeout=300_000) fun syntaxErrorInUserCodeInServiceCanBeSuppressedInService() { driver(DriverParameters( startNodesInProcess = true, @@ -307,7 +307,7 @@ class VaultObserverExceptionTest { CreateStateFlow.ErrorTarget.ServiceSqlSyntaxError, CreateStateFlow.ErrorTarget.ServiceSwallowErrors)) val flowResult = flowHandle.returnValue - flowResult.getOrThrow(30.seconds) + flowResult.getOrThrow(waitForFlowDuration) } } } \ No newline at end of file diff --git a/testing/node-driver/src/main/kotlin/net/corda/testing/driver/Driver.kt b/testing/node-driver/src/main/kotlin/net/corda/testing/driver/Driver.kt index 33953c76df..a5085085d4 100644 --- a/testing/node-driver/src/main/kotlin/net/corda/testing/driver/Driver.kt +++ b/testing/node-driver/src/main/kotlin/net/corda/testing/driver/Driver.kt @@ -26,7 +26,6 @@ import net.corda.testing.node.internal.genericDriver import net.corda.testing.node.internal.getTimestampAsDirectoryName import net.corda.testing.node.internal.newContext import rx.Observable -import java.io.File import java.nio.file.Path import java.nio.file.Paths import java.util.concurrent.atomic.AtomicInteger @@ -67,8 +66,6 @@ interface NodeHandle : AutoCloseable { fun stop() } -fun NodeHandle.logFile(): File = (baseDirectory / "logs").toFile().walk().filter { it.name.startsWith("node-") && it.extension == "log" }.single() - /** Interface which represents an out of process node and exposes its process handle. **/ @DoNotImplement interface OutOfProcess : NodeHandle { diff --git a/testing/test-common/src/main/resources/log4j2-test.xml b/testing/test-common/src/main/resources/log4j2-test.xml index 45910c8ca5..d8d489af1f 100644 --- a/testing/test-common/src/main/resources/log4j2-test.xml +++ b/testing/test-common/src/main/resources/log4j2-test.xml @@ -1,5 +1,5 @@ - + ${sys:log-path:-logs} @@ -63,14 +63,17 @@ + + + diff --git a/testing/test-db/src/test/resources/log4j2-test.xml b/testing/test-db/src/test/resources/log4j2-test.xml index 35b51709ed..d0941363a9 100644 --- a/testing/test-db/src/test/resources/log4j2-test.xml +++ b/testing/test-db/src/test/resources/log4j2-test.xml @@ -1,5 +1,5 @@ - + ${sys:log-path:-logs} @@ -65,14 +65,17 @@ + + +