From 879243e81fd55f54739fbdae4d5b8666f5acb9a9 Mon Sep 17 00:00:00 2001 From: Hodlinator <172445034+hodlinator@users.noreply.github.com> Date: Fri, 18 Apr 2025 23:20:00 +0200 Subject: [PATCH 1/6] qa refactor: wait_for_rpc_connection - Treat OSErrors the same ConnectionResetError is an OSError as well (ECONNRESET), no reason to have a separate except-block for it. Also improves comments for other exceptions and make condition above more Pythonic. --- test/functional/test_framework/test_node.py | 26 ++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/test/functional/test_framework/test_node.py b/test/functional/test_framework/test_node.py index 7416e08a1f..a81905dc0a 100755 --- a/test/functional/test_framework/test_node.py +++ b/test/functional/test_framework/test_node.py @@ -311,23 +311,23 @@ class TestNode(): self.rpc_connected = True self.url = self.rpc.rpc_url return - except JSONRPCException as e: # Initialization phase + except JSONRPCException as e: + # Suppress these as they are expected during initialization. # -28 RPC in warmup - # -342 Service unavailable, RPC server started but is shutting down due to error - if e.error['code'] != -28 and e.error['code'] != -342: + # -342 Service unavailable, could be starting up or shutting down + if e.error['code'] not in [-28, -342]: raise # unknown JSON RPC exception - except ConnectionResetError: - # This might happen when the RPC server is in warmup, but shut down before the call to getblockcount - # succeeds. Try again to properly raise the FailedToStartError - pass except OSError as e: - if e.errno == errno.ETIMEDOUT: - pass # Treat identical to ConnectionResetError - elif e.errno == errno.ECONNREFUSED: - pass # Port not yet open? - else: + # Suppress similarly to the above JSONRPCException errors. + if e.errno not in [ + errno.ECONNRESET, # This might happen when the RPC server is in warmup, + # but shut down before the call to getblockcount succeeds. + errno.ETIMEDOUT, # Treat identical to ECONNRESET + errno.ECONNREFUSED # Port not yet open? + ]: raise # unknown OS error - except ValueError as e: # cookie file not found and no rpcuser or rpcpassword; bitcoind is still starting + except ValueError as e: + # Suppress if cookie file isn't generated yet and no rpcuser or rpcpassword; bitcoind may be starting. if "No RPC credentials" not in str(e): raise time.sleep(1.0 / poll_per_s) From 6ad21b4c0114029d16d334bf8d437834708a295e Mon Sep 17 00:00:00 2001 From: Hodlinator <172445034+hodlinator@users.noreply.github.com> Date: Wed, 11 Dec 2024 14:56:16 +0100 Subject: [PATCH 2/6] qa: Include ignored errors in RPC connection timeout When an RPC connection attempt with bitcoind times out, include which ignored errors occurred in the exception message. May provide clues of what has gone wrong. --- test/functional/test_framework/test_node.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/test/functional/test_framework/test_node.py b/test/functional/test_framework/test_node.py index a81905dc0a..2c4b876962 100755 --- a/test/functional/test_framework/test_node.py +++ b/test/functional/test_framework/test_node.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (c) 2017-2022 The Bitcoin Core developers +# Copyright (c) 2017-present The Bitcoin Core developers # Distributed under the MIT software license, see the accompanying # file COPYING or http://www.opensource.org/licenses/mit-license.php. """Class for bitcoind node under test""" @@ -265,6 +265,8 @@ class TestNode(): """Sets up an RPC connection to the bitcoind process. Returns False if unable to connect.""" # Poll at a rate of four times per second poll_per_s = 4 + suppressed_errors = collections.defaultdict(int) + latest_error = "" for _ in range(poll_per_s * self.rpc_timeout): if self.process.poll() is not None: # Attach abrupt shutdown error/s to the exception message @@ -317,6 +319,8 @@ class TestNode(): # -342 Service unavailable, could be starting up or shutting down if e.error['code'] not in [-28, -342]: raise # unknown JSON RPC exception + suppressed_errors[f"JSONRPCException {e.error['code']}"] += 1 + latest_error = repr(e) except OSError as e: # Suppress similarly to the above JSONRPCException errors. if e.errno not in [ @@ -326,12 +330,16 @@ class TestNode(): errno.ECONNREFUSED # Port not yet open? ]: raise # unknown OS error + suppressed_errors[f"OSError {errno.errorcode[e.errno]}"] += 1 + latest_error = repr(e) except ValueError as e: # Suppress if cookie file isn't generated yet and no rpcuser or rpcpassword; bitcoind may be starting. if "No RPC credentials" not in str(e): raise + suppressed_errors["missing_credentials"] += 1 + latest_error = repr(e) time.sleep(1.0 / poll_per_s) - self._raise_assertion_error("Unable to connect to bitcoind after {}s".format(self.rpc_timeout)) + self._raise_assertion_error(f"Unable to connect to bitcoind after {self.rpc_timeout}s (ignored errors: {str(dict(suppressed_errors))}, latest error: {latest_error})") def wait_for_cookie_credentials(self): """Ensures auth cookie credentials can be read, e.g. for testing CLI with -rpcwait before RPC connection is up.""" From 9b24a403fae4b896ff7705519bd48c877b4e621b Mon Sep 17 00:00:00 2001 From: Hodlinator <172445034+hodlinator@users.noreply.github.com> Date: Thu, 13 Feb 2025 09:18:24 +0100 Subject: [PATCH 3/6] qa: Only allow calling TestNode.stop() after connecting (Still tolerate calling it on a no longer (self.)running node, as in a node that has been queried for is_node_stopped() and modified state before returning True). Tests should not attempt to use the non-functioning RPC interface to call stop() unless wait_for_connections() has succeeded. No longer log and suppress http.client.CannotSendRequest as a consequence of stop()-RPC, as error conditions causing this knock-on issue are now guarded against before the call. --- test/functional/test_framework/test_node.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/functional/test_framework/test_node.py b/test/functional/test_framework/test_node.py index 2c4b876962..dc4efe88df 100755 --- a/test/functional/test_framework/test_node.py +++ b/test/functional/test_framework/test_node.py @@ -8,7 +8,6 @@ import contextlib import decimal import errno from enum import Enum -import http.client import json import logging import os @@ -307,10 +306,11 @@ class TestNode(): # overhead is trivial, and the added guarantees are worth # the minimal performance cost. self.log.debug("RPC successfully started") + # Set rpc_connected even if we are in use_cli mode so that we know we can call self.stop() if needed. + self.rpc_connected = True if self.use_cli: return self.rpc = rpc - self.rpc_connected = True self.url = self.rpc.rpc_url return except JSONRPCException as e: @@ -396,15 +396,15 @@ class TestNode(): """Stop the node.""" if not self.running: return + assert self.rpc_connected, self._node_msg( + "Should only call stop_node() on a running node after wait_for_rpc_connection() succeeded. " + f"Did you forget to call the latter after start()? Not connected to process: {self.process.pid}") self.log.debug("Stopping node") - try: - # Do not use wait argument when testing older nodes, e.g. in wallet_backwards_compatibility.py - if self.version_is_at_least(180000): - self.stop(wait=wait) - else: - self.stop() - except http.client.CannotSendRequest: - self.log.exception("Unable to stop node.") + # Do not use wait argument when testing older nodes, e.g. in wallet_backwards_compatibility.py + if self.version_is_at_least(180000): + self.stop(wait=wait) + else: + self.stop() # If there are any running perf processes, stop them. for profile_name in tuple(self.perf_subprocesses.keys()): From 1f639efca5e71a0ff208415d94e408a74778d4db Mon Sep 17 00:00:00 2001 From: Hodlinator <172445034+hodlinator@users.noreply.github.com> Date: Mon, 21 Apr 2025 14:15:34 +0200 Subject: [PATCH 4/6] qa: Work around Python socket timeout issue Observed on local machine running Windows / Python v3.13.1 when overriding rpc_timeout to small values (5- seconds). Next commit performs such overrides. --- test/functional/test_framework/test_node.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/test/functional/test_framework/test_node.py b/test/functional/test_framework/test_node.py index dc4efe88df..953badc3fe 100755 --- a/test/functional/test_framework/test_node.py +++ b/test/functional/test_framework/test_node.py @@ -322,15 +322,21 @@ class TestNode(): suppressed_errors[f"JSONRPCException {e.error['code']}"] += 1 latest_error = repr(e) except OSError as e: + error_num = e.errno + # Work around issue where socket timeouts don't have errno set. + # https://github.com/python/cpython/issues/109601 + if error_num is None and isinstance(e, TimeoutError): + error_num = errno.ETIMEDOUT + # Suppress similarly to the above JSONRPCException errors. - if e.errno not in [ + if error_num not in [ errno.ECONNRESET, # This might happen when the RPC server is in warmup, # but shut down before the call to getblockcount succeeds. errno.ETIMEDOUT, # Treat identical to ECONNRESET errno.ECONNREFUSED # Port not yet open? ]: raise # unknown OS error - suppressed_errors[f"OSError {errno.errorcode[e.errno]}"] += 1 + suppressed_errors[f"OSError {errno.errorcode[error_num]}"] += 1 latest_error = repr(e) except ValueError as e: # Suppress if cookie file isn't generated yet and no rpcuser or rpcpassword; bitcoind may be starting. From 28e282ef9ae94ede4aace6b97ff18c66cb72a001 Mon Sep 17 00:00:00 2001 From: Hodlinator <172445034+hodlinator@users.noreply.github.com> Date: Fri, 25 Apr 2025 14:39:16 +0200 Subject: [PATCH 5/6] qa: assert_raises_message() - Stop assuming certain structure for exceptions --- test/functional/test_framework/util.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/functional/test_framework/util.py b/test/functional/test_framework/util.py index 903dd1b2b5..c35d41124d 100644 --- a/test/functional/test_framework/util.py +++ b/test/functional/test_framework/util.py @@ -101,10 +101,9 @@ def assert_raises_message(exc, message, fun, *args, **kwds): except JSONRPCException: raise AssertionError("Use assert_raises_rpc_error() to test RPC failures") except exc as e: - if message is not None and message not in e.error['message']: - raise AssertionError( - "Expected substring not found in error message:\nsubstring: '{}'\nerror message: '{}'.".format( - message, e.error['message'])) + if message is not None and message not in repr(e): + raise AssertionError("Expected substring not found in exception:\n" + f"substring: '{message}'\nexception: {repr(e)}.") except Exception as e: raise AssertionError("Unexpected exception raised: " + type(e).__name__) else: From 10845cd7cc89fc83b4ee844dc577b391720bba9c Mon Sep 17 00:00:00 2001 From: Hodlinator <172445034+hodlinator@users.noreply.github.com> Date: Mon, 21 Apr 2025 15:28:39 +0200 Subject: [PATCH 6/6] qa: Add feature_framework_startup_failures.py --- .../feature_framework_startup_failures.py | 172 ++++++++++++++++++ test/functional/test_runner.py | 3 +- 2 files changed, 174 insertions(+), 1 deletion(-) create mode 100755 test/functional/feature_framework_startup_failures.py diff --git a/test/functional/feature_framework_startup_failures.py b/test/functional/feature_framework_startup_failures.py new file mode 100755 index 0000000000..d6da7f713b --- /dev/null +++ b/test/functional/feature_framework_startup_failures.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +# Copyright (c) 2025-present The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. +""" +Verify framework startup failures only raise one exception since +multiple exceptions being raised muddies the waters of what actually +went wrong. We should maintain this bar of only raising one exception as +long as additional maintenance and complexity is low. + +Test relaunches itself into child processes in order to trigger failures +without the parent process' BitcoinTestFramework also failing. +""" + +from test_framework.util import ( + assert_raises_message, + rpc_port, +) +from test_framework.test_framework import BitcoinTestFramework + +from hashlib import md5 +from os import linesep +import re +import subprocess +import sys +import time + +class FeatureFrameworkStartupFailures(BitcoinTestFramework): + def set_test_params(self): + self.num_nodes = 1 + + def setup_network(self): + # Don't start the node yet, as we want to measure during run_test. + self.add_nodes(self.num_nodes, self.extra_args) + + # Launches a child test process that runs this same file, but instantiates + # a child test. Verifies that it raises only the expected exception, once. + def _verify_startup_failure(self, test, internal_args, expected_exception): + # Inherit args from parent, only modifying tmpdir so children don't fail + # as a cause of colliding with the parent dir. + parent_args = sys.argv.copy() + assert self.options.tmpdir, "Framework should always set tmpdir." + i, path = next(((i, m[1]) for i, arg in enumerate(parent_args) if (m := re.match(r'--tm(?:p(?:d(?:ir?)?)?)?=(.+)', arg))), + (len(parent_args), self.options.tmpdir)) + subdir = md5(expected_exception.encode('utf-8')).hexdigest()[:8] + parent_args[i:i + 1] = [f"--tmpdir={path}/{subdir}"] + args = [sys.executable] + parent_args + [f"--internal_test={test.__name__}"] + internal_args + + try: + # The subprocess encoding argument gives different results for e.output + # on Linux/Windows, so handle decoding by ourselves for consistency. + output = subprocess.run(args, timeout=60 * self.options.timeout_factor, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout.decode("utf-8") + except subprocess.TimeoutExpired as e: + print("Unexpected child process timeout!\n" + "WARNING: Timeouts like this halt execution of TestNode logic, " + "meaning dangling bitcoind processes are to be expected.\n" + f"\n{e.output.decode('utf-8')}\n", + file=sys.stderr) + raise + + errors = [] + if (n := output.count("Traceback")) != 1: + errors.append(f"Found {n}/1 tracebacks - expecting exactly one with no knock-on exceptions.") + if (n := len(re.findall(expected_exception, output))) != 1: + errors.append(f"Found {n}/1 occurrences of the specific exception: {expected_exception}") + if (n := output.count("Test failed. Test logging available at")) != 1: + errors.append(f"Found {n}/1 test failure output messages.") + + assert not errors, f"Child test didn't contain (only) expected errors:\n{linesep.join(errors)}\n\n{output}\n\n" + + def run_test(self): + self.log.info("Verifying _verify_startup_failure() functionality (self-check).") + assert_raises_message( + AssertionError, + ("Child test didn't contain (only) expected errors:\n" + + linesep.join(["Found 0/1 tracebacks - expecting exactly one with no knock-on exceptions.", + "Found 0/1 occurrences of the specific exception: NonExistentError", + "Found 0/1 test failure output messages."])).encode("unicode_escape").decode("utf-8"), + self._verify_startup_failure, + TestSuccess, [], + "NonExistentError", + ) + + self.log.info("Parent process is measuring node startup duration in order to obtain a reasonable timeout value for later test...") + node_start_time = time.time() + self.nodes[0].start() + self.nodes[0].wait_for_rpc_connection() + node_start_duration = time.time() - node_start_time + self.nodes[0].stop_node() + self.log.info(f"...measured {node_start_duration:.1f}s.") + + self.log.info("Verifying inability to connect to bitcoind's RPC interface due to wrong port results in one exception containing at least one OSError.") + self._verify_startup_failure( + TestWrongRpcPortStartupFailure, [f"--internal_node_start_duration={node_start_duration}"], + r"AssertionError: \[node 0\] Unable to connect to bitcoind after \d+s \(ignored errors: {[^}]*'OSError \w+'?: \d+[^}]*}, latest error: \w+\([^)]+\)\)" + ) + + self.log.info("Verifying startup failure due to invalid arg results in only one exception.") + self._verify_startup_failure( + TestInitErrorStartupFailure, [], + r"FailedToStartError: \[node 0\] bitcoind exited with status 1 during initialization\. Error: Error parsing command line arguments: Invalid parameter -nonexistentarg" + ) + + self.log.info("Verifying start() then stop_node() on a node without wait_for_rpc_connection() in between raises an assert.") + self._verify_startup_failure( + TestStartStopStartupFailure, [], + r"AssertionError: \[node 0\] Should only call stop_node\(\) on a running node after wait_for_rpc_connection\(\) succeeded\. Did you forget to call the latter after start\(\)\? Not connected to process: \d+" + ) + +class InternalTestMixin: + def add_options(self, parser): + # Just here to silence unrecognized argument error, we actually read the value in the if-main at the bottom. + parser.add_argument("--internal_test", dest="internal_never_read", help="ONLY TO BE USED WHEN TEST RELAUNCHES ITSELF") + +class TestWrongRpcPortStartupFailure(InternalTestMixin, BitcoinTestFramework): + def add_options(self, parser): + parser.add_argument("--internal_node_start_duration", dest="node_start_duration", help="ONLY TO BE USED WHEN TEST RELAUNCHES ITSELF", type=float) + InternalTestMixin.add_options(self, parser) + + def set_test_params(self): + self.num_nodes = 1 + # Override RPC listen port to something TestNode isn't expecting so that + # we are unable to establish an RPC connection. + self.extra_args = [[f"-rpcport={rpc_port(2)}"]] + # Override the timeout to avoid waiting unnecessarily long to realize + # nothing is on that port. Divide by timeout_factor to counter + # multiplication in base, 2 * node_start_duration should be enough. + self.rpc_timeout = max(3, 2 * self.options.node_start_duration) / self.options.timeout_factor + + def run_test(self): + assert False, "Should have failed earlier during startup." + +class TestInitErrorStartupFailure(InternalTestMixin, BitcoinTestFramework): + def set_test_params(self): + self.num_nodes = 1 + self.extra_args = [["-nonexistentarg"]] + + def run_test(self): + assert False, "Should have failed earlier during startup." + +class TestStartStopStartupFailure(InternalTestMixin, BitcoinTestFramework): + def set_test_params(self): + self.num_nodes = 1 + + def setup_network(self): + self.add_nodes(self.num_nodes, self.extra_args) + self.nodes[0].start() + self.nodes[0].stop_node() + assert False, "stop_node() should raise an exception when we haven't called wait_for_rpc_connection()" + + def run_test(self): + assert False, "Should have failed earlier during startup." + +class TestSuccess(InternalTestMixin, BitcoinTestFramework): + def set_test_params(self): + self.num_nodes = 1 + + def setup_network(self): + pass # Don't need to start our node. + + def run_test(self): + pass # Just succeed. + + +if __name__ == '__main__': + if class_name := next((m[1] for arg in sys.argv[1:] if (m := re.match(r'--internal_test=(.+)', arg))), None): + internal_test = globals().get(class_name) + assert internal_test, f"Unrecognized test: {class_name}" + internal_test(__file__).main() + else: + FeatureFrameworkStartupFailures(__file__).main() diff --git a/test/functional/test_runner.py b/test/functional/test_runner.py index 3df1644622..abecb25c03 100755 --- a/test/functional/test_runner.py +++ b/test/functional/test_runner.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (c) 2014-2022 The Bitcoin Core developers +# Copyright (c) 2014-present The Bitcoin Core developers # Distributed under the MIT software license, see the accompanying # file COPYING or http://www.opensource.org/licenses/mit-license.php. """Run regression test suite. @@ -410,6 +410,7 @@ BASE_SCRIPTS = [ 'p2p_handshake.py --v2transport', 'feature_dirsymlinks.py', 'feature_help.py', + 'feature_framework_startup_failures.py', 'feature_shutdown.py', 'wallet_migration.py', 'p2p_ibd_txrelay.py',