From 7370235c3ec2e9443495b7e6c21c013f05e805dd Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 21 Aug 2009 21:28:15 +0000 Subject: [PATCH] Create a more specific error code for when specific sockets are not available. Ensure that slurm 2.0 gets the expected error return if the process can't start for that reason so it can take corrective action. This commit was SVN r21867. --- orte/include/orte/constants.h | 3 ++- orte/mca/ess/slurmd/ess_slurmd_module.c | 9 ++++++++- orte/mca/oob/tcp/oob_tcp.c | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/orte/include/orte/constants.h b/orte/include/orte/constants.h index 2f4267d670..1917c69097 100644 --- a/orte/include/orte/constants.h +++ b/orte/include/orte/constants.h @@ -98,7 +98,8 @@ enum { ORTE_ERR_FAILED_TO_START = (ORTE_ERR_BASE - 26), ORTE_ERR_FILE_NOT_EXECUTABLE = (ORTE_ERR_BASE - 27), ORTE_ERR_HNP_COULD_NOT_START = (ORTE_ERR_BASE - 28), - ORTE_ERR_SYS_LIMITS_SOCKETS = (ORTE_ERR_BASE - 29) + ORTE_ERR_SYS_LIMITS_SOCKETS = (ORTE_ERR_BASE - 29), + ORTE_ERR_SOCKET_NOT_AVAILABLE = (ORTE_ERR_BASE - 30) }; #define ORTE_ERR_MAX (ORTE_ERR_BASE - 100) diff --git a/orte/mca/ess/slurmd/ess_slurmd_module.c b/orte/mca/ess/slurmd/ess_slurmd_module.c index 1bcf3dc7ab..328085eb57 100644 --- a/orte/mca/ess/slurmd/ess_slurmd_module.c +++ b/orte/mca/ess/slurmd/ess_slurmd_module.c @@ -83,6 +83,7 @@ orte_ess_base_module_t orte_ess_slurmd_module = { /* Local globals */ static bool app_init_complete; +static bool slurm20; /**** MODULE FUNCTIONS ****/ @@ -108,6 +109,7 @@ static int rte_init(void) /* init flag */ app_init_complete = false; + slurm20 = false; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { @@ -171,6 +173,7 @@ static int rte_init(void) * picked up by the OOB */ orte_oob_static_ports = strdup(envar); + slurm20 = true; OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, "%s using SLURM-reserved ports %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -350,10 +353,14 @@ static int rte_init(void) return ORTE_SUCCESS; error: + if (ORTE_ERR_SOCKET_NOT_AVAILABLE == ret && slurm20) { + /* exit silently with a special error code for slurm 2.0 */ + exit(108); + } + orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); - return ret; } diff --git a/orte/mca/oob/tcp/oob_tcp.c b/orte/mca/oob/tcp/oob_tcp.c index aec3c8002b..67d9ca7625 100644 --- a/orte/mca/oob/tcp/oob_tcp.c +++ b/orte/mca/oob/tcp/oob_tcp.c @@ -797,7 +797,7 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t /* cleanup and return the error */ CLOSE_THE_SOCKET(*target_sd); opal_argv_free(ports); - return ORTE_ERROR; + return ORTE_ERR_SOCKET_NOT_AVAILABLE; socket_binded: