diff --git a/opal/include/opal/constants.h b/opal/include/opal/constants.h index 865ad521d9..a102307e02 100644 --- a/opal/include/opal/constants.h +++ b/opal/include/opal/constants.h @@ -67,7 +67,8 @@ enum { OPAL_ERR_NOT_ENOUGH_CORES = (OPAL_ERR_BASE - 38), OPAL_ERR_INVALID_PHYS_CPU = (OPAL_ERR_BASE - 39), OPAL_ERR_MULTIPLE_AFFINITIES = (OPAL_ERR_BASE - 40), - OPAL_ERR_SLOT_LIST_RANGE = (OPAL_ERR_BASE - 41) + OPAL_ERR_SLOT_LIST_RANGE = (OPAL_ERR_BASE - 41), + OPAL_ERR_NETWORK_NOT_PARSEABLE = (OPAL_ERR_BASE - 42) }; #define OPAL_ERR_MAX (OPAL_ERR_BASE - 100) diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index ccd67c13cc..e840b367c3 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -198,7 +198,9 @@ opal_err2str(int errnum, const char **errmsg) case OPAL_ERR_SLOT_LIST_RANGE: retval = "Provided slot_list range is invalid"; break; - + case OPAL_ERR_NETWORK_NOT_PARSEABLE: + retval = "Provided network specification is not parseable"; + break; default: retval = NULL; } diff --git a/opal/util/help-opal-util.txt b/opal/util/help-opal-util.txt index dfc4f1469b..6f64e7b7bd 100644 --- a/opal/util/help-opal-util.txt +++ b/opal/util/help-opal-util.txt @@ -36,9 +36,14 @@ of the form /. For example: The first detected malformed entry was %s. # [invalid-net-mask] -Open MPI has detected an malformed IPv4 address or netmask -in %s. Accepted values follows the Classless Inter-Domain +Open MPI has detected a malformed IPv4 address or netmask: + + Value provided: %s + +Accepted values follow the Classless Inter-Domain Routing (CIDR) notation specifications. For example: - 10.0.0.0/8;172.16/12;192.168;169.254.0.0/16 - + 10.0.0.0/8 + 172.16/12 + 192.168 + 169.254.0.0/16 diff --git a/opal/util/if.c b/opal/util/if.c index 7c45ac20a5..bb96abad24 100644 --- a/opal/util/if.c +++ b/opal/util/if.c @@ -500,7 +500,7 @@ static uint32_t parse_ipv4_dots(const char *addr, uint32_t* net, int* dots) for( i = 0; i < 4; i++ ) { n[i] = strtoul(start, (char**)&end, 10); if( end == start ) { /* error case: use what we have so far */ - rc = OPAL_ERROR; + rc = OPAL_SUCCESS; break; } /* skip all the . */ @@ -508,12 +508,12 @@ static uint32_t parse_ipv4_dots(const char *addr, uint32_t* net, int* dots) if( '.' != *start ) break; /* did we read something sensible? */ if( n[i] > 255 ) { - return OPAL_ERROR; + return OPAL_ERR_NETWORK_NOT_PARSEABLE; } } *dots = i; *net = OPAL_IF_ASSEMBLE_NETWORK(n[0], n[1], n[2], n[3]); - return OPAL_SUCCESS; + return rc; } int @@ -541,29 +541,32 @@ opal_iftupletoaddr(const char *inaddr, uint32_t *net, uint32_t *mask) pval = strtol(ptr, NULL, 10); if ((pval > 31) || (pval < 1)) { opal_output(0, "opal_iftupletoaddr: unknown mask"); - return OPAL_ERROR; + return OPAL_ERR_NETWORK_NOT_PARSEABLE; } *mask = 0xFFFFFFFF << (32 - pval); } } else { /* use the number of dots to determine it */ - for( ptr = inaddr, pval = 0; '\0'!= *ptr; ptr++ ) - if( '.' == *ptr ) pval++; + for (ptr = inaddr, pval = 0; '\0'!= *ptr; ptr++) { + if ('.' == *ptr) { + pval++; + } + } /* if we have three dots, then we have four * fields since it is a full address, so the * default netmask is fine */ - if (pval < 4) { - if (3 == pval) { /* 2 dots */ - *mask = 0xFFFFFF00; - } else if (2 == pval) { /* 1 dot */ - *mask = 0xFFFF0000; - } else if (1 == pval) { /* no dots */ - *mask = 0xFF000000; - } else { - opal_output(0, "opal_iftupletoaddr: unknown mask"); - return OPAL_ERROR; - } + if (3 == pval) { + *mask = 0xFFFFFFFF; + } else if (2 == pval) { /* 2 dots */ + *mask = 0xFFFFFF00; + } else if (1 == pval) { /* 1 dot */ + *mask = 0xFFFF0000; + } else if (0 == pval) { /* no dots */ + *mask = 0xFF000000; + } else { + opal_output(0, "opal_iftupletoaddr: unknown mask"); + return OPAL_ERR_NETWORK_NOT_PARSEABLE; } } } @@ -605,18 +608,18 @@ bool opal_ifisloopback(int if_index) * into account that the list entries could be given as named interfaces, * IP addrs, or subnet+mask */ -bool opal_ifmatches(int idx, char **nets) +int opal_ifmatches(int idx, char **nets) { bool named_if; - int i; + int i, rc; size_t j; int index; struct sockaddr_in inaddr; uint32_t addr, netaddr, netmask; /* get the address info for the given network in case we need it */ - if (OPAL_SUCCESS != opal_ifindextoaddr(idx, (struct sockaddr*)&inaddr, sizeof(inaddr))) { - return false; + if (OPAL_SUCCESS != (rc = opal_ifindextoaddr(idx, (struct sockaddr*)&inaddr, sizeof(inaddr)))) { + return rc; } addr = ntohl(inaddr.sin_addr.s_addr); @@ -636,20 +639,20 @@ bool opal_ifmatches(int idx, char **nets) continue; } if (index == idx) { - return true; + return OPAL_SUCCESS; } } else { - if (OPAL_SUCCESS != opal_iftupletoaddr(nets[i], &netaddr, &netmask)) { + if (OPAL_SUCCESS != (rc = opal_iftupletoaddr(nets[i], &netaddr, &netmask))) { opal_show_help("help-opal-util.txt", "invalid-net-mask", true, nets[i]); - continue; + return rc; } if (netaddr == (addr & netmask)) { - return true; + return OPAL_SUCCESS; } } } /* get here if not found */ - return false; + return OPAL_ERR_NOT_FOUND; } @@ -744,9 +747,9 @@ opal_iftupletoaddr(char *inaddr, uint32_t *net, uint32_t *mask) return 0; } -bool opal_ifispresent(char *if) +int opal_ifmatches(int idx, char **nets) { - return false; + return OPAL_ERR_NOT_SUPPORTED; } #endif /* HAVE_STRUCT_SOCKADDR_IN */ diff --git a/opal/util/if.h b/opal/util/if.h index 92b4913bd7..a56324a4de 100644 --- a/opal/util/if.h +++ b/opal/util/if.h @@ -191,7 +191,7 @@ OPAL_DECLSPEC bool opal_ifisloopback(int if_index); /* * Determine if a specified interface is included in a NULL-terminated argv array */ -OPAL_DECLSPEC bool opal_ifmatches(int idx, char **nets); +OPAL_DECLSPEC int opal_ifmatches(int idx, char **nets); END_C_DECLS diff --git a/orte/mca/oob/base/oob_base_init.c b/orte/mca/oob/base/oob_base_init.c index c77fad1375..cf297f67ca 100644 --- a/orte/mca/oob/base/oob_base_init.c +++ b/orte/mca/oob/base/oob_base_init.c @@ -90,10 +90,11 @@ int mca_oob_base_init(void) } } /* set the global variable to point to the first initialize module */ - if(s_module == NULL) { + if (s_module == NULL) { opal_output_verbose(10, mca_oob_base_output, "mca_oob_base_init: no OOB modules available\n"); - return ORTE_ERROR; - } + /* the oob modules will have printed out an error msg - so be silent here */ + return ORTE_ERR_SILENT; + } mca_oob = *s_module; return ORTE_SUCCESS; diff --git a/orte/mca/oob/tcp/help-oob-tcp.txt b/orte/mca/oob/tcp/help-oob-tcp.txt index 70a79076fc..204d291d73 100644 --- a/orte/mca/oob/tcp/help-oob-tcp.txt +++ b/orte/mca/oob/tcp/help-oob-tcp.txt @@ -33,3 +33,29 @@ Both TCP interface include and exclude lists were specified: Exclude: %s Only one of these can be given. +# +[not-parseable] +The specified network is not parseable. Since we cannot determine +your desired intent, we cannot establish a TCP socket for out-of-band +communications and will therefore abort. Please correct the network +specification and retry. +# +[no-included-found] +None of the networks specified to be included for out-of-band communications +could be found: + + Value given: %s + +Please revise the specification and try again. +# +[excluded-all] +The specified list of networks to be excluded for out-of-band communications +resulted in no networks being available: + + Value given: %s + +Please revise the specification and try again. +# +[no-interfaces-avail] +No network interfaces were found for out-of-band communications. We require +at least one available network for TCP-based messaging. diff --git a/orte/mca/oob/tcp/oob_tcp.c b/orte/mca/oob/tcp/oob_tcp.c index a8eb39223f..9bab1409d5 100644 --- a/orte/mca/oob/tcp/oob_tcp.c +++ b/orte/mca/oob/tcp/oob_tcp.c @@ -1296,12 +1296,12 @@ static void mca_oob_tcp_recv_handler(int sd, short flags, void* user) */ mca_oob_t* mca_oob_tcp_component_init(int* priority) { - int i; + int i, rc; bool found_local = false; bool found_nonlocal = false; char **interfaces = NULL; mca_oob_tcp_device_t *dev; - bool including = true; + bool including = false, excluding = false; char name[32]; *priority = 1; @@ -1328,9 +1328,11 @@ mca_oob_t* mca_oob_tcp_component_init(int* priority) if (NULL != mca_oob_tcp_component.tcp_include) { interfaces = opal_argv_split(mca_oob_tcp_component.tcp_include, ','); including = true; + excluding = false; } else if (NULL != mca_oob_tcp_component.tcp_exclude) { interfaces = opal_argv_split(mca_oob_tcp_component.tcp_exclude, ','); including = false; + excluding = true; } /* look at all available interfaces */ @@ -1341,9 +1343,19 @@ mca_oob_t* mca_oob_tcp_component_init(int* priority) /* handle include/exclude directives */ if (NULL != interfaces) { + /* check for match */ + rc = opal_ifmatches(i, interfaces); + /* if one of the network specifications isn't parseable, then + * error out as we can't do what was requested + */ + if (OPAL_ERR_NETWORK_NOT_PARSEABLE == rc) { + orte_show_help("help-oob-tcp.txt", "not-parseable", true); + opal_argv_free(interfaces); + return NULL; + } /* if we are including, then ignore this if not present */ if (including) { - if (!opal_ifmatches(i, interfaces)) { + if (OPAL_SUCCESS != rc) { OPAL_OUTPUT_VERBOSE((1, mca_oob_tcp_output_handle, "%s oob:tcp:init rejecting interface %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), name)); @@ -1351,7 +1363,7 @@ mca_oob_t* mca_oob_tcp_component_init(int* priority) } } else { /* we are excluding, so ignore if present */ - if (opal_ifmatches(i, interfaces)) { + if (OPAL_SUCCESS == rc) { OPAL_OUTPUT_VERBOSE((1, mca_oob_tcp_output_handle, "%s oob:tcp:init rejecting interface %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), name)); @@ -1402,6 +1414,13 @@ mca_oob_t* mca_oob_tcp_component_init(int* priority) } if (opal_list_get_size(&mca_oob_tcp_component.tcp_available_devices) == 0) { + if (including) { + orte_show_help("help-oob-tcp.txt", "no-included-found", true, mca_oob_tcp_component.tcp_include); + } else if (excluding) { + orte_show_help("help-oob-tcp.txt", "excluded-all", true, mca_oob_tcp_component.tcp_exclude); + } else { + orte_show_help("help-oob-tcp.txt", "no-interfaces-avail", true); + } return NULL; } diff --git a/orte/mca/rml/base/rml_base_components.c b/orte/mca/rml/base/rml_base_components.c index 76d1235106..70b746cc12 100644 --- a/orte/mca/rml/base/rml_base_components.c +++ b/orte/mca/rml/base/rml_base_components.c @@ -132,6 +132,7 @@ orte_rml_base_select(void) orte_rml_component_t *wrapper_component = NULL; orte_rml_module_t *wrapper_module = NULL; char *rml_wrapper = NULL; + bool return_silent=false; if (selected) { return ORTE_SUCCESS; @@ -167,6 +168,9 @@ orte_rml_base_select(void) if (NULL == module) { opal_output_verbose(10, orte_rml_base_output, "orte_rml_base_select: init returned failure"); + if (priority < 0) { + return_silent = true; + } continue; } @@ -241,7 +245,12 @@ orte_rml_base_select(void) free(rml_wrapper); } - if (NULL == selected_component) return ORTE_ERROR; + if (NULL == selected_component) { + if (return_silent) { + return ORTE_ERR_SILENT; + } + return ORTE_ERROR; + } return ORTE_SUCCESS; } diff --git a/orte/mca/rml/oob/rml_oob_component.c b/orte/mca/rml/oob/rml_oob_component.c index 7043f55169..599ae176c0 100644 --- a/orte/mca/rml/oob/rml_oob_component.c +++ b/orte/mca/rml/oob/rml_oob_component.c @@ -144,8 +144,11 @@ rml_oob_init(int* priority) return &orte_rml_oob_module.super; } - if (mca_oob_base_init() != ORTE_SUCCESS) + if (mca_oob_base_init() != ORTE_SUCCESS) { + *priority = -1; return NULL; + } + *priority = 1; OBJ_CONSTRUCT(&orte_rml_oob_module.exceptions, opal_list_t);