From f3cae3d6f3226a26a8fc8d5c7fd98845a9c2af51 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 7 Jun 2011 02:09:11 +0000 Subject: [PATCH] Cleanup the handling of if_include and if_exclude arguments based on CIDR notation. Fix a bug in the new code that prevented the system from correctly matching addresses. Remove comments in the show-help text indicating that we would continue in the face of incorrect specifications - leave that to the calling layer to decide. Modify the new opal_ifmatches so it returns error codes letting the caller better understand the result. Modify the oob to ensure we abort if we don't find interfaces matching specified constraints, and that we do so without multiple error messages. NOTE: we have a conflict in our standards. We have been using comma-delimited lists of interfaces for all our params. However, one param - opal_net_private_ipv4 - now uses semicolons instead of comma separators. No idea why, but it is confusing. This commit was SVN r24755. --- opal/include/opal/constants.h | 3 +- opal/runtime/opal_init.c | 4 +- opal/util/help-opal-util.txt | 13 ++++-- opal/util/if.c | 59 +++++++++++++------------ opal/util/if.h | 2 +- orte/mca/oob/base/oob_base_init.c | 7 +-- orte/mca/oob/tcp/help-oob-tcp.txt | 26 +++++++++++ orte/mca/oob/tcp/oob_tcp.c | 27 +++++++++-- orte/mca/rml/base/rml_base_components.c | 11 ++++- orte/mca/rml/oob/rml_oob_component.c | 5 ++- 10 files changed, 113 insertions(+), 44 deletions(-) diff --git a/opal/include/opal/constants.h b/opal/include/opal/constants.h index 865ad521d9..a102307e02 100644 --- a/opal/include/opal/constants.h +++ b/opal/include/opal/constants.h @@ -67,7 +67,8 @@ enum { OPAL_ERR_NOT_ENOUGH_CORES = (OPAL_ERR_BASE - 38), OPAL_ERR_INVALID_PHYS_CPU = (OPAL_ERR_BASE - 39), OPAL_ERR_MULTIPLE_AFFINITIES = (OPAL_ERR_BASE - 40), - OPAL_ERR_SLOT_LIST_RANGE = (OPAL_ERR_BASE - 41) + OPAL_ERR_SLOT_LIST_RANGE = (OPAL_ERR_BASE - 41), + OPAL_ERR_NETWORK_NOT_PARSEABLE = (OPAL_ERR_BASE - 42) }; #define OPAL_ERR_MAX (OPAL_ERR_BASE - 100) diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index ccd67c13cc..e840b367c3 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -198,7 +198,9 @@ opal_err2str(int errnum, const char **errmsg) case OPAL_ERR_SLOT_LIST_RANGE: retval = "Provided slot_list range is invalid"; break; - + case OPAL_ERR_NETWORK_NOT_PARSEABLE: + retval = "Provided network specification is not parseable"; + break; default: retval = NULL; } diff --git a/opal/util/help-opal-util.txt b/opal/util/help-opal-util.txt index dfc4f1469b..6f64e7b7bd 100644 --- a/opal/util/help-opal-util.txt +++ b/opal/util/help-opal-util.txt @@ -36,9 +36,14 @@ of the form /. For example: The first detected malformed entry was %s. # [invalid-net-mask] -Open MPI has detected an malformed IPv4 address or netmask -in %s. Accepted values follows the Classless Inter-Domain +Open MPI has detected a malformed IPv4 address or netmask: + + Value provided: %s + +Accepted values follow the Classless Inter-Domain Routing (CIDR) notation specifications. For example: - 10.0.0.0/8;172.16/12;192.168;169.254.0.0/16 - + 10.0.0.0/8 + 172.16/12 + 192.168 + 169.254.0.0/16 diff --git a/opal/util/if.c b/opal/util/if.c index 7c45ac20a5..bb96abad24 100644 --- a/opal/util/if.c +++ b/opal/util/if.c @@ -500,7 +500,7 @@ static uint32_t parse_ipv4_dots(const char *addr, uint32_t* net, int* dots) for( i = 0; i < 4; i++ ) { n[i] = strtoul(start, (char**)&end, 10); if( end == start ) { /* error case: use what we have so far */ - rc = OPAL_ERROR; + rc = OPAL_SUCCESS; break; } /* skip all the . */ @@ -508,12 +508,12 @@ static uint32_t parse_ipv4_dots(const char *addr, uint32_t* net, int* dots) if( '.' != *start ) break; /* did we read something sensible? */ if( n[i] > 255 ) { - return OPAL_ERROR; + return OPAL_ERR_NETWORK_NOT_PARSEABLE; } } *dots = i; *net = OPAL_IF_ASSEMBLE_NETWORK(n[0], n[1], n[2], n[3]); - return OPAL_SUCCESS; + return rc; } int @@ -541,29 +541,32 @@ opal_iftupletoaddr(const char *inaddr, uint32_t *net, uint32_t *mask) pval = strtol(ptr, NULL, 10); if ((pval > 31) || (pval < 1)) { opal_output(0, "opal_iftupletoaddr: unknown mask"); - return OPAL_ERROR; + return OPAL_ERR_NETWORK_NOT_PARSEABLE; } *mask = 0xFFFFFFFF << (32 - pval); } } else { /* use the number of dots to determine it */ - for( ptr = inaddr, pval = 0; '\0'!= *ptr; ptr++ ) - if( '.' == *ptr ) pval++; + for (ptr = inaddr, pval = 0; '\0'!= *ptr; ptr++) { + if ('.' == *ptr) { + pval++; + } + } /* if we have three dots, then we have four * fields since it is a full address, so the * default netmask is fine */ - if (pval < 4) { - if (3 == pval) { /* 2 dots */ - *mask = 0xFFFFFF00; - } else if (2 == pval) { /* 1 dot */ - *mask = 0xFFFF0000; - } else if (1 == pval) { /* no dots */ - *mask = 0xFF000000; - } else { - opal_output(0, "opal_iftupletoaddr: unknown mask"); - return OPAL_ERROR; - } + if (3 == pval) { + *mask = 0xFFFFFFFF; + } else if (2 == pval) { /* 2 dots */ + *mask = 0xFFFFFF00; + } else if (1 == pval) { /* 1 dot */ + *mask = 0xFFFF0000; + } else if (0 == pval) { /* no dots */ + *mask = 0xFF000000; + } else { + opal_output(0, "opal_iftupletoaddr: unknown mask"); + return OPAL_ERR_NETWORK_NOT_PARSEABLE; } } } @@ -605,18 +608,18 @@ bool opal_ifisloopback(int if_index) * into account that the list entries could be given as named interfaces, * IP addrs, or subnet+mask */ -bool opal_ifmatches(int idx, char **nets) +int opal_ifmatches(int idx, char **nets) { bool named_if; - int i; + int i, rc; size_t j; int index; struct sockaddr_in inaddr; uint32_t addr, netaddr, netmask; /* get the address info for the given network in case we need it */ - if (OPAL_SUCCESS != opal_ifindextoaddr(idx, (struct sockaddr*)&inaddr, sizeof(inaddr))) { - return false; + if (OPAL_SUCCESS != (rc = opal_ifindextoaddr(idx, (struct sockaddr*)&inaddr, sizeof(inaddr)))) { + return rc; } addr = ntohl(inaddr.sin_addr.s_addr); @@ -636,20 +639,20 @@ bool opal_ifmatches(int idx, char **nets) continue; } if (index == idx) { - return true; + return OPAL_SUCCESS; } } else { - if (OPAL_SUCCESS != opal_iftupletoaddr(nets[i], &netaddr, &netmask)) { + if (OPAL_SUCCESS != (rc = opal_iftupletoaddr(nets[i], &netaddr, &netmask))) { opal_show_help("help-opal-util.txt", "invalid-net-mask", true, nets[i]); - continue; + return rc; } if (netaddr == (addr & netmask)) { - return true; + return OPAL_SUCCESS; } } } /* get here if not found */ - return false; + return OPAL_ERR_NOT_FOUND; } @@ -744,9 +747,9 @@ opal_iftupletoaddr(char *inaddr, uint32_t *net, uint32_t *mask) return 0; } -bool opal_ifispresent(char *if) +int opal_ifmatches(int idx, char **nets) { - return false; + return OPAL_ERR_NOT_SUPPORTED; } #endif /* HAVE_STRUCT_SOCKADDR_IN */ diff --git a/opal/util/if.h b/opal/util/if.h index 92b4913bd7..a56324a4de 100644 --- a/opal/util/if.h +++ b/opal/util/if.h @@ -191,7 +191,7 @@ OPAL_DECLSPEC bool opal_ifisloopback(int if_index); /* * Determine if a specified interface is included in a NULL-terminated argv array */ -OPAL_DECLSPEC bool opal_ifmatches(int idx, char **nets); +OPAL_DECLSPEC int opal_ifmatches(int idx, char **nets); END_C_DECLS diff --git a/orte/mca/oob/base/oob_base_init.c b/orte/mca/oob/base/oob_base_init.c index c77fad1375..cf297f67ca 100644 --- a/orte/mca/oob/base/oob_base_init.c +++ b/orte/mca/oob/base/oob_base_init.c @@ -90,10 +90,11 @@ int mca_oob_base_init(void) } } /* set the global variable to point to the first initialize module */ - if(s_module == NULL) { + if (s_module == NULL) { opal_output_verbose(10, mca_oob_base_output, "mca_oob_base_init: no OOB modules available\n"); - return ORTE_ERROR; - } + /* the oob modules will have printed out an error msg - so be silent here */ + return ORTE_ERR_SILENT; + } mca_oob = *s_module; return ORTE_SUCCESS; diff --git a/orte/mca/oob/tcp/help-oob-tcp.txt b/orte/mca/oob/tcp/help-oob-tcp.txt index 70a79076fc..204d291d73 100644 --- a/orte/mca/oob/tcp/help-oob-tcp.txt +++ b/orte/mca/oob/tcp/help-oob-tcp.txt @@ -33,3 +33,29 @@ Both TCP interface include and exclude lists were specified: Exclude: %s Only one of these can be given. +# +[not-parseable] +The specified network is not parseable. Since we cannot determine +your desired intent, we cannot establish a TCP socket for out-of-band +communications and will therefore abort. Please correct the network +specification and retry. +# +[no-included-found] +None of the networks specified to be included for out-of-band communications +could be found: + + Value given: %s + +Please revise the specification and try again. +# +[excluded-all] +The specified list of networks to be excluded for out-of-band communications +resulted in no networks being available: + + Value given: %s + +Please revise the specification and try again. +# +[no-interfaces-avail] +No network interfaces were found for out-of-band communications. We require +at least one available network for TCP-based messaging. diff --git a/orte/mca/oob/tcp/oob_tcp.c b/orte/mca/oob/tcp/oob_tcp.c index a8eb39223f..9bab1409d5 100644 --- a/orte/mca/oob/tcp/oob_tcp.c +++ b/orte/mca/oob/tcp/oob_tcp.c @@ -1296,12 +1296,12 @@ static void mca_oob_tcp_recv_handler(int sd, short flags, void* user) */ mca_oob_t* mca_oob_tcp_component_init(int* priority) { - int i; + int i, rc; bool found_local = false; bool found_nonlocal = false; char **interfaces = NULL; mca_oob_tcp_device_t *dev; - bool including = true; + bool including = false, excluding = false; char name[32]; *priority = 1; @@ -1328,9 +1328,11 @@ mca_oob_t* mca_oob_tcp_component_init(int* priority) if (NULL != mca_oob_tcp_component.tcp_include) { interfaces = opal_argv_split(mca_oob_tcp_component.tcp_include, ','); including = true; + excluding = false; } else if (NULL != mca_oob_tcp_component.tcp_exclude) { interfaces = opal_argv_split(mca_oob_tcp_component.tcp_exclude, ','); including = false; + excluding = true; } /* look at all available interfaces */ @@ -1341,9 +1343,19 @@ mca_oob_t* mca_oob_tcp_component_init(int* priority) /* handle include/exclude directives */ if (NULL != interfaces) { + /* check for match */ + rc = opal_ifmatches(i, interfaces); + /* if one of the network specifications isn't parseable, then + * error out as we can't do what was requested + */ + if (OPAL_ERR_NETWORK_NOT_PARSEABLE == rc) { + orte_show_help("help-oob-tcp.txt", "not-parseable", true); + opal_argv_free(interfaces); + return NULL; + } /* if we are including, then ignore this if not present */ if (including) { - if (!opal_ifmatches(i, interfaces)) { + if (OPAL_SUCCESS != rc) { OPAL_OUTPUT_VERBOSE((1, mca_oob_tcp_output_handle, "%s oob:tcp:init rejecting interface %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), name)); @@ -1351,7 +1363,7 @@ mca_oob_t* mca_oob_tcp_component_init(int* priority) } } else { /* we are excluding, so ignore if present */ - if (opal_ifmatches(i, interfaces)) { + if (OPAL_SUCCESS == rc) { OPAL_OUTPUT_VERBOSE((1, mca_oob_tcp_output_handle, "%s oob:tcp:init rejecting interface %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), name)); @@ -1402,6 +1414,13 @@ mca_oob_t* mca_oob_tcp_component_init(int* priority) } if (opal_list_get_size(&mca_oob_tcp_component.tcp_available_devices) == 0) { + if (including) { + orte_show_help("help-oob-tcp.txt", "no-included-found", true, mca_oob_tcp_component.tcp_include); + } else if (excluding) { + orte_show_help("help-oob-tcp.txt", "excluded-all", true, mca_oob_tcp_component.tcp_exclude); + } else { + orte_show_help("help-oob-tcp.txt", "no-interfaces-avail", true); + } return NULL; } diff --git a/orte/mca/rml/base/rml_base_components.c b/orte/mca/rml/base/rml_base_components.c index 76d1235106..70b746cc12 100644 --- a/orte/mca/rml/base/rml_base_components.c +++ b/orte/mca/rml/base/rml_base_components.c @@ -132,6 +132,7 @@ orte_rml_base_select(void) orte_rml_component_t *wrapper_component = NULL; orte_rml_module_t *wrapper_module = NULL; char *rml_wrapper = NULL; + bool return_silent=false; if (selected) { return ORTE_SUCCESS; @@ -167,6 +168,9 @@ orte_rml_base_select(void) if (NULL == module) { opal_output_verbose(10, orte_rml_base_output, "orte_rml_base_select: init returned failure"); + if (priority < 0) { + return_silent = true; + } continue; } @@ -241,7 +245,12 @@ orte_rml_base_select(void) free(rml_wrapper); } - if (NULL == selected_component) return ORTE_ERROR; + if (NULL == selected_component) { + if (return_silent) { + return ORTE_ERR_SILENT; + } + return ORTE_ERROR; + } return ORTE_SUCCESS; } diff --git a/orte/mca/rml/oob/rml_oob_component.c b/orte/mca/rml/oob/rml_oob_component.c index 7043f55169..599ae176c0 100644 --- a/orte/mca/rml/oob/rml_oob_component.c +++ b/orte/mca/rml/oob/rml_oob_component.c @@ -144,8 +144,11 @@ rml_oob_init(int* priority) return &orte_rml_oob_module.super; } - if (mca_oob_base_init() != ORTE_SUCCESS) + if (mca_oob_base_init() != ORTE_SUCCESS) { + *priority = -1; return NULL; + } + *priority = 1; OBJ_CONSTRUCT(&orte_rml_oob_module.exceptions, opal_list_t);