Honestly, I had not previously considered that an MPI process might attempt to call functions in the routed framework that are intended solely for daemons and HNPs. By design, MPI processes are not allowed to route RML/OOB messages, and hence the routed module in an MPI process has no knowledge whatsoever of how a message will reach its destination (except in the direct module). Thus, it has no way to return a valid routing tree, update a routing tree, or get wireup info.
This commit ensures that attempts to access information that is unknowable or undefined return appropriate invalid or not_supported values to avoid unexpected behavior and/or segfaults. This commit was SVN r18692.
Этот коммит содержится в:
родитель
7905db57bd
Коммит
c693d3a5d1
@ -681,6 +681,13 @@ static int update_routing_tree(void)
|
||||
{
|
||||
opal_list_item_t *item;
|
||||
|
||||
/* if I am anything other than a daemon or the HNP, this
|
||||
* is a meaningless command as I am not allowed to route
|
||||
*/
|
||||
if (!orte_process_info.daemon && !orte_process_info.hnp) {
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/* clear the list of children if any are already present */
|
||||
while (NULL != (item = opal_list_remove_first(&my_children))) {
|
||||
OBJ_RELEASE(item);
|
||||
@ -700,6 +707,13 @@ static orte_vpid_t get_routing_tree(orte_jobid_t job,
|
||||
opal_list_item_t *item;
|
||||
orte_namelist_t *nm, *child;
|
||||
|
||||
/* if I am anything other than a daemon or the HNP, this
|
||||
* is a meaningless command as I am not allowed to route
|
||||
*/
|
||||
if (!orte_process_info.daemon && !orte_process_info.hnp) {
|
||||
return ORTE_VPID_INVALID;
|
||||
}
|
||||
|
||||
/* the binomial routing tree always goes to our children,
|
||||
* for any job
|
||||
*/
|
||||
@ -723,6 +737,14 @@ static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf)
|
||||
{
|
||||
int rc;
|
||||
|
||||
/* if I am anything other than the HNP, this
|
||||
* is a meaningless command as I cannot get
|
||||
* the requested info
|
||||
*/
|
||||
if (!orte_process_info.hnp) {
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/* if we are not using static ports, then we need to share the
|
||||
* comm info - otherwise, just return
|
||||
*/
|
||||
@ -730,7 +752,7 @@ static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf)
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, buf))) {
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(job, buf))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(buf);
|
||||
return rc;
|
||||
|
@ -670,6 +670,13 @@ static bool route_is_defined(const orte_process_name_t *target)
|
||||
|
||||
static int update_routing_tree(void)
|
||||
{
|
||||
/* if I am anything other than a daemon or the HNP, this
|
||||
* is a meaningless command as I am not allowed to route
|
||||
*/
|
||||
if (!orte_process_info.daemon && !orte_process_info.hnp) {
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/* nothing to do here as the routing tree is fixed */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -679,11 +686,17 @@ static orte_vpid_t get_routing_tree(orte_jobid_t job,
|
||||
{
|
||||
orte_namelist_t *nm;
|
||||
|
||||
/* for anyone other than the HNP, the direct routing
|
||||
* does not go anywhere - we don't relay - and our
|
||||
/* if I am anything other than a daemon or the HNP, this
|
||||
* is a meaningless command as I am not allowed to route
|
||||
*/
|
||||
if (!orte_process_info.daemon && !orte_process_info.hnp) {
|
||||
return ORTE_VPID_INVALID;
|
||||
}
|
||||
|
||||
/* if I am a daemon, I have no children and my
|
||||
* parent is the HNP
|
||||
*/
|
||||
if (!orte_process_info.hnp) {
|
||||
if (orte_process_info.daemon) {
|
||||
return ORTE_PROC_MY_HNP->vpid;
|
||||
}
|
||||
|
||||
@ -702,11 +715,19 @@ static orte_vpid_t get_routing_tree(orte_jobid_t job,
|
||||
return ORTE_VPID_INVALID;
|
||||
}
|
||||
|
||||
static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf)
|
||||
static int get_wireup_info(orte_jobid_t jobid, opal_buffer_t *buf)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, buf))) {
|
||||
/* if I am anything other than the HNP, this
|
||||
* is a meaningless command as I cannot get
|
||||
* the requested info
|
||||
*/
|
||||
if (!orte_process_info.hnp) {
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(jobid, buf))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(buf);
|
||||
return rc;
|
||||
|
@ -616,6 +616,13 @@ static bool route_is_defined(const orte_process_name_t *target)
|
||||
|
||||
static int update_routing_tree(void)
|
||||
{
|
||||
/* if I am anything other than a daemon or the HNP, this
|
||||
* is a meaningless command as I am not allowed to route
|
||||
*/
|
||||
if (!orte_process_info.daemon && !orte_process_info.hnp) {
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/* nothing to do here as the routing tree is fixed */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -625,11 +632,17 @@ static orte_vpid_t get_routing_tree(orte_jobid_t job,
|
||||
{
|
||||
orte_namelist_t *nm;
|
||||
|
||||
/* for anyone other than the HNP, the linear routing
|
||||
* does not go anywhere - we don't relay - and our
|
||||
/* if I am anything other than a daemon or the HNP, this
|
||||
* is a meaningless command as I am not allowed to route
|
||||
*/
|
||||
if (!orte_process_info.daemon && !orte_process_info.hnp) {
|
||||
return ORTE_VPID_INVALID;
|
||||
}
|
||||
|
||||
/* if I am a daemon, I have no children and my
|
||||
* parent is the HNP
|
||||
*/
|
||||
if (!orte_process_info.hnp) {
|
||||
if (orte_process_info.daemon) {
|
||||
return ORTE_PROC_MY_HNP->vpid;
|
||||
}
|
||||
|
||||
@ -652,6 +665,14 @@ static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf)
|
||||
{
|
||||
int rc;
|
||||
|
||||
/* if I am anything other than the HNP, this
|
||||
* is a meaningless command as I cannot get
|
||||
* the requested info
|
||||
*/
|
||||
if (!orte_process_info.hnp) {
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/* if we are not using static ports, then we need to share the
|
||||
* comm info - otherwise, just return
|
||||
*/
|
||||
@ -659,7 +680,7 @@ static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf)
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, buf))) {
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(job, buf))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(buf);
|
||||
return rc;
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user