Merge pull request #5838 from rhc54/topic/ev
Correctly notify upon process failure
Этот коммит содержится в:
Коммит
44afb59a01
@ -516,6 +516,7 @@ int pmix4x_server_notify_event(int status,
|
|||||||
size_t sz, n;
|
size_t sz, n;
|
||||||
pmix_status_t rc;
|
pmix_status_t rc;
|
||||||
pmix4x_opcaddy_t *op;
|
pmix4x_opcaddy_t *op;
|
||||||
|
pmix_data_range_t range = PMIX_RANGE_SESSION;
|
||||||
|
|
||||||
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
|
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
|
||||||
if (0 >= opal_pmix_base.initialized) {
|
if (0 >= opal_pmix_base.initialized) {
|
||||||
@ -535,6 +536,9 @@ int pmix4x_server_notify_event(int status,
|
|||||||
pinfo[n].value.data.status = pmix4x_convert_opalrc(kv->data.integer);
|
pinfo[n].value.data.status = pmix4x_convert_opalrc(kv->data.integer);
|
||||||
} else {
|
} else {
|
||||||
pmix4x_value_load(&pinfo[n].value, kv);
|
pmix4x_value_load(&pinfo[n].value, kv);
|
||||||
|
if (0 == strcmp(kv->key, OPAL_PMIX_EVENT_CUSTOM_RANGE)) {
|
||||||
|
range = PMIX_RANGE_CUSTOM;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
++n;
|
++n;
|
||||||
}
|
}
|
||||||
@ -561,7 +565,7 @@ int pmix4x_server_notify_event(int status,
|
|||||||
rc = pmix4x_convert_opalrc(status);
|
rc = pmix4x_convert_opalrc(status);
|
||||||
/* the range must be nonlocal so the server will pass
|
/* the range must be nonlocal so the server will pass
|
||||||
* the event down to its local clients */
|
* the event down to its local clients */
|
||||||
rc = PMIx_Notify_event(rc, &op->p, PMIX_RANGE_SESSION,
|
rc = PMIx_Notify_event(rc, &op->p, range,
|
||||||
pinfo, sz, opcbfunc, op);
|
pinfo, sz, opcbfunc, op);
|
||||||
if (PMIX_SUCCESS != rc) {
|
if (PMIX_SUCCESS != rc) {
|
||||||
OBJ_RELEASE(op);
|
OBJ_RELEASE(op);
|
||||||
|
@ -551,36 +551,17 @@ static void _send_notification(int status,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (OPAL_ERR_PROC_ABORTED == status) {
|
if (ORTE_VPID_WILDCARD == target->vpid) {
|
||||||
/* we will pass three opal_value_t's */
|
/* we will only pass the affected proc */
|
||||||
rc = 3;
|
rc = 1;
|
||||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rc, 1, OPAL_INT))) {
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
OBJ_RELEASE(buf);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
/* pass along the affected proc(s) */
|
|
||||||
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
||||||
kv.key = strdup(OPAL_PMIX_EVENT_AFFECTED_PROC);
|
|
||||||
kv.type = OPAL_NAME;
|
|
||||||
kv.data.name.jobid = proc->jobid;
|
|
||||||
kv.data.name.vpid = proc->vpid;
|
|
||||||
kvptr = &kv;
|
|
||||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &kvptr, 1, OPAL_VALUE))) {
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
OBJ_DESTRUCT(&kv);
|
|
||||||
OBJ_RELEASE(buf);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
OBJ_DESTRUCT(&kv);
|
|
||||||
} else {
|
} else {
|
||||||
/* we are going to pass two opal_value_t's */
|
/* we have to pass the target */
|
||||||
rc = 2;
|
rc = 2;
|
||||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rc, 1, OPAL_INT))) {
|
}
|
||||||
ORTE_ERROR_LOG(rc);
|
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rc, 1, OPAL_INT))) {
|
||||||
OBJ_RELEASE(buf);
|
ORTE_ERROR_LOG(rc);
|
||||||
return;
|
OBJ_RELEASE(buf);
|
||||||
}
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* pass along the affected proc(s) */
|
/* pass along the affected proc(s) */
|
||||||
@ -598,23 +579,8 @@ static void _send_notification(int status,
|
|||||||
}
|
}
|
||||||
OBJ_DESTRUCT(&kv);
|
OBJ_DESTRUCT(&kv);
|
||||||
|
|
||||||
/* pass along the proc(s) to be notified */
|
|
||||||
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
||||||
kv.key = strdup(OPAL_PMIX_EVENT_CUSTOM_RANGE);
|
|
||||||
kv.type = OPAL_NAME;
|
|
||||||
kv.data.name.jobid = target->jobid;
|
|
||||||
kv.data.name.vpid = target->vpid;
|
|
||||||
kvptr = &kv;
|
|
||||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &kvptr, 1, OPAL_VALUE))) {
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
OBJ_DESTRUCT(&kv);
|
|
||||||
OBJ_RELEASE(buf);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
OBJ_DESTRUCT(&kv);
|
|
||||||
|
|
||||||
/* if the targets are a wildcard, then xcast it to everyone */
|
|
||||||
if (ORTE_VPID_WILDCARD == target->vpid) {
|
if (ORTE_VPID_WILDCARD == target->vpid) {
|
||||||
|
/* xcast it to everyone */
|
||||||
OBJ_CONSTRUCT(&sig, orte_grpcomm_signature_t);
|
OBJ_CONSTRUCT(&sig, orte_grpcomm_signature_t);
|
||||||
sig.signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
|
sig.signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
|
||||||
sig.signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
|
sig.signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
|
||||||
@ -627,6 +593,20 @@ static void _send_notification(int status,
|
|||||||
OBJ_DESTRUCT(&sig);
|
OBJ_DESTRUCT(&sig);
|
||||||
OBJ_RELEASE(buf);
|
OBJ_RELEASE(buf);
|
||||||
} else {
|
} else {
|
||||||
|
/* pass along the proc to be notified */
|
||||||
|
OBJ_CONSTRUCT(&kv, opal_value_t);
|
||||||
|
kv.key = strdup(OPAL_PMIX_EVENT_CUSTOM_RANGE);
|
||||||
|
kv.type = OPAL_NAME;
|
||||||
|
kv.data.name.jobid = target->jobid;
|
||||||
|
kv.data.name.vpid = target->vpid;
|
||||||
|
kvptr = &kv;
|
||||||
|
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &kvptr, 1, OPAL_VALUE))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
OBJ_DESTRUCT(&kv);
|
||||||
|
OBJ_RELEASE(buf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
OBJ_DESTRUCT(&kv);
|
||||||
/* get the daemon hosting the proc to be notified */
|
/* get the daemon hosting the proc to be notified */
|
||||||
daemon.jobid = ORTE_PROC_MY_NAME->jobid;
|
daemon.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||||
daemon.vpid = orte_get_proc_daemon_vpid(target);
|
daemon.vpid = orte_get_proc_daemon_vpid(target);
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user