After consultation with Tim: when we set the ABORTED state for a proc,
we also increment the TERMINATED count, so that orterun can know when *all* processes have died.
This commit was SVN r5404.
This commit is contained in:
parent
f9ef7d4657
Коммит
7ad0d67a9d
@ -39,6 +39,7 @@ int orte_soh_base_set_proc_soh(orte_process_name_t *proc,
|
|||||||
int rc;
|
int rc;
|
||||||
orte_jobid_t jobid;
|
orte_jobid_t jobid;
|
||||||
orte_vpid_t vpid;
|
orte_vpid_t vpid;
|
||||||
|
int i;
|
||||||
|
|
||||||
value = OBJ_NEW(orte_gpr_value_t);
|
value = OBJ_NEW(orte_gpr_value_t);
|
||||||
if (NULL == value) {
|
if (NULL == value) {
|
||||||
@ -127,21 +128,30 @@ int orte_soh_base_set_proc_soh(orte_process_name_t *proc,
|
|||||||
}
|
}
|
||||||
value->tokens[0] = strdup(ORTE_JOB_GLOBALS);
|
value->tokens[0] = strdup(ORTE_JOB_GLOBALS);
|
||||||
value->num_tokens = 1;
|
value->num_tokens = 1;
|
||||||
|
|
||||||
value->keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*));
|
/* If we're setting ABORTED, we're also setting TERMINATED, so we
|
||||||
|
need 2 keyvals. Everything else only needs 1 keyval. */
|
||||||
|
|
||||||
|
value->cnt = 1;
|
||||||
|
if (ORTE_PROC_STATE_ABORTED == state) {
|
||||||
|
++value->cnt;
|
||||||
|
}
|
||||||
|
value->keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*) *
|
||||||
|
value->cnt);
|
||||||
if (NULL == value->keyvals) {
|
if (NULL == value->keyvals) {
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||||
OBJ_RELEASE(value);
|
OBJ_RELEASE(value);
|
||||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||||
}
|
}
|
||||||
value->cnt = 1;
|
for (i = 0; i < value->cnt; ++i) {
|
||||||
value->keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
|
value->keyvals[i] = OBJ_NEW(orte_gpr_keyval_t);
|
||||||
if (NULL == value->keyvals[0]) {
|
if (NULL == value->keyvals[i]) {
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||||
OBJ_RELEASE(value);
|
OBJ_RELEASE(value);
|
||||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||||
|
}
|
||||||
|
(value->keyvals[i])->type = ORTE_NULL;
|
||||||
}
|
}
|
||||||
(value->keyvals[0])->type = ORTE_NULL;
|
|
||||||
|
|
||||||
/* see which state we are in - let that determine the counter, if any */
|
/* see which state we are in - let that determine the counter, if any */
|
||||||
switch (state) {
|
switch (state) {
|
||||||
@ -167,6 +177,7 @@ int orte_soh_base_set_proc_soh(orte_process_name_t *proc,
|
|||||||
|
|
||||||
case ORTE_PROC_STATE_ABORTED:
|
case ORTE_PROC_STATE_ABORTED:
|
||||||
(value->keyvals[0])->key = strdup(ORTE_PROC_NUM_ABORTED);
|
(value->keyvals[0])->key = strdup(ORTE_PROC_NUM_ABORTED);
|
||||||
|
(value->keyvals[1])->key = strdup(ORTE_PROC_NUM_TERMINATED);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (NULL != (value->keyvals[0])->key) { /* need to increment a counter */
|
if (NULL != (value->keyvals[0])->key) { /* need to increment a counter */
|
||||||
|
Loading…
x
Reference in new issue
Block a user