1
1

Complete job control integration

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
This commit is contained in:
Ralph Castain 2018-08-20 16:08:54 -07:00
parent c087cb3307
commit e27e945d9a
7 changed files with 19 additions and 36 deletions

View File

@ -98,7 +98,8 @@ enum {
OPAL_ERR_HEARTBEAT_ALERT = (OPAL_ERR_BASE - 67),
OPAL_ERR_FILE_ALERT = (OPAL_ERR_BASE - 68),
OPAL_ERR_MODEL_DECLARED = (OPAL_ERR_BASE - 69),
OPAL_PMIX_LAUNCH_DIRECTIVE = (OPAL_ERR_BASE - 70)
OPAL_PMIX_LAUNCH_DIRECTIVE = (OPAL_ERR_BASE - 70),
OPAL_OPERATION_SUCCEEDED = (OPAL_ERR_BASE - 71)
};
#define OPAL_ERR_MAX (OPAL_ERR_BASE - 100)

View File

@ -185,6 +185,7 @@ pmix_status_t pmix1_convert_opalrc(int rc)
case OPAL_ERROR:
return PMIX_ERROR;
case OPAL_SUCCESS:
case OPAL_OPERATION_SUCCEEDED:
return PMIX_SUCCESS;
default:
return PMIX_ERROR;

View File

@ -440,6 +440,7 @@ pmix_status_t ext2x_convert_opalrc(int rc)
case OPAL_ERROR:
return PMIX_ERROR;
case OPAL_SUCCESS:
case OPAL_OPERATION_SUCCEEDED:
return PMIX_SUCCESS;
default:
return rc;

View File

@ -364,37 +364,13 @@ void pmix3x_event_hdlr(size_t evhdlr_registration_id,
return;
}
/*
 * Completion callback passed to PMIx_Job_control_nb by
 * pmix3x_register_cleanup.
 *
 * status          - PMIx status reported for the operation; it is
 *                   converted with pmix3x_convert_rc and stored in
 *                   the lock's status field
 * info, ninfo     - returned info array and its length (not examined here)
 * cbdata          - the opal_pmix_lock_t supplied by the caller
 * release_fn      - optional function to invoke so the library can
 *                   release its data; may be NULL
 * release_cbdata  - argument handed to release_fn
 */
static void cleanup_cbfunc(pmix_status_t status,
                           pmix_info_t *info, size_t ninfo,
                           void *cbdata,
                           pmix_release_cbfunc_t release_fn,
                           void *release_cbdata)
{
    opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;

    OPAL_POST_OBJECT(lock);

    /* give the library the chance to free whatever it returned
     * before we signal completion */
    if (release_fn != NULL) {
        release_fn(release_cbdata);
    }

    /* record the converted result, then wake up the waiter */
    lock->status = pmix3x_convert_rc(status);
    OPAL_PMIX_WAKEUP_THREAD(lock);
}
static int pmix3x_register_cleanup(char *path, bool directory, bool ignore, bool jobscope)
{
opal_pmix_lock_t lk;
pmix_info_t pinfo[3];
size_t n, ninfo=0;
pmix_status_t rc;
int ret;
OPAL_PMIX_CONSTRUCT_LOCK(&lk);
if (ignore) {
/* they want this path ignored */
PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_CLEANUP_IGNORE, path, PMIX_STRING);
@ -415,18 +391,12 @@ static int pmix3x_register_cleanup(char *path, bool directory, bool ignore, bool
/* if they want this applied to the job, then indicate so */
if (jobscope) {
rc = PMIx_Job_control_nb(NULL, 0, pinfo, ninfo, cleanup_cbfunc, (void*)&lk);
rc = PMIx_Job_control_nb(NULL, 0, pinfo, ninfo, NULL, NULL);
} else {
/* only applies to us */
rc = PMIx_Job_control_nb(&mca_pmix_pmix3x_component.myproc, 1, pinfo, ninfo, cleanup_cbfunc, (void*)&lk);
rc = PMIx_Job_control_nb(&mca_pmix_pmix3x_component.myproc, 1, pinfo, ninfo, NULL, NULL);
}
if (PMIX_SUCCESS != rc) {
ret = pmix3x_convert_rc(rc);
} else {
OPAL_PMIX_WAIT_THREAD(&lk);
ret = lk.status;
}
OPAL_PMIX_DESTRUCT_LOCK(&lk);
ret = pmix3x_convert_rc(rc);
for (n=0; n < ninfo; n++) {
PMIX_INFO_DESTRUCT(&pinfo[n]);
}
@ -536,6 +506,10 @@ pmix_status_t pmix3x_convert_opalrc(int rc)
return PMIX_ERROR;
case OPAL_SUCCESS:
return PMIX_SUCCESS;
case OPAL_OPERATION_SUCCEEDED:
return PMIX_OPERATION_SUCCEEDED;
default:
return rc;
}
@ -629,6 +603,10 @@ int pmix3x_convert_rc(pmix_status_t rc)
return OPAL_ERROR;
case PMIX_SUCCESS:
return OPAL_SUCCESS;
case PMIX_OPERATION_SUCCEEDED:
return OPAL_OPERATION_SUCCEEDED;
default:
return rc;
}

View File

@ -1621,6 +1621,7 @@ int pmix3x_job_control(opal_list_t *targets,
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return OPAL_ERR_NOT_INITIALIZED;
}
abort();
/* create the caddy */
op = OBJ_NEW(pmix3x_opcaddy_t);

View File

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -93,6 +93,7 @@ enum {
ORTE_ERR_PROC_ABORTING = OPAL_ERR_PROC_ABORTING,
ORTE_ERR_NODE_DOWN = OPAL_ERR_NODE_DOWN,
ORTE_ERR_NODE_OFFLINE = OPAL_ERR_NODE_OFFLINE,
ORTE_OPERATION_SUCCEEDED = OPAL_OPERATION_SUCCEEDED,
/* error codes specific to ORTE - don't forget to update
orte/util/error_strings.c when adding new error codes!!

View File

@ -1186,5 +1186,5 @@ int pmix_server_job_ctrl_fn(const opal_process_name_t *requestor,
}
}
return ORTE_SUCCESS;
return ORTE_OPERATION_SUCCEEDED;
}