1
1

pmix: added check for pmix fence status

Signed-off-by: Boris Karasev <karasev.b@gmail.com>
(cherry picked from commit 57683366ca)

Conflicts:
	opal/mca/common/ucx/common_ucx.c
	opal/mca/common/ucx/common_ucx.h

Modified:
	ompi/mca/pml/ucx/pml_ucx.c
	oshmem/mca/spml/ucx/spml_ucx.c
This commit is contained in:
Boris Karasev 2018-07-30 16:55:52 +06:00
parent 8483eb4bf7
commit 8873d901e8
14 changed files with 174 additions and 45 deletions

View file

@ -589,7 +589,11 @@ int ompi_dpm_disconnect(ompi_communicator_t *comm)
/* ensure we tell the host RM to disconnect us - this
* is a blocking operation so just use a fence */
ret = opal_pmix.fence(&coll, false);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(&coll, false))) {
OMPI_ERROR_LOG(ret);
OPAL_LIST_DESTRUCT(&coll);
return ret;
}
OPAL_LIST_DESTRUCT(&coll);
return ret;

View file

@ -155,7 +155,10 @@ int mca_bml_r2_ft_event(int state)
* Barrier to make all processes have been successfully restarted before
* we try to remove some restart only files.
*/
opal_pmix.fence(NULL, 0);
if( OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
opal_output(0, "bml:r2: ft_event(Restart): Failed to fence complete\n");
return ret;
}
/*
* Re-open the BTL framework to get the full list of components.
@ -224,7 +227,10 @@ int mca_bml_r2_ft_event(int state)
* Barrier to make all processes have been successfully restarted before
* we try to remove some restart only files.
*/
opal_pmix.fence(NULL, 0);
if( OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
opal_output(0, "bml:r2: ft_event(Restart): Failed to fence complete\n");
return ret;
}
/*
* Re-open the BTL framework to get the full list of components.

View file

@ -3028,7 +3028,10 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event(
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR0);
opal_pmix.fence(NULL, 0);
if( OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
exit_status = ret;
goto DONE;
}
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP0);
@ -3096,7 +3099,10 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event(
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR1);
opal_pmix.fence(NULL, 0);
if( OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
exit_status = ret;
goto DONE;
}
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CORE2);
}
@ -6207,14 +6213,16 @@ static void clear_timers(void) {
static void display_all_timers(int state) {
bool report_ready = false;
double barrier_start, barrier_stop;
int i;
int i, ret;
if( 0 != OMPI_PROC_MY_NAME->vpid ) {
if( 2 > timing_enabled ) {
return;
}
else if( 2 == timing_enabled ) {
opal_pmix.fence(NULL, 0);
if( OPAL_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
OPAL_ERROR_LOG(ret);
}
return;
}
}
@ -6235,7 +6243,9 @@ static void display_all_timers(int state) {
if( timing_enabled >= 2) {
barrier_start = get_time();
opal_pmix.fence(NULL, 0);
if( OPAL_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
OPAL_ERROR_LOG(ret);
}
barrier_stop = get_time();
opal_output(0,
"crcp:bkmrk: timing(%20s): %20s = %10.2f s\n",

View file

@ -666,7 +666,10 @@ int mca_pml_bfo_ft_event( int state )
if(OPAL_CRS_CHECKPOINT == state) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR1);
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
opal_output(0, "pml:bfo: ft_event(Restart): Failed to fence complete");
return ret;
}
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P0);
@ -677,7 +680,10 @@ int mca_pml_bfo_ft_event( int state )
if( !first_continue_pass ) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR0);
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
opal_output(0, "pml:bfo: ft_event(Restart): Failed to fence complete");
return ret;
}
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P2);
}
@ -777,7 +783,10 @@ int mca_pml_bfo_ft_event( int state )
if( !first_continue_pass ) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR1);
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
opal_output(0, "pml:bfo: ft_event(Restart): Failed to fence complete");
return ret;
}
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P3);
}
@ -787,7 +796,10 @@ int mca_pml_bfo_ft_event( int state )
* Exchange the modex information once again.
* BTLs will have republished their modex information.
*/
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
opal_output(0, "pml:bfo: ft_event(Restart): Failed to fence complete");
return ret;
}
/*
* Startup the PML stack now that the modex is running again
@ -799,7 +811,10 @@ int mca_pml_bfo_ft_event( int state )
}
/* Is this barrier necessary ? JJH */
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
opal_output(0, "pml:bfo: ft_event(Restart): Failed to fence complete");
return ret;
}
if( NULL != procs ) {
for(p = 0; p < (int)num_procs; ++p) {
@ -812,7 +827,10 @@ int mca_pml_bfo_ft_event( int state )
if( !first_continue_pass ) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR2);
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
opal_output(0, "pml:bfo: ft_event(Restart): Failed to fence complete");
return ret;
}
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP1);
}
@ -825,7 +843,10 @@ int mca_pml_bfo_ft_event( int state )
* Exchange the modex information once again.
* BTLs will have republished their modex information.
*/
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
opal_output(0, "pml:bfo: ft_event(Restart): Failed to fence complete");
return ret;
}
/*
* Startup the PML stack now that the modex is running again
@ -837,7 +858,10 @@ int mca_pml_bfo_ft_event( int state )
}
/* Is this barrier necessary ? JJH */
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
opal_output(0, "pml:bfo: ft_event(Restart): Failed to fence complete");
return ret;
}
if( NULL != procs ) {
for(p = 0; p < (int)num_procs; ++p) {

View file

@ -807,7 +807,10 @@ int mca_pml_ob1_ft_event( int state )
if(OPAL_CRS_CHECKPOINT == state) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR1);
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete");
return ret;
}
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P0);
@ -818,7 +821,10 @@ int mca_pml_ob1_ft_event( int state )
if( !first_continue_pass ) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR0);
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete");
return ret;
}
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P2);
}
@ -918,13 +924,19 @@ int mca_pml_ob1_ft_event( int state )
if( !first_continue_pass ) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR1);
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete");
return ret;
}
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P3);
}
if (opal_cr_continue_like_restart && !first_continue_pass) {
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete");
return ret;
}
/*
* Startup the PML stack now that the modex is running again
@ -936,7 +948,10 @@ int mca_pml_ob1_ft_event( int state )
}
/* Is this barrier necessary ? JJH */
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete");
return ret;
}
if( NULL != procs ) {
for(p = 0; p < (int)num_procs; ++p) {
@ -949,7 +964,10 @@ int mca_pml_ob1_ft_event( int state )
if( !first_continue_pass ) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR2);
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete");
return ret;
}
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP1);
}
@ -962,7 +980,10 @@ int mca_pml_ob1_ft_event( int state )
* Exchange the modex information once again.
* BTLs will have republished their modex information.
*/
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete");
return ret;
}
/*
* Startup the PML stack now that the modex is running again
@ -974,7 +995,10 @@ int mca_pml_ob1_ft_event( int state )
}
/* Is this barrier necessary ? JJH */
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete");
return ret;
}
if( NULL != procs ) {
for(p = 0; p < (int)num_procs; ++p) {

View file

@ -389,6 +389,7 @@ int mca_pml_ucx_del_procs(struct ompi_proc_t **procs, size_t nprocs)
void *dreq, **dreqs;
ucp_ep_h ep;
size_t i;
int ret;
max_reqs = ompi_pml_ucx.num_disconnect;
if (max_reqs > nprocs) {
@ -433,7 +434,10 @@ int mca_pml_ucx_del_procs(struct ompi_proc_t **procs, size_t nprocs)
mca_pml_ucx_waitall(dreqs, &num_reqs);
free(dreqs);
opal_common_ucx_mca_pmix_fence(ompi_pml_ucx.ucp_worker);
if (OMPI_SUCCESS != (ret = opal_common_ucx_mca_pmix_fence(
ompi_pml_ucx.ucp_worker))) {
return ret;
}
return OMPI_SUCCESS;
}

View file

@ -265,6 +265,7 @@ int mca_pml_yalla_add_procs(struct ompi_proc_t **procs, size_t nprocs)
int mca_pml_yalla_del_procs(struct ompi_proc_t **procs, size_t nprocs)
{
size_t i;
int ret;
if (ompi_mpi_state >= OMPI_MPI_STATE_FINALIZE_STARTED) {
PML_YALLA_VERBOSE(3, "%s", "using bulk powerdown");
@ -276,7 +277,9 @@ int mca_pml_yalla_del_procs(struct ompi_proc_t **procs, size_t nprocs)
PML_YALLA_VERBOSE(2, "disconnected from rank %s", OPAL_NAME_PRINT(procs[i]->super.proc_name));
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML] = NULL;
}
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
return ret;
}
return OMPI_SUCCESS;
}

View file

@ -257,7 +257,13 @@ int ompi_mpi_finalize(void)
* communications/actions to complete. See
* https://github.com/open-mpi/ompi/issues/1576 for the
* original bug report. */
opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active);
if (OMPI_SUCCESS != (ret = opal_pmix.fence_nb(NULL, 0, fence_cbfunc,
(void*)&active))) {
OMPI_ERROR_LOG(ret);
/* Reset the active flag to false, to avoid waiting for
* completion when the fence was failed. */
active = false;
}
OMPI_LAZY_WAIT_FOR_COMPLETION(active);
} else {
/* However, we cannot guarantee that the provided PMIx has
@ -268,7 +274,9 @@ int ompi_mpi_finalize(void)
ompi_communicator_t *comm = &ompi_mpi_comm_world.comm;
comm->c_coll->coll_barrier(comm, comm->c_coll->coll_barrier_module);
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
OMPI_ERROR_LOG(ret);
}
}
}

View file

@ -662,9 +662,15 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
#if (OPAL_ENABLE_TIMING)
if (OMPI_TIMING_ENABLED && !opal_pmix_base_async_modex &&
opal_pmix_collect_all_data) {
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
error = "timing: pmix-barrier-1 failed";
goto error;
}
OMPI_TIMING_NEXT("pmix-barrier-1");
opal_pmix.fence(NULL, 0);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
error = "timing: pmix-barrier-2 failed";
goto error;
}
OMPI_TIMING_NEXT("pmix-barrier-2");
}
#endif
@ -687,19 +693,32 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
background_fence = true;
active = true;
OPAL_POST_OBJECT(&active);
opal_pmix.fence_nb(NULL, true, fence_release, (void*)&active);
if( OMPI_SUCCESS != (ret = opal_pmix.fence_nb(NULL, true,
fence_release,
(void*)&active))) {
error = "opal_pmix.fence_nb() failed";
goto error;
}
} else if (!opal_pmix_base_async_modex) {
/* we want to do the modex */
active = true;
OPAL_POST_OBJECT(&active);
opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data,
fence_release, (void*)&active);
if( OMPI_SUCCESS != (ret = opal_pmix.fence_nb(NULL,
opal_pmix_collect_all_data, fence_release, (void*)&active))) {
error = "opal_pmix.fence_nb() failed";
goto error;
}
/* cannot just wait on thread as we need to call opal_progress */
OMPI_LAZY_WAIT_FOR_COMPLETION(active);
}
/* otherwise, we don't want to do the modex, so fall thru */
} else if (!opal_pmix_base_async_modex || opal_pmix_collect_all_data) {
opal_pmix.fence(NULL, opal_pmix_collect_all_data);
if( OMPI_SUCCESS != (ret = opal_pmix.fence(NULL,
opal_pmix_collect_all_data))) {
error = "opal_pmix.fence() failed";
goto error;
}
}
OMPI_TIMING_NEXT("modex");
@ -877,11 +896,17 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
if (NULL != opal_pmix.fence_nb) {
active = true;
OPAL_POST_OBJECT(&active);
opal_pmix.fence_nb(NULL, false,
fence_release, (void*)&active);
if (OMPI_SUCCESS != (ret = opal_pmix.fence_nb(NULL, false,
fence_release, (void*)&active))) {
error = "opal_pmix.fence_nb() failed";
goto error;
}
OMPI_LAZY_WAIT_FOR_COMPLETION(active);
} else {
opal_pmix.fence(NULL, false);
if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, false))) {
error = "opal_pmix.fence() failed";
goto error;
}
}
}

View file

@ -97,13 +97,19 @@ static void opal_common_ucx_mca_fence_complete_cb(int status, void *fenced)
*(int*)fenced = 1;
}
OPAL_DECLSPEC void opal_common_ucx_mca_pmix_fence(ucp_worker_h worker)
OPAL_DECLSPEC int opal_common_ucx_mca_pmix_fence(ucp_worker_h worker)
{
volatile int fenced = 0;
int ret = OPAL_SUCCESS;
if (OPAL_SUCCESS != (ret = opal_pmix.fence_nb(NULL, 0,
opal_common_ucx_mca_fence_complete_cb, (void*)&fenced))){
return ret;
}
opal_pmix.fence_nb(NULL, 0, opal_common_ucx_mca_fence_complete_cb, (void*)&fenced);
while (!fenced) {
ucp_worker_progress(worker);
}
}
return ret;
}

View file

@ -65,7 +65,7 @@ extern opal_common_ucx_module_t opal_common_ucx;
OPAL_DECLSPEC void opal_common_ucx_mca_register(void);
OPAL_DECLSPEC void opal_common_ucx_mca_deregister(void);
OPAL_DECLSPEC void opal_common_ucx_empty_complete_cb(void *request, ucs_status_t status);
OPAL_DECLSPEC void opal_common_ucx_mca_pmix_fence(ucp_worker_h worker);
OPAL_DECLSPEC int opal_common_ucx_mca_pmix_fence(ucp_worker_h worker);
static inline
int opal_common_ucx_wait_request(ucs_status_ptr_t request, ucp_worker_h worker,

View file

@ -458,7 +458,10 @@ static int rte_init(void)
if (ORTE_PROC_IS_NON_MPI && !orte_do_not_barrier) {
/* need to commit the data before we fence */
opal_pmix.commit();
opal_pmix.fence(NULL, 0);
if (ORTE_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
error = "opal_pmix.fence() failed";
goto error;
}
}
OPAL_TIMING_ENV_NEXT(rte_init, "rte_init_done");

View file

@ -150,7 +150,11 @@ int app_coord_init()
"app) Startup Barrier..."));
}
opal_pmix.fence(NULL, 0);
if (ORTE_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
}
if( 0 == ORTE_PROC_MY_NAME->vpid ) {
OPAL_OUTPUT_VERBOSE((3, mca_snapc_full_component.super.output_handle,
@ -216,7 +220,11 @@ int app_coord_finalize()
"app) Shutdown Barrier..."));
}
opal_pmix.fence(NULL, 0);
if (ORTE_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
}
if( 0 == ORTE_PROC_MY_NAME->vpid ) {
OPAL_OUTPUT_VERBOSE((3, mca_snapc_full_component.super.output_handle,

View file

@ -125,6 +125,7 @@ int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs)
void *dreq, **dreqs;
ucp_ep_h ep;
size_t i, n;
int ret;
oshmem_shmem_barrier();
@ -175,7 +176,10 @@ int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs)
free(dreqs);
free(mca_spml_ucx.remote_addrs_tbl);
opal_common_ucx_mca_pmix_fence(mca_spml_ucx_ctx_default.ucp_worker);
if (OSHMEM_SUCCESS != (ret = opal_common_ucx_mca_pmix_fence(
mca_spml_ucx_ctx_default.ucp_worker))) {
return ret;
}
free(mca_spml_ucx_ctx_default.ucp_peers);
mca_spml_ucx_ctx_default.ucp_peers = NULL;
return OSHMEM_SUCCESS;