1
1

Merge pull request #1855 from hjelmn/comm_rework

ompi/comm: refactor communicator cid code
Этот коммит содержится в:
Nathan Hjelm 2016-07-19 10:04:17 -06:00 коммит произвёл GitHub
родитель ced853476f 035c2e2e2a
Коммит 5edab9cb22
9 изменённых файлов: 735 добавлений и 1210 удалений

Просмотреть файл

@ -358,13 +358,7 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group,
}
/* Determine context id. It is identical to f_2_c_handle */
rc = ompi_comm_nextcid ( newcomp, /* new communicator */
comm, /* old comm */
NULL, /* bridge comm */
NULL, /* local leader */
NULL, /* remote_leader */
mode, /* mode */
-1 ); /* send first */
rc = ompi_comm_nextcid (newcomp, comm, NULL, NULL, NULL, false, mode);
if ( OMPI_SUCCESS != rc ) {
goto exit;
}
@ -374,13 +368,7 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group,
newcomp->c_contextid, comm->c_contextid );
/* Activate the communicator and init coll-component */
rc = ompi_comm_activate( &newcomp, /* new communicator */
comm,
NULL,
NULL,
NULL,
mode,
-1 );
rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode);
if ( OMPI_SUCCESS != rc ) {
goto exit;
}
@ -609,13 +597,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key,
}
/* Determine context id. It is identical to f_2_c_handle */
rc = ompi_comm_nextcid ( newcomp, /* new communicator */
comm, /* old comm */
NULL, /* bridge comm */
NULL, /* local leader */
NULL, /* remote_leader */
mode, /* mode */
-1 ); /* send first, doesn't matter */
rc = ompi_comm_nextcid (newcomp, comm, NULL, NULL, NULL, false, mode);
if ( OMPI_SUCCESS != rc ) {
goto exit;
}
@ -634,36 +616,15 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key,
/* Activate the communicator and init coll-component */
rc = ompi_comm_activate( &newcomp, /* new communicator */
comm,
NULL,
NULL,
NULL,
mode,
-1 );
if ( OMPI_SUCCESS != rc ) {
goto exit;
}
rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode);
exit:
if ( NULL != results ) {
free ( results );
}
if ( NULL != sorted ) {
free ( sorted );
}
if ( NULL != rresults) {
free ( rresults );
}
if ( NULL != rsorted ) {
free ( rsorted );
}
if ( NULL != lranks ) {
free ( lranks );
}
if ( NULL != rranks ) {
free ( rranks );
}
free ( results );
free ( sorted );
free ( rresults );
free ( rsorted );
free ( lranks );
free ( rranks );
/* Step 4: if we are not part of the comm, free the struct */
/* --------------------------------------------------------- */
@ -675,7 +636,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key,
}
*newcomm = newcomp;
return ( rc );
return rc;
}
@ -925,13 +886,7 @@ ompi_comm_split_type(ompi_communicator_t *comm,
}
/* Determine context id. It is identical to f_2_c_handle */
rc = ompi_comm_nextcid ( newcomp, /* new communicator */
comm, /* old comm */
NULL, /* bridge comm */
NULL, /* local leader */
NULL, /* remote_leader */
mode, /* mode */
-1 ); /* send first, doesn't matter */
rc = ompi_comm_nextcid (newcomp, comm, NULL, NULL, NULL, false, mode);
if ( OMPI_SUCCESS != rc ) {
goto exit;
}
@ -950,13 +905,7 @@ ompi_comm_split_type(ompi_communicator_t *comm,
/* Activate the communicator and init coll-component */
rc = ompi_comm_activate( &newcomp, /* new communicator */
comm,
NULL,
NULL,
NULL,
mode,
-1 );
rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode);
if ( OMPI_SUCCESS != rc ) {
goto exit;
}
@ -1031,13 +980,7 @@ int ompi_comm_dup_with_info ( ompi_communicator_t * comm, ompi_info_t *info, omp
}
/* Determine context id. It is identical to f_2_c_handle */
rc = ompi_comm_nextcid ( newcomp, /* new communicator */
comm, /* old comm */
NULL, /* bridge comm */
NULL, /* local leader */
NULL, /* remote_leader */
mode, /* mode */
-1 ); /* send_first */
rc = ompi_comm_nextcid (newcomp, comm, NULL, NULL, NULL, false, mode);
if ( OMPI_SUCCESS != rc ) {
return rc;
}
@ -1047,13 +990,7 @@ int ompi_comm_dup_with_info ( ompi_communicator_t * comm, ompi_info_t *info, omp
newcomp->c_contextid, comm->c_contextid );
/* activate communicator and init coll-module */
rc = ompi_comm_activate( &newcomp, /* new communicator */
comm,
NULL,
NULL,
NULL,
mode,
-1 );
rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode);
if ( OMPI_SUCCESS != rc ) {
return rc;
}
@ -1062,11 +999,15 @@ int ompi_comm_dup_with_info ( ompi_communicator_t * comm, ompi_info_t *info, omp
return MPI_SUCCESS;
}
struct ompi_comm_idup_with_info_context {
struct ompi_comm_idup_with_info_context_t {
opal_object_t super;
ompi_communicator_t *comm;
ompi_communicator_t *newcomp;
};
typedef struct ompi_comm_idup_with_info_context_t ompi_comm_idup_with_info_context_t;
OBJ_CLASS_INSTANCE(ompi_comm_idup_with_info_context_t, opal_object_t, NULL, NULL);
static int ompi_comm_idup_with_info_activate (ompi_comm_request_t *request);
static int ompi_comm_idup_with_info_finish (ompi_comm_request_t *request);
static int ompi_comm_idup_getcid (ompi_comm_request_t *request);
@ -1085,7 +1026,7 @@ int ompi_comm_idup_with_info (ompi_communicator_t *comm, ompi_info_t *info, ompi
static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *group, ompi_group_t *remote_group,
ompi_info_t *info, ompi_communicator_t **newcomm, ompi_request_t **req)
{
struct ompi_comm_idup_with_info_context *context;
ompi_comm_idup_with_info_context_t *context;
ompi_comm_request_t *request;
ompi_request_t *subreq[1];
int rc;
@ -1101,7 +1042,7 @@ static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *gro
return OMPI_ERR_OUT_OF_RESOURCE;
}
context = calloc (1, sizeof (*context));
context = OBJ_NEW(ompi_comm_idup_with_info_context_t);
if (NULL == context) {
ompi_comm_request_return (request);
return OMPI_ERR_OUT_OF_RESOURCE;
@ -1109,7 +1050,7 @@ static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *gro
context->comm = comm;
request->context = context;
request->context = &context->super;
rc = ompi_comm_set_nb (&context->newcomp, /* new comm */
comm, /* old comm */
@ -1142,8 +1083,8 @@ static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *gro
static int ompi_comm_idup_getcid (ompi_comm_request_t *request)
{
struct ompi_comm_idup_with_info_context *context =
(struct ompi_comm_idup_with_info_context *) request->context;
ompi_comm_idup_with_info_context_t *context =
(ompi_comm_idup_with_info_context_t *) request->context;
ompi_request_t *subreq[1];
int rc, mode;
@ -1154,11 +1095,8 @@ static int ompi_comm_idup_getcid (ompi_comm_request_t *request)
}
/* Determine context id. It is identical to f_2_c_handle */
rc = ompi_comm_nextcid_nb (context->newcomp, /* new communicator */
context->comm, /* old comm */
NULL, /* bridge comm */
mode, /* mode */
subreq); /* new subrequest */
rc = ompi_comm_nextcid_nb (context->newcomp, context->comm, NULL, NULL,
NULL, false, mode, subreq);
if (OMPI_SUCCESS != rc) {
ompi_comm_request_return (request);
return rc;
@ -1171,8 +1109,8 @@ static int ompi_comm_idup_getcid (ompi_comm_request_t *request)
static int ompi_comm_idup_with_info_activate (ompi_comm_request_t *request)
{
struct ompi_comm_idup_with_info_context *context =
(struct ompi_comm_idup_with_info_context *) request->context;
ompi_comm_idup_with_info_context_t *context =
(ompi_comm_idup_with_info_context_t *) request->context;
ompi_request_t *subreq[1];
int rc, mode;
@ -1187,7 +1125,7 @@ static int ompi_comm_idup_with_info_activate (ompi_comm_request_t *request)
context->newcomp->c_contextid, context->comm->c_contextid );
/* activate communicator and init coll-module */
rc = ompi_comm_activate_nb (&context->newcomp, context->comm, NULL, mode, subreq);
rc = ompi_comm_activate_nb (&context->newcomp, context->comm, NULL, NULL, NULL, false, mode, subreq);
if ( OMPI_SUCCESS != rc ) {
return rc;
}
@ -1233,13 +1171,7 @@ int ompi_comm_create_group (ompi_communicator_t *comm, ompi_group_t *group, int
}
/* Determine context id. It is identical to f_2_c_handle */
rc = ompi_comm_nextcid ( newcomp, /* new communicator */
comm, /* old comm */
newcomp, /* bridge comm (used to pass the group into the group allreduce) */
&tag, /* user defined tag */
NULL, /* remote_leader */
mode, /* mode */
-1 ); /* send_first */
rc = ompi_comm_nextcid (newcomp, comm, NULL, &tag, NULL, false, mode);
if ( OMPI_SUCCESS != rc ) {
return rc;
}
@ -1249,13 +1181,7 @@ int ompi_comm_create_group (ompi_communicator_t *comm, ompi_group_t *group, int
newcomp->c_contextid, comm->c_contextid );
/* activate communicator and init coll-module */
rc = ompi_comm_activate( &newcomp, /* new communicator */
comm,
newcomp,
&tag,
NULL,
mode,
-1 );
rc = ompi_comm_activate (&newcomp, comm, NULL, &tag, NULL, false, mode);
if ( OMPI_SUCCESS != rc ) {
return rc;
}
@ -1924,13 +1850,8 @@ int ompi_comm_enable(ompi_communicator_t *old_comm,
int ret = OMPI_SUCCESS;
/* Determine context id. It is identical to f_2_c_handle */
ret = ompi_comm_nextcid ( new_comm, /* new communicator */
old_comm, /* old comm */
NULL, /* bridge comm */
NULL, /* local leader */
NULL, /* remote_leader */
OMPI_COMM_CID_INTRA, /* mode */
-1 ); /* send first, doesn't matter */
ret = ompi_comm_nextcid (new_comm, old_comm, NULL, NULL, NULL, false,
OMPI_COMM_CID_INTRA);
if (OMPI_SUCCESS != ret) {
/* something wrong happened while setting the communicator */
goto complete_and_return;
@ -1953,15 +1874,8 @@ int ompi_comm_enable(ompi_communicator_t *old_comm,
goto complete_and_return;
}
ret = ompi_comm_activate( &new_comm, /* new communicator */
old_comm, /* old comm */
NULL, /* bridge comm */
NULL, /* local leader */
NULL, /* remote_leader */
OMPI_COMM_CID_INTRA, /* mode */
-1 ); /* send first, doesn't matter */
ret = ompi_comm_activate (&new_comm, old_comm, NULL, NULL, NULL, false,
OMPI_COMM_CID_INTRA);
if (OMPI_SUCCESS != ret) {
/* something wrong happened while setting the communicator */
goto complete_and_return;

Разница между файлами не показана из-за своего большого размера. Загрузить разницу

Просмотреть файл

@ -206,10 +206,6 @@ int ompi_comm_init(void)
OBJ_RETAIN(&ompi_mpi_group_null.group);
OBJ_RETAIN(&ompi_mpi_errors_are_fatal.eh);
/* initialize the comm_reg stuff for multi-threaded comm_cid
allocation */
ompi_comm_reg_init();
/* initialize communicator requests (for ompi_comm_idup) */
ompi_comm_request_init ();
@ -328,13 +324,9 @@ int ompi_comm_finalize(void)
}
}
OBJ_DESTRUCT (&ompi_mpi_communicators);
OBJ_DESTRUCT (&ompi_comm_f_to_c_table);
/* finalize the comm_reg stuff */
ompi_comm_reg_finalize();
/* finalize communicator requests */
ompi_comm_request_fini ();

Просмотреть файл

@ -235,6 +235,7 @@ static void ompi_comm_request_destruct (ompi_comm_request_t *request)
{
OBJ_DESTRUCT(&request->schedule);
}
OBJ_CLASS_INSTANCE(ompi_comm_request_t, ompi_request_t,
ompi_comm_request_construct,
ompi_comm_request_destruct);
@ -258,10 +259,10 @@ ompi_comm_request_t *ompi_comm_request_get (void)
void ompi_comm_request_return (ompi_comm_request_t *request)
{
if (request->context) {
free (request->context);
request->context = NULL;
OBJ_RELEASE (request->context);
}
OMPI_REQUEST_FINI(&request->super);
opal_free_list_return (&ompi_comm_requests, (opal_free_list_item_t *) request);
}

Просмотреть файл

@ -21,7 +21,7 @@
typedef struct ompi_comm_request_t {
ompi_request_t super;
void *context;
opal_object_t *context;
opal_list_t schedule;
} ompi_comm_request_t;
OBJ_CLASS_DECLARATION(ompi_comm_request_t);

Просмотреть файл

@ -496,24 +496,27 @@ ompi_communicator_t* ompi_comm_allocate (int local_group_size,
* @param mode: combination of input
* OMPI_COMM_CID_INTRA: intra-comm
* OMPI_COMM_CID_INTER: inter-comm
* OMPI_COMM_CID_GROUP: only decide CID within the ompi_group_t
* associated with the communicator. arg0
* must point to an int which will be used
* as the pml tag for communication.
* OMPI_COMM_CID_INTRA_BRIDGE: 2 intracomms connected by
* a bridge comm. local_leader
* and remote leader are in this
* case an int (rank in bridge-comm).
* a bridge comm. arg0 and arg1 must point
* to integers representing the local and
* remote leader ranks. The remote leader rank
* is a rank in the bridgecomm.
* OMPI_COMM_CID_INTRA_PMIX: 2 intracomms, leaders talk
* through PMIx. lleader and rleader
* are the required contact information.
* through PMIx. arg0 must point to an integer
* representing the local leader rank. arg1
* must point to a string representing the
* port of the remote leader.
* @param send_first: to avoid a potential deadlock for
* the OOB version.
* This routine has to be thread safe in the final version.
*/
OMPI_DECLSPEC int ompi_comm_nextcid ( ompi_communicator_t* newcomm,
ompi_communicator_t* oldcomm,
ompi_communicator_t* bridgecomm,
void* local_leader,
void* remote_leader,
int mode,
int send_first);
OMPI_DECLSPEC int ompi_comm_nextcid (ompi_communicator_t *newcomm, ompi_communicator_t *comm,
ompi_communicator_t *bridgecomm, const void *arg0, const void *arg1,
bool send_first, int mode);
/**
* allocate new communicator ID (non-blocking)
@ -525,10 +528,9 @@ OMPI_DECLSPEC int ompi_comm_nextcid ( ompi_communicator_t* newcomm,
* OMPI_COMM_CID_INTER: inter-comm
* This routine has to be thread safe in the final version.
*/
OMPI_DECLSPEC int ompi_comm_nextcid_nb (ompi_communicator_t* newcomm,
ompi_communicator_t* comm,
ompi_communicator_t* bridgecomm,
int mode, ompi_request_t **req);
OMPI_DECLSPEC int ompi_comm_nextcid_nb (ompi_communicator_t *newcomm, ompi_communicator_t *comm,
ompi_communicator_t *bridgecomm, const void *arg0, const void *arg1,
bool send_first, int mode, ompi_request_t **req);
/**
* shut down the communicator infrastructure.
@ -621,18 +623,25 @@ int ompi_comm_determine_first ( ompi_communicator_t *intercomm,
int high );
OMPI_DECLSPEC int ompi_comm_activate ( ompi_communicator_t** newcomm,
ompi_communicator_t* comm,
ompi_communicator_t* bridgecomm,
void* local_leader,
void* remote_leader,
int mode,
int send_first );
OMPI_DECLSPEC int ompi_comm_activate (ompi_communicator_t **newcomm, ompi_communicator_t *comm,
ompi_communicator_t *bridgecomm, const void *arg0,
const void *arg1, bool send_first, int mode);
OMPI_DECLSPEC int ompi_comm_activate_nb (ompi_communicator_t **newcomm,
ompi_communicator_t *comm,
ompi_communicator_t *bridgecomm,
int mode, ompi_request_t **req);
/**
* Non-blocking variant of comm_activate.
*
* @param[inout] newcomm New communicator
* @param[in] comm Parent communicator
* @param[in] bridgecomm Bridge communicator (used for PMIX and bridge modes)
* @param[in] arg0 Mode argument 0
* @param[in] arg1 Mode argument 1
* @param[in] send_first Send first from this process (PMIX mode only)
* @param[in] mode Collective mode
* @param[out] req New request object to track this operation
*/
OMPI_DECLSPEC int ompi_comm_activate_nb (ompi_communicator_t **newcomm, ompi_communicator_t *comm,
ompi_communicator_t *bridgecomm, const void *arg0,
const void *arg1, bool send_first, int mode, ompi_request_t **req);
/**
* a simple function to dump the structure
@ -642,14 +651,6 @@ int ompi_comm_dump ( ompi_communicator_t *comm );
/* setting name */
int ompi_comm_set_name (ompi_communicator_t *comm, const char *name );
/*
* these are the init and finalize functions for the comm_reg
* stuff. These routines are necessary for handling multi-threading
* scenarios in the communicator_cid allocation
*/
void ompi_comm_reg_init(void);
void ompi_comm_reg_finalize(void);
/* global variable to save the number of dynamic communicators */
extern int ompi_comm_num_dyncomm;

Просмотреть файл

@ -469,25 +469,25 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
new_group_pointer = MPI_GROUP_NULL;
/* allocate comm_cid */
rc = ompi_comm_nextcid ( newcomp, /* new communicator */
comm, /* old communicator */
NULL, /* bridge comm */
&root, /* local leader */
(void*)port_string, /* rendezvous point */
OMPI_COMM_CID_INTRA_PMIX, /* mode */
send_first ); /* send or recv first */
rc = ompi_comm_nextcid ( newcomp, /* new communicator */
comm, /* old communicator */
NULL, /* bridge comm */
&root, /* local leader */
(void*)port_string, /* rendezvous point */
send_first, /* send or recv first */
OMPI_COMM_CID_INTRA_PMIX); /* mode */
if (OMPI_SUCCESS != rc) {
goto exit;
}
/* activate comm and init coll-component */
rc = ompi_comm_activate ( &newcomp, /* new communicator */
comm, /* old communicator */
NULL, /* bridge comm */
&root, /* local leader */
(void*)port_string, /* rendezvous point */
OMPI_COMM_CID_INTRA_PMIX, /* mode */
send_first ); /* send or recv first */
rc = ompi_comm_activate ( &newcomp, /* new communicator */
comm, /* old communicator */
NULL, /* bridge comm */
&root, /* local leader */
(void*)port_string, /* rendezvous point */
send_first, /* send or recv first */
OMPI_COMM_CID_INTRA_PMIX); /* mode */
if (OMPI_SUCCESS != rc) {
goto exit;
}
@ -500,7 +500,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
exit:
if (OMPI_SUCCESS != rc) {
if (MPI_COMM_NULL != newcomp && NULL != newcomp) {
OBJ_RETAIN(newcomp);
OBJ_RELEASE(newcomp);
newcomp = MPI_COMM_NULL;
}
}

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -14,6 +15,8 @@
* Copyright (c) 2012-2013 Inria. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -199,26 +202,15 @@ int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader,
new_group_pointer = MPI_GROUP_NULL;
/* Determine context id. It is identical to f_2_c_handle */
rc = ompi_comm_nextcid ( newcomp, /* new comm */
local_comm, /* old comm */
bridge_comm, /* bridge comm */
&lleader, /* local leader */
&rleader, /* remote_leader */
OMPI_COMM_CID_INTRA_BRIDGE, /* mode */
-1 ); /* send_first */
rc = ompi_comm_nextcid (newcomp, local_comm, bridge_comm, &lleader,
&rleader, false, OMPI_COMM_CID_INTRA_BRIDGE);
if ( MPI_SUCCESS != rc ) {
goto err_exit;
}
/* activate comm and init coll-module */
rc = ompi_comm_activate ( &newcomp,
local_comm, /* old comm */
bridge_comm, /* bridge comm */
&lleader, /* local leader */
&rleader, /* remote_leader */
OMPI_COMM_CID_INTRA_BRIDGE, /* mode */
-1 ); /* send_first */
rc = ompi_comm_activate (&newcomp, local_comm, bridge_comm, &lleader, &rleader,
false, OMPI_COMM_CID_INTRA_BRIDGE);
if ( MPI_SUCCESS != rc ) {
goto err_exit;
}

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -14,6 +15,8 @@
* Copyright (c) 2012-2013 Inria. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -117,26 +120,16 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high,
OBJ_RELEASE(new_group_pointer);
new_group_pointer = MPI_GROUP_NULL;
/* Determine context id. It is identical to f_2_c_handle */
rc = ompi_comm_nextcid ( newcomp, /* new comm */
intercomm, /* old comm */
NULL, /* bridge comm */
NULL, /* local leader */
NULL, /* remote_leader */
OMPI_COMM_CID_INTER, /* mode */
-1 ); /* send_first */
/* Determine context id */
rc = ompi_comm_nextcid (newcomp, intercomm, NULL, NULL, NULL, false,
OMPI_COMM_CID_INTER);
if ( OMPI_SUCCESS != rc ) {
goto exit;
}
/* activate communicator and init coll-module */
rc = ompi_comm_activate( &newcomp, /* new comm */
intercomm, /* old comm */
NULL, /* bridge comm */
NULL, /* local leader */
NULL, /* remote_leader */
OMPI_COMM_CID_INTER, /* mode */
-1 ); /* send_first */
rc = ompi_comm_activate (&newcomp, intercomm, NULL, NULL, NULL, false,
OMPI_COMM_CID_INTER);
if ( OMPI_SUCCESS != rc ) {
goto exit;
}