further corrections to the hierarchy detection algorithms. It seems to work now as far as my tests show...
This commit was SVN r7753.
Этот коммит содержится в:
родитель
9a25554559
Коммит
460b5cb840
@ -42,7 +42,7 @@
|
|||||||
#define HIER_MAXPROTOCOL 7
|
#define HIER_MAXPROTOCOL 7
|
||||||
static int mca_coll_hierarch_max_protocol=HIER_MAXPROTOCOL;
|
static int mca_coll_hierarch_max_protocol=HIER_MAXPROTOCOL;
|
||||||
|
|
||||||
static char hier_prot[HIER_MAXPROTOCOL][6]={"0","tcp","gm","mx","mvapi","openib","sm"};
|
static char hier_prot[HIER_MAXPROTOCOL][7]={"0","tcp","gm","mx","mvapi","openib","sm"};
|
||||||
|
|
||||||
static void mca_coll_hierarch_checkfor_component (struct ompi_communicator_t *comm,
|
static void mca_coll_hierarch_checkfor_component (struct ompi_communicator_t *comm,
|
||||||
int component_level,
|
int component_level,
|
||||||
@ -123,7 +123,7 @@ const mca_coll_base_module_1_0_0_t *
|
|||||||
mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority,
|
mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority,
|
||||||
struct mca_coll_base_comm_t **data)
|
struct mca_coll_base_comm_t **data)
|
||||||
{
|
{
|
||||||
int size;
|
int size, rank;
|
||||||
int color, ncount[2], maxncount[2];
|
int color, ncount[2], maxncount[2];
|
||||||
struct mca_coll_base_comm_t *tdata=NULL;
|
struct mca_coll_base_comm_t *tdata=NULL;
|
||||||
int level;
|
int level;
|
||||||
@ -141,6 +141,7 @@ mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority,
|
|||||||
}
|
}
|
||||||
|
|
||||||
size = ompi_comm_size(comm);
|
size = ompi_comm_size(comm);
|
||||||
|
rank = ompi_comm_rank(comm);
|
||||||
|
|
||||||
/* allocate the data structure holding all information */
|
/* allocate the data structure holding all information */
|
||||||
tdata = calloc ( 1, sizeof(struct mca_coll_base_comm_t));
|
tdata = calloc ( 1, sizeof(struct mca_coll_base_comm_t));
|
||||||
@ -214,7 +215,17 @@ mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority,
|
|||||||
comm->c_name, hier_prot[level]);
|
comm->c_name, hier_prot[level]);
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
else if ( (maxncount[0] + maxncount[1]) == (size -1) ){
|
||||||
|
/* still every process would be part of this new comm,
|
||||||
|
so there is no point in creating it. */
|
||||||
|
printf("%s: every process would be part of this comm with prot %s. We continue\n",
|
||||||
|
comm->c_name, hier_prot[level]);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
|
printf("%s: %d procs talk with %s. Suggesting to use this, key %d rank %d\n",
|
||||||
|
comm->c_name, maxncount[0], hier_prot[level], color, rank);
|
||||||
|
|
||||||
ret = mca_coll_hierarch_gather_tmp (&color, 1, MPI_INT, tdata->hier_colorarr, 1,
|
ret = mca_coll_hierarch_gather_tmp (&color, 1, MPI_INT, tdata->hier_colorarr, 1,
|
||||||
MPI_INT, 0, comm );
|
MPI_INT, 0, comm );
|
||||||
if ( OMPI_SUCCESS != ret ) {
|
if ( OMPI_SUCCESS != ret ) {
|
||||||
@ -228,7 +239,8 @@ mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority,
|
|||||||
|
|
||||||
tdata->hier_level = level;
|
tdata->hier_level = level;
|
||||||
*data = tdata;
|
*data = tdata;
|
||||||
return &intra;
|
/* return &intra; */
|
||||||
|
goto exit;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -418,8 +430,8 @@ struct ompi_communicator_t* mca_coll_hierarch_get_llcomm (int rank,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
ompi_group_decrement_proc_count (llgroup);
|
/* ompi_group_decrement_proc_count (llgroup);
|
||||||
OBJ_RELEASE(llgroup);
|
OBJ_RELEASE(llgroup); */
|
||||||
if ( MPI_UNDEFINED != *lrank ) {
|
if ( MPI_UNDEFINED != *lrank ) {
|
||||||
found = 0;
|
found = 0;
|
||||||
break;
|
break;
|
||||||
@ -432,8 +444,8 @@ struct ompi_communicator_t* mca_coll_hierarch_get_llcomm (int rank,
|
|||||||
*/
|
*/
|
||||||
llcomm = MPI_COMM_NULL;
|
llcomm = MPI_COMM_NULL;
|
||||||
}
|
}
|
||||||
ompi_group_decrement_proc_count (group);
|
/* ompi_group_decrement_proc_count (group);
|
||||||
OBJ_RELEASE(group);
|
OBJ_RELEASE(group); */
|
||||||
|
|
||||||
|
|
||||||
return llcomm;
|
return llcomm;
|
||||||
@ -538,7 +550,12 @@ mca_coll_hierarch_checkfor_component ( struct ompi_communicator_t *comm,
|
|||||||
/* check for the required component */
|
/* check for the required component */
|
||||||
if (! strcmp (btl->btl_version.mca_component_name, component_name)){
|
if (! strcmp (btl->btl_version.mca_component_name, component_name)){
|
||||||
counter++;
|
counter++;
|
||||||
|
if (i<firstproc ) {
|
||||||
|
firstproc = i;
|
||||||
}
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
/* check for any faster components */
|
/* check for any faster components */
|
||||||
for ( j=component_level+1; j< mca_coll_hierarch_max_protocol; j++) {
|
for ( j=component_level+1; j< mca_coll_hierarch_max_protocol; j++) {
|
||||||
if (!strcmp(btl->btl_version.mca_component_name, hier_prot[j])) {
|
if (!strcmp(btl->btl_version.mca_component_name, hier_prot[j])) {
|
||||||
@ -546,6 +563,7 @@ mca_coll_hierarch_checkfor_component ( struct ompi_communicator_t *comm,
|
|||||||
if (i<firstproc ) {
|
if (i<firstproc ) {
|
||||||
firstproc = i;
|
firstproc = i;
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user