1
1

further corrections to the hierarchy detection algorithms. It seems to work now as far as my tests show...

This commit was SVN r7753.
Этот коммит содержится в:
Edgar Gabriel 2005-10-13 16:21:13 +00:00
родитель 9a25554559
Коммит 460b5cb840

Просмотреть файл

@ -42,7 +42,7 @@
#define HIER_MAXPROTOCOL 7 #define HIER_MAXPROTOCOL 7
static int mca_coll_hierarch_max_protocol=HIER_MAXPROTOCOL; static int mca_coll_hierarch_max_protocol=HIER_MAXPROTOCOL;
static char hier_prot[HIER_MAXPROTOCOL][6]={"0","tcp","gm","mx","mvapi","openib","sm"}; static char hier_prot[HIER_MAXPROTOCOL][7]={"0","tcp","gm","mx","mvapi","openib","sm"};
static void mca_coll_hierarch_checkfor_component (struct ompi_communicator_t *comm, static void mca_coll_hierarch_checkfor_component (struct ompi_communicator_t *comm,
int component_level, int component_level,
@ -123,7 +123,7 @@ const mca_coll_base_module_1_0_0_t *
mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority, mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority,
struct mca_coll_base_comm_t **data) struct mca_coll_base_comm_t **data)
{ {
int size; int size, rank;
int color, ncount[2], maxncount[2]; int color, ncount[2], maxncount[2];
struct mca_coll_base_comm_t *tdata=NULL; struct mca_coll_base_comm_t *tdata=NULL;
int level; int level;
@ -141,6 +141,7 @@ mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority,
} }
size = ompi_comm_size(comm); size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
/* allocate the data structure holding all information */ /* allocate the data structure holding all information */
tdata = calloc ( 1, sizeof(struct mca_coll_base_comm_t)); tdata = calloc ( 1, sizeof(struct mca_coll_base_comm_t));
@ -214,7 +215,17 @@ mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority,
comm->c_name, hier_prot[level]); comm->c_name, hier_prot[level]);
goto exit; goto exit;
} }
else if ( (maxncount[0] + maxncount[1]) == (size -1) ){
/* still every process would be part of this new comm,
so there is no point in creating it. */
printf("%s: every process would be part of this comm with prot %s. We continue\n",
comm->c_name, hier_prot[level]);
continue;
}
else { else {
printf("%s: %d procs talk with %s. Suggesting to use this, key %d rank %d\n",
comm->c_name, maxncount[0], hier_prot[level], color, rank);
ret = mca_coll_hierarch_gather_tmp (&color, 1, MPI_INT, tdata->hier_colorarr, 1, ret = mca_coll_hierarch_gather_tmp (&color, 1, MPI_INT, tdata->hier_colorarr, 1,
MPI_INT, 0, comm ); MPI_INT, 0, comm );
if ( OMPI_SUCCESS != ret ) { if ( OMPI_SUCCESS != ret ) {
@ -228,7 +239,8 @@ mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority,
tdata->hier_level = level; tdata->hier_level = level;
*data = tdata; *data = tdata;
return &intra; /* return &intra; */
goto exit;
} }
} }
@ -418,8 +430,8 @@ struct ompi_communicator_t* mca_coll_hierarch_get_llcomm (int rank,
return NULL; return NULL;
} }
ompi_group_decrement_proc_count (llgroup); /* ompi_group_decrement_proc_count (llgroup);
OBJ_RELEASE(llgroup); OBJ_RELEASE(llgroup); */
if ( MPI_UNDEFINED != *lrank ) { if ( MPI_UNDEFINED != *lrank ) {
found = 0; found = 0;
break; break;
@ -432,8 +444,8 @@ struct ompi_communicator_t* mca_coll_hierarch_get_llcomm (int rank,
*/ */
llcomm = MPI_COMM_NULL; llcomm = MPI_COMM_NULL;
} }
ompi_group_decrement_proc_count (group); /* ompi_group_decrement_proc_count (group);
OBJ_RELEASE(group); OBJ_RELEASE(group); */
return llcomm; return llcomm;
@ -538,7 +550,12 @@ mca_coll_hierarch_checkfor_component ( struct ompi_communicator_t *comm,
/* check for the required component */ /* check for the required component */
if (! strcmp (btl->btl_version.mca_component_name, component_name)){ if (! strcmp (btl->btl_version.mca_component_name, component_name)){
counter++; counter++;
if (i<firstproc ) {
firstproc = i;
} }
continue;
}
/* check for any faster components */ /* check for any faster components */
for ( j=component_level+1; j< mca_coll_hierarch_max_protocol; j++) { for ( j=component_level+1; j< mca_coll_hierarch_max_protocol; j++) {
if (!strcmp(btl->btl_version.mca_component_name, hier_prot[j])) { if (!strcmp(btl->btl_version.mca_component_name, hier_prot[j])) {
@ -546,6 +563,7 @@ mca_coll_hierarch_checkfor_component ( struct ompi_communicator_t *comm,
if (i<firstproc ) { if (i<firstproc ) {
firstproc = i; firstproc = i;
} }
break;
} }
} }
} }