1
1

Merge pull request #849 from hjelmn/add_procs

New add_procs behavior
Этот коммит содержится в:
Nathan Hjelm 2015-09-10 10:51:56 -06:00
родитель 2b8b544f2c ed005f2a61
Коммит 6a0c7b85bf
57 изменённых файлов: 1427 добавлений и 934 удалений

Просмотреть файл

@ -139,54 +139,8 @@ static opal_mutex_t ompi_cid_lock;
static opal_list_t ompi_registered_comms;
/* This variable is zero (false) if all processes in MPI_COMM_WORLD
* did not require MPI_THREAD_MULTIPLE support, and is 1 (true) as
* soon as at least one process requested support for THREAD_MULTIPLE */
static int ompi_comm_world_thread_level_mult=0;
/**
 * Determine whether any process in MPI_COMM_WORLD requested
 * MPI_THREAD_MULTIPLE support, and record the answer in
 * ompi_comm_world_thread_level_mult.
 *
 * @return OMPI_SUCCESS, or the error code from the modex receive.
 */
int ompi_comm_cid_init (void)
{
#if OMPI_ENABLE_THREAD_MULTIPLE
    ompi_proc_t **procs, *thisproc;
    uint8_t thread_level;
    uint8_t *tlpointer;
    int ret;
    size_t i, size, numprocs;

    /** Note that the following call only returns processes
     * with the same jobid. This is on purpose, since
     * we switch for the dynamic communicators anyway
     * to the original (slower) cid allocation algorithm.
     */
    procs = ompi_proc_world ( &numprocs );

    for ( i=0; i<numprocs; i++ ) {
        thisproc = procs[i];

        /* NOTE(review): tlpointer is presumably allocated by the modex
         * receive and may itself need freeing -- TODO confirm against the
         * OPAL_MODEX_RECV_STRING contract. */
        OPAL_MODEX_RECV_STRING(ret, "MPI_THREAD_LEVEL",
                               &thisproc->super.proc_name,
                               (uint8_t**)&tlpointer, &size);
        if (OMPI_SUCCESS == ret) {
            thread_level = *((uint8_t *) tlpointer);
            if ( OMPI_THREADLEVEL_IS_MULTIPLE (thread_level) ) {
                ompi_comm_world_thread_level_mult = 1;
                break;
            }
        } else if (OMPI_ERR_NOT_IMPLEMENTED == ret) {
            if (ompi_mpi_thread_multiple) {
                ompi_comm_world_thread_level_mult = 1;
            }
            break;
        } else {
            /* BUG FIX: the original returned here without releasing the
             * array obtained from ompi_proc_world(), leaking it on every
             * error path. */
            free(procs);
            return ret;
        }
    }
    free(procs);
#else
    ompi_comm_world_thread_level_mult = 0; // silence compiler warning if not used
#endif
    return OMPI_SUCCESS;
}

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2006-2010 University of Houston. All rights reserved.
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2012-2014 Los Alamos National Security, LLC.
* Copyright (c) 2012-2015 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2011-2013 Inria. All rights reserved.
* Copyright (c) 2011-2013 Universite Bordeaux 1
@ -102,12 +102,26 @@ int ompi_comm_init(void)
OBJ_CONSTRUCT(&ompi_mpi_comm_world, ompi_communicator_t);
assert(ompi_mpi_comm_world.comm.c_f_to_c_index == 0);
group = OBJ_NEW(ompi_group_t);
group->grp_proc_pointers = ompi_proc_world(&size);
group->grp_proc_count = (int)size;
size = ompi_process_info.num_procs;
group->grp_proc_pointers = (ompi_proc_t **) calloc (size, sizeof (ompi_proc_t *));
group->grp_proc_count = size;
for (size_t i = 0 ; i < size ; ++i) {
opal_process_name_t name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid};
/* look for existing ompi_proc_t that matches this name */
group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_lookup (name);
if (NULL == group->grp_proc_pointers[i]) {
/* set sentinel value */
group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_name_to_sentinel (name);
} else {
OBJ_RETAIN (group->grp_proc_pointers[i]);
}
}
OMPI_GROUP_SET_INTRINSIC (group);
OMPI_GROUP_SET_DENSE (group);
ompi_set_group_rank(group, ompi_proc_local());
ompi_group_increment_proc_count (group);
ompi_mpi_comm_world.comm.c_contextid = 0;
ompi_mpi_comm_world.comm.c_id_start_index = 4;

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2009 University of Houston. All rights reserved.
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2014-2015 Research Organization for Information Science
@ -1293,6 +1293,22 @@ static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs)
/**********************************************************************/
/**********************************************************************/
/**********************************************************************/
/**
 * Check whether any group member (other than member 0) belongs to a
 * different job than thisjobid. A NULL group reports false.
 */
static bool ompi_dpm_group_is_dyn (ompi_group_t *group, ompi_jobid_t thisjobid)
{
    if (NULL == group) {
        return false;
    }

    const int member_count = ompi_group_size (group);

    /* member 0 supplied thisjobid, so start the scan at member 1 */
    int member = 1;
    while (member < member_count) {
        opal_process_name_t member_name = ompi_group_get_proc_name (group, member);
        if (((ompi_process_name_t *) &member_name)->jobid != thisjobid) {
            /* at least one member comes from another job */
            return true;
        }
        ++member;
    }

    return false;
}
/* All we want to do in this function is determine if the number of
* jobids in the local and/or remote group is > 1. This tells us to
* set the disconnect flag. We don't actually care what the true
@ -1300,56 +1316,30 @@ static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs)
*/
/**
 * Mark a communicator as dynamic if its local or remote group spans more
 * than one jobid, and bump the global dynamic-communicator counter.
 *
 * BUG FIX: this span interleaved the pre-refactor body (explicit loops,
 * a `goto complete` and its label, duplicate jobid computations) with the
 * new ompi_dpm_group_is_dyn()-based body, which is not valid C. Only the
 * coherent new logic is kept.
 */
void ompi_dpm_mark_dyncomm(ompi_communicator_t *comm)
{
    bool found;
    ompi_jobid_t thisjobid;

    /* special case for MPI_COMM_NULL */
    if (comm == MPI_COMM_NULL) {
        return;
    }

    /* use the jobid of rank 0 of the local group as the reference */
    thisjobid = ompi_group_get_proc_name (comm->c_local_group, 0).jobid;

    /* loop over all processes in local group and check for
     * a different jobid
     */
    found = ompi_dpm_group_is_dyn (comm->c_local_group, thisjobid);
    if (!found) {
        /* if inter-comm, loop over all processes in remote_group
         * and see if any are different from thisjobid
         */
        found = ompi_dpm_group_is_dyn (comm->c_remote_group, thisjobid);
    }

    /* if a different jobid was found, set the disconnect flag*/
    if (found) {
        ompi_comm_num_dyncomm++;
        OMPI_COMM_SET_DYNAMIC(comm);
    }
}

Просмотреть файл

@ -14,7 +14,7 @@
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2012-2013 Inria. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -49,16 +49,14 @@ int ompi_group_translate_ranks ( ompi_group_t *group1,
ompi_group_t *group2,
int *ranks2)
{
int rank, proc, proc2;
struct ompi_proc_t *proc1_pointer, *proc2_pointer;
if ( MPI_GROUP_EMPTY == group1 || MPI_GROUP_EMPTY == group2 ) {
for (proc = 0; proc < n_ranks ; proc++) {
for (int proc = 0; proc < n_ranks ; ++proc) {
ranks2[proc] = MPI_UNDEFINED;
}
return MPI_SUCCESS;
}
#if OMPI_GROUP_SPARSE
/*
* If we are translating from a parent to a child that uses the sparse format
* or vice versa, we use the translate ranks function corresponding to the
@ -80,8 +78,11 @@ int ompi_group_translate_ranks ( ompi_group_t *group1,
(group1,n_ranks,ranks1,group2,ranks2);
}
/* unknown sparse group type */
assert (0);
}
else if( group2->grp_parent_group_ptr == group1 ) { /* from parent to child*/
if( group2->grp_parent_group_ptr == group1 ) { /* from parent to child*/
if(OMPI_GROUP_IS_SPORADIC(group2)) {
return ompi_group_translate_ranks_sporadic
(group1,n_ranks,ranks1,group2,ranks2);
@ -95,28 +96,32 @@ int ompi_group_translate_ranks ( ompi_group_t *group1,
(group1,n_ranks,ranks1,group2,ranks2);
}
/* unknown sparse group type */
assert (0);
}
else {
/* loop over all ranks */
for (proc = 0; proc < n_ranks; proc++) {
rank=ranks1[proc];
if ( MPI_PROC_NULL == rank) {
ranks2[proc] = MPI_PROC_NULL;
}
else {
proc1_pointer = ompi_group_peer_lookup(group1 ,rank);
/* initialize to no "match" */
ranks2[proc] = MPI_UNDEFINED;
for (proc2 = 0; proc2 < group2->grp_proc_count; proc2++) {
proc2_pointer= ompi_group_peer_lookup(group2, proc2);
if ( proc1_pointer == proc2_pointer) {
ranks2[proc] = proc2;
break;
}
} /* end proc2 loop */
} /* end proc loop */
#endif
/* loop over all ranks */
for (int proc = 0; proc < n_ranks; ++proc) {
struct ompi_proc_t *proc1_pointer, *proc2_pointer;
int rank = ranks1[proc];
if ( MPI_PROC_NULL == rank) {
ranks2[proc] = MPI_PROC_NULL;
continue;
}
}
proc1_pointer = ompi_group_get_proc_ptr_raw (group1, rank);
/* initialize to no "match" */
ranks2[proc] = MPI_UNDEFINED;
for (int proc2 = 0; proc2 < group2->grp_proc_count; ++proc2) {
proc2_pointer = ompi_group_get_proc_ptr_raw (group2, proc2);
if ( proc1_pointer == proc2_pointer) {
ranks2[proc] = proc2;
break;
}
} /* end proc2 loop */
} /* end proc loop */
return MPI_SUCCESS;
}
@ -168,25 +173,6 @@ int ompi_group_dump (ompi_group_t* group)
return OMPI_SUCCESS;
}
/*
* This is the function that iterates through the sparse groups to the dense group
* to reach the process pointer
*/
/*
 * Walk up the chain of sparse parent groups, translating the rank at each
 * step, until a dense group is reached; that group's proc array holds the
 * process pointer.
 */
ompi_proc_t* ompi_group_get_proc_ptr (ompi_group_t* group , int rank)
{
    int current_rank = rank;

    while (!OMPI_GROUP_IS_DENSE(group)) {
        int source_rank = current_rank;
        int translated_rank;

        /* map the rank into the parent group's numbering */
        ompi_group_translate_ranks( group, 1, &source_rank,
                                    group->grp_parent_group_ptr, &translated_rank);

        current_rank = translated_rank;
        group = group->grp_parent_group_ptr;
    }

    return group->grp_proc_pointers[current_rank];
}
int ompi_group_minloc ( int list[] , int length )
{
int i,index,min;
@ -568,3 +554,23 @@ int ompi_group_compare(ompi_group_t *group1,
return return_value;
}
/**
 * Return true if any member of the group does not reside on the local node.
 */
bool ompi_group_have_remote_peers (ompi_group_t *group)
{
    for (size_t member = 0 ; member < group->grp_proc_count ; ++member) {
        ompi_proc_t *member_proc;

#if OMPI_GROUP_SPARSE
        member_proc = ompi_group_peer_lookup (group, member);
#else
        member_proc = group->grp_proc_pointers[member];
        if ((intptr_t) member_proc < 0) {
            /* sentinel entry: no local ompi_proc_t was ever instantiated
             * for this member, so it is treated as remote */
            return true;
        }
#endif

        if (!OPAL_PROC_ON_LOCAL_NODE(member_proc->super.proc_flags)) {
            return true;
        }
    }

    return false;
}

Просмотреть файл

@ -14,7 +14,7 @@
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@ -252,8 +252,6 @@ int ompi_group_free (ompi_group_t **group);
/**
* Functions to handle process pointers for sparse group formats
*/
OMPI_DECLSPEC ompi_proc_t* ompi_group_get_proc_ptr (ompi_group_t* group , int rank);
int ompi_group_translate_ranks_sporadic ( ompi_group_t *group1,
int n_ranks, const int *ranks1,
ompi_group_t *group2,
@ -324,25 +322,93 @@ int ompi_group_calc_bmap ( int n, int orig_size , const int *ranks );
*/
int ompi_group_minloc (int list[], int length);
/**
* @brief Helper function for retreiving the proc of a group member in a dense group
*
* This function exists to handle the translation of sentinel group members to real
* ompi_proc_t's. If a sentinel value is found and allocate is true then this function
* looks for an existing ompi_proc_t using ompi_proc_for_name which will allocate a
* ompi_proc_t if one does not exist. If allocate is false then sentinel values translate
* to NULL.
*/
/**
 * Resolve the proc at peer_id in a dense group, translating sentinel
 * entries to real ompi_proc_t's. If a sentinel is found and allocate is
 * true, ompi_proc_for_name() is used (which may create the proc); if
 * allocate is false a sentinel resolves to NULL.
 */
static inline struct ompi_proc_t *ompi_group_dense_lookup (ompi_group_t *group, const int peer_id, const bool allocate)
{
#if OPAL_ENABLE_DEBUG
    if (peer_id >= group->grp_proc_count) {
        opal_output(0, "ompi_group_dense_lookup: invalid peer index (%d)", peer_id);
        return (struct ompi_proc_t *) NULL;
    }
#endif

    if (OPAL_UNLIKELY((intptr_t) group->grp_proc_pointers[peer_id] < 0)) {
        if (!allocate) {
            return NULL;
        }

        ompi_proc_t *proc = (ompi_proc_t *)
            ompi_proc_for_name (ompi_proc_sentinel_to_name ((intptr_t) group->grp_proc_pointers[peer_id]));
        if (OPAL_UNLIKELY(NULL == proc)) {
            /* BUG FIX: the original stored the result unconditionally and
             * then called OBJ_RETAIN on it; a NULL from
             * ompi_proc_for_name() would clobber the sentinel and crash.
             * Keep the sentinel and report failure instead. */
            return NULL;
        }

        /* replace sentinel value with an actual ompi_proc_t */
        group->grp_proc_pointers[peer_id] = proc;
        OBJ_RETAIN(proc);
    }

    return group->grp_proc_pointers[peer_id];
}
/*
* This is the function that iterates through the sparse groups to the dense group
* to reach the process pointer
*/
/**
 * Iterate through sparse parent groups down to the dense group to reach
 * the process pointer for rank. Sentinel handling (and the meaning of
 * allocate) is delegated to ompi_group_dense_lookup().
 */
static inline ompi_proc_t *ompi_group_get_proc_ptr (ompi_group_t *group, int rank, const bool allocate)
{
#if OMPI_GROUP_SPARSE
    do {
        if (OMPI_GROUP_IS_DENSE(group)) {
            /* BUG FIX: the original passed `peer_id`, which is not declared
             * in this scope -- the parameter is named `rank`, so the sparse
             * build would not compile. */
            return ompi_group_dense_lookup (group, rank, allocate);
        }

        int ranks1 = rank;
        ompi_group_translate_ranks (group, 1, &ranks1, group->grp_parent_group_ptr, &rank);
        group = group->grp_parent_group_ptr;
    } while (1);
#else
    return ompi_group_dense_lookup (group, rank, allocate);
#endif
}
/**
* @brief Get the raw proc pointer from the group
*
* This function will either return a ompi_proc_t if one exists (either stored in the group
* or cached in the proc hash table) or a sentinel value representing the proc. This
* differs from ompi_group_get_proc_ptr() which returns the ompi_proc_t or NULL.
*/
ompi_proc_t *ompi_group_get_proc_ptr_raw (ompi_group_t *group, int rank);
/**
 * Return the process name of a group member without forcing allocation of
 * an ompi_proc_t: sentinel entries decode the name packed into the
 * pointer value instead.
 */
static inline opal_process_name_t ompi_group_get_proc_name (ompi_group_t *group, int rank)
{
    ompi_proc_t *raw = ompi_group_get_proc_ptr_raw (group, rank);

    if ((intptr_t) raw < 0) {
        /* sentinel: recover the name from the encoded value */
        return ompi_proc_sentinel_to_name ((intptr_t) raw);
    }

    return raw->super.proc_name;
}
/**
* Inline function to check if sparse groups are enabled and return the direct access
* to the proc pointer, otherwise the lookup function
*/
/**
 * Return the ompi_proc_t for a group member, allocating one if the member
 * is still represented by a sentinel value.
 *
 * BUG FIX: this span interleaved the pre-refactor body (a debug bounds
 * check plus a two-argument ompi_group_get_proc_ptr() call under
 * OMPI_GROUP_SPARSE) with the new three-argument call, which does not
 * compile. Only the new forwarding call is kept; the bounds check lives
 * in ompi_group_dense_lookup().
 */
static inline struct ompi_proc_t* ompi_group_peer_lookup(ompi_group_t *group, int peer_id)
{
    return ompi_group_get_proc_ptr (group, peer_id, true);
}
/**
 * @brief Look up the proc for a group member without creating one.
 *
 * Same as ompi_group_peer_lookup() but passes allocate=false, so a member
 * still represented by a sentinel value resolves to NULL instead of
 * triggering ompi_proc_t allocation.
 */
static inline struct ompi_proc_t *ompi_group_peer_lookup_existing (ompi_group_t *group, int peer_id)
{
return ompi_group_get_proc_ptr (group, peer_id, false);
}
bool ompi_group_have_remote_peers (ompi_group_t *group);
/**
* Function to print the group info
*/

Просмотреть файл

@ -210,14 +210,13 @@ ompi_group_t *ompi_group_allocate_bmap(int orig_group_size , int group_size)
*/
/**
 * Retain every proc in the group that has already been instantiated.
 * Sentinel members resolve to NULL via ompi_group_peer_lookup_existing()
 * and are skipped (they hold no reference until materialized).
 *
 * BUG FIX: this span interleaved the old loop (unconditional OBJ_RETAIN on
 * ompi_group_peer_lookup()) with the new NULL-checked loop, leaving
 * unbalanced braces and duplicate iteration. Only the coherent new loop
 * is kept.
 */
void ompi_group_increment_proc_count(ompi_group_t *group)
{
    for (int proc = 0 ; proc < group->grp_proc_count ; ++proc) {
        ompi_proc_t *proc_pointer = ompi_group_peer_lookup_existing (group, proc);
        if (proc_pointer) {
            OBJ_RETAIN(proc_pointer);
        }
    }
}
/*
@ -226,14 +225,13 @@ void ompi_group_increment_proc_count(ompi_group_t *group)
/**
 * Release every proc in the group that has been instantiated. Sentinel
 * members resolve to NULL and are skipped -- they were never retained.
 *
 * BUG FIX: this span interleaved the old loop (unconditional OBJ_RELEASE)
 * with the new NULL-checked loop, leaving unbalanced braces and duplicate
 * iteration. Only the coherent new loop is kept.
 */
void ompi_group_decrement_proc_count(ompi_group_t *group)
{
    for (int proc = 0 ; proc < group->grp_proc_count ; ++proc) {
        ompi_proc_t *proc_pointer = ompi_group_peer_lookup_existing (group, proc);
        if (proc_pointer) {
            OBJ_RELEASE(proc_pointer);
        }
    }
}
/*
@ -255,9 +253,6 @@ static void ompi_group_construct(ompi_group_t *new_group)
/* default the sparse values for groups */
new_group->grp_parent_group_ptr = NULL;
/* return */
return;
}
@ -300,9 +295,6 @@ static void ompi_group_destruct(ompi_group_t *group)
opal_pointer_array_set_item(&ompi_group_f_to_c_table,
group->grp_f_to_c_index, NULL);
}
/* return */
return;
}

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2006-2007 University of Houston. All rights reserved.
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@ -29,6 +29,66 @@
#include <math.h>
/**
 * Count the members of group1 that also appear in group2, and set the
 * corresponding group2 indices in bitmap.
 *
 * @return the overlap count, or a negative OPAL error code on failure.
 */
static int ompi_group_dense_overlap (ompi_group_t *group1, ompi_group_t *group2, opal_bitmap_t *bitmap)
{
    int overlap_count = 0;

    for (int idx1 = 0 ; idx1 < group1->grp_proc_count ; ++idx1) {
        ompi_proc_t *candidate = ompi_group_get_proc_ptr_raw (group1, idx1);

        /* scan group2 for the same proc (raw pointers compare equal for
         * the same process, sentinel or not) */
        for (int idx2 = 0 ; idx2 < group2->grp_proc_count ; ++idx2) {
            if (candidate != ompi_group_get_proc_ptr_raw (group2, idx2)) {
                continue;
            }

            int rc = opal_bitmap_set_bit (bitmap, idx2);
            if (OPAL_SUCCESS != rc) {
                return rc;
            }

            ++overlap_count;
            break;
        }
    }

    return overlap_count;
}
/**
 * Return the raw entry for peer_id in a dense group. If the entry is a
 * sentinel and a real ompi_proc_t already exists for that name, cache and
 * retain it; otherwise the sentinel itself is returned (no allocation).
 */
static struct ompi_proc_t *ompi_group_dense_lookup_raw (ompi_group_t *group, const int peer_id)
{
    ompi_proc_t *entry = group->grp_proc_pointers[peer_id];

    if ((intptr_t) entry >= 0) {
        /* already a real proc pointer */
        return entry;
    }

    /* sentinel: see whether an ompi_proc_t now exists for this name */
    ompi_proc_t *resolved =
        (ompi_proc_t *) ompi_proc_lookup (ompi_proc_sentinel_to_name ((intptr_t) entry));
    if (NULL != resolved) {
        /* replace the sentinel with the real proc and retain it */
        group->grp_proc_pointers[peer_id] = resolved;
        OBJ_RETAIN(resolved);
        return resolved;
    }

    return entry;
}
/**
 * Iterate through sparse parent groups to the dense group and return the
 * raw entry (real ompi_proc_t or sentinel) for rank. Unlike
 * ompi_group_get_proc_ptr() this never allocates a proc.
 */
ompi_proc_t *ompi_group_get_proc_ptr_raw (ompi_group_t *group, int rank)
{
#if OMPI_GROUP_SPARSE
    do {
        if (OMPI_GROUP_IS_DENSE(group)) {
            /* BUG FIX: the original passed `peer_id`, which is not declared
             * in this scope -- the parameter is named `rank`, so the sparse
             * build would not compile. */
            return ompi_group_dense_lookup_raw (group, rank);
        }

        int ranks1 = rank;
        ompi_group_translate_ranks (group, 1, &ranks1, group->grp_parent_group_ptr, &rank);
        group = group->grp_parent_group_ptr;
    } while (1);
#else
    return ompi_group_dense_lookup_raw (group, rank);
#endif
}
int ompi_group_calc_plist ( int n , const int *ranks ) {
return sizeof(char *) * n ;
}
@ -37,9 +97,8 @@ int ompi_group_incl_plist(ompi_group_t* group, int n, const int *ranks,
ompi_group_t **new_group)
{
/* local variables */
int proc,my_group_rank;
int my_group_rank;
ompi_group_t *group_pointer, *new_group_pointer;
ompi_proc_t *my_proc_pointer;
group_pointer = (ompi_group_t *)group;
@ -56,9 +115,9 @@ int ompi_group_incl_plist(ompi_group_t* group, int n, const int *ranks,
}
/* put group elements in the list */
for (proc = 0; proc < n; proc++) {
for (int proc = 0; proc < n; proc++) {
new_group_pointer->grp_proc_pointers[proc] =
ompi_group_peer_lookup(group_pointer,ranks[proc]);
ompi_group_get_proc_ptr_raw (group_pointer, ranks[proc]);
} /* end proc loop */
/* increment proc reference counters */
@ -67,10 +126,8 @@ int ompi_group_incl_plist(ompi_group_t* group, int n, const int *ranks,
/* find my rank */
my_group_rank=group_pointer->grp_my_rank;
if (MPI_UNDEFINED != my_group_rank) {
my_proc_pointer=ompi_group_peer_lookup (group_pointer,my_group_rank);
ompi_set_group_rank(new_group_pointer,my_proc_pointer);
}
else {
ompi_set_group_rank(new_group_pointer, ompi_proc_local_proc);
} else {
new_group_pointer->grp_my_rank = MPI_UNDEFINED;
}
@ -87,114 +144,77 @@ int ompi_group_union (ompi_group_t* group1, ompi_group_t* group2,
ompi_group_t **new_group)
{
/* local variables */
int new_group_size, proc1, proc2, found_in_group;
int my_group_rank, cnt;
ompi_group_t *group1_pointer, *group2_pointer, *new_group_pointer;
ompi_proc_t *proc1_pointer, *proc2_pointer, *my_proc_pointer = NULL;
group1_pointer = (ompi_group_t *) group1;
group2_pointer = (ompi_group_t *) group2;
int new_group_size, cnt, rc, overlap_count;
ompi_group_t *new_group_pointer;
ompi_proc_t *proc2_pointer;
opal_bitmap_t bitmap;
/*
* form union
*/
/* get new group size */
new_group_size = group1_pointer->grp_proc_count;
OBJ_CONSTRUCT(&bitmap, opal_bitmap_t);
rc = opal_bitmap_init (&bitmap, 32);
if (OPAL_SUCCESS != rc) {
return rc;
}
/* check group2 elements to see if they need to be included in the list */
for (proc2 = 0; proc2 < group2_pointer->grp_proc_count; proc2++) {
proc2_pointer = ompi_group_peer_lookup(group2_pointer,proc2);
/* check to see if this proc2 is alread in the group */
found_in_group = 0;
for (proc1 = 0; proc1 < group1_pointer->grp_proc_count; proc1++) {
proc1_pointer = ompi_group_peer_lookup(group1_pointer,proc1);
if (proc1_pointer == proc2_pointer) {
/* proc2 is in group1 - don't double count */
found_in_group = 1;
break;
}
} /* end proc1 loop */
if (found_in_group) {
continue;
}
new_group_size++;
} /* end proc loop */
overlap_count = ompi_group_dense_overlap (group1, group2, &bitmap);
if (0 > overlap_count) {
OBJ_DESTRUCT(&bitmap);
return overlap_count;
}
new_group_size = group1->grp_proc_count + group2->grp_proc_count - overlap_count;
if ( 0 == new_group_size ) {
*new_group = MPI_GROUP_EMPTY;
OBJ_RETAIN(MPI_GROUP_EMPTY);
OBJ_DESTRUCT(&bitmap);
return MPI_SUCCESS;
}
/* get new group struct */
new_group_pointer = ompi_group_allocate(new_group_size);
if (NULL == new_group_pointer) {
OBJ_DESTRUCT(&bitmap);
return MPI_ERR_GROUP;
}
/* fill in the new group list */
/* put group1 elements in the list */
for (proc1 = 0; proc1 < group1_pointer->grp_proc_count; proc1++) {
for (int proc1 = 0; proc1 < group1->grp_proc_count; ++proc1) {
new_group_pointer->grp_proc_pointers[proc1] =
ompi_group_peer_lookup(group1_pointer,proc1);
ompi_group_get_proc_ptr_raw (group1, proc1);
}
cnt = group1_pointer->grp_proc_count;
cnt = group1->grp_proc_count;
/* check group2 elements to see if they need to be included in the list */
for (proc2 = 0; proc2 < group2_pointer->grp_proc_count; proc2++) {
proc2_pointer = ompi_group_peer_lookup(group2_pointer,proc2);
/* check to see if this proc2 is alread in the group */
found_in_group = 0;
for (proc1 = 0; proc1 < group1_pointer->grp_proc_count; proc1++) {
proc1_pointer = ompi_group_peer_lookup(group1_pointer,proc1);
if (proc1_pointer == proc2_pointer) {
/* proc2 is in group1 - don't double count */
found_in_group = 1;
break;
}
} /* end proc1 loop */
if (found_in_group) {
for (int proc2 = 0; proc2 < group2->grp_proc_count; ++proc2) {
if (opal_bitmap_is_set_bit (&bitmap, proc2)) {
continue;
}
new_group_pointer->grp_proc_pointers[cnt] =
ompi_group_peer_lookup(group2_pointer,proc2);
cnt++;
proc2_pointer = ompi_group_get_proc_ptr_raw (group2, proc2);
new_group_pointer->grp_proc_pointers[cnt++] = proc2_pointer;
} /* end proc loop */
OBJ_DESTRUCT(&bitmap);
/* increment proc reference counters */
ompi_group_increment_proc_count(new_group_pointer);
/* find my rank */
my_group_rank = group1_pointer->grp_my_rank;
if (MPI_UNDEFINED == my_group_rank) {
my_group_rank = group2_pointer->grp_my_rank;
if ( MPI_UNDEFINED != my_group_rank) {
my_proc_pointer = ompi_group_peer_lookup(group2_pointer,my_group_rank);
}
if (MPI_UNDEFINED != group1->grp_my_rank || MPI_UNDEFINED != group2->grp_my_rank) {
ompi_set_group_rank(new_group_pointer, ompi_proc_local_proc);
} else {
my_proc_pointer = ompi_group_peer_lookup(group1_pointer,my_group_rank);
}
if ( MPI_UNDEFINED == my_group_rank ) {
new_group_pointer->grp_my_rank = MPI_UNDEFINED;
}
else {
ompi_set_group_rank(new_group_pointer, my_proc_pointer);
}
*new_group = (MPI_Group) new_group_pointer;
return OMPI_SUCCESS;
}
@ -206,96 +226,65 @@ int ompi_group_difference(ompi_group_t* group1, ompi_group_t* group2,
ompi_group_t **new_group) {
/* local varibles */
int new_group_size, proc1, proc2, found_in_group2, cnt;
int my_group_rank;
ompi_group_t *group1_pointer, *group2_pointer, *new_group_pointer;
ompi_proc_t *proc1_pointer, *proc2_pointer, *my_proc_pointer = NULL;
group1_pointer=(ompi_group_t *)group1;
group2_pointer=(ompi_group_t *)group2;
int new_group_size, overlap_count, rc;
ompi_group_t *new_group_pointer;
ompi_proc_t *proc1_pointer;
opal_bitmap_t bitmap;
/*
* form union
*/
/* get new group size */
new_group_size=0;
OBJ_CONSTRUCT(&bitmap, opal_bitmap_t);
rc = opal_bitmap_init (&bitmap, 32);
if (OPAL_SUCCESS != rc) {
return rc;
}
/* loop over group1 members */
for( proc1=0; proc1 < group1_pointer->grp_proc_count; proc1++ ) {
proc1_pointer = ompi_group_peer_lookup(group1_pointer,proc1);
/* check to see if this proc is in group2 */
found_in_group2=0;
for( proc2=0 ; proc2 < group2_pointer->grp_proc_count ; proc2++ ) {
proc2_pointer = ompi_group_peer_lookup(group2_pointer,proc2);
if( proc1_pointer == proc2_pointer ) {
found_in_group2=true;
break;
}
} /* end proc1 loop */
if(found_in_group2) {
continue;
}
new_group_size++;
} /* end proc loop */
/* check group2 elements to see if they need to be included in the list */
overlap_count = ompi_group_dense_overlap (group2, group1, &bitmap);
if (0 > overlap_count) {
OBJ_DESTRUCT(&bitmap);
return overlap_count;
}
new_group_size = group1->grp_proc_count - overlap_count;
if ( 0 == new_group_size ) {
*new_group = MPI_GROUP_EMPTY;
OBJ_RETAIN(MPI_GROUP_EMPTY);
OBJ_DESTRUCT(&bitmap);
return MPI_SUCCESS;
}
/* allocate a new ompi_group_t structure */
new_group_pointer=ompi_group_allocate(new_group_size);
new_group_pointer = ompi_group_allocate(new_group_size);
if( NULL == new_group_pointer ) {
OBJ_DESTRUCT(&bitmap);
return MPI_ERR_GROUP;
}
/* fill in group list */
cnt=0;
/* loop over group1 members */
for( proc1=0; proc1 < group1_pointer->grp_proc_count; proc1++ ) {
proc1_pointer = ompi_group_peer_lookup(group1_pointer,proc1);
/* check to see if this proc is in group2 */
found_in_group2=0;
for( proc2=0 ; proc2 < group2_pointer->grp_proc_count ; proc2++ ) {
proc2_pointer = ompi_group_peer_lookup(group2_pointer,proc2);
if( proc1_pointer == proc2_pointer ) {
found_in_group2=true;
break;
}
} /* end proc1 loop */
if(found_in_group2) {
for (int proc1 = 0, cnt = 0 ; proc1 < group1->grp_proc_count ; ++proc1) {
if (opal_bitmap_is_set_bit (&bitmap, proc1)) {
continue;
}
new_group_pointer->grp_proc_pointers[cnt] =
ompi_group_peer_lookup(group1_pointer,proc1);
cnt++;
proc1_pointer = ompi_group_get_proc_ptr_raw (group1, proc1);
new_group_pointer->grp_proc_pointers[cnt++] = proc1_pointer;
} /* end proc loop */
OBJ_DESTRUCT(&bitmap);
/* increment proc reference counters */
ompi_group_increment_proc_count(new_group_pointer);
/* find my rank */
my_group_rank=group1_pointer->grp_my_rank;
if ( MPI_UNDEFINED != my_group_rank ) {
my_proc_pointer = ompi_group_peer_lookup(group1_pointer,my_group_rank);
}
else {
my_group_rank=group2_pointer->grp_my_rank;
if ( MPI_UNDEFINED != my_group_rank ) {
my_proc_pointer = ompi_group_peer_lookup(group2_pointer,my_group_rank);
}
}
if ( MPI_UNDEFINED == my_group_rank ) {
if (MPI_UNDEFINED == group1->grp_my_rank || MPI_UNDEFINED != group2->grp_my_rank) {
new_group_pointer->grp_my_rank = MPI_UNDEFINED;
}
else {
ompi_set_group_rank(new_group_pointer,my_proc_pointer);
} else {
ompi_set_group_rank(new_group_pointer, ompi_proc_local_proc);
}
*new_group = (MPI_Group)new_group_pointer;

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2007 University of Houston. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -38,12 +41,10 @@ void ompi_set_group_rank(ompi_group_t *group, struct ompi_proc_t *proc_pointer)
for (proc = 0; proc < group->grp_proc_count; proc++) {
/* check and see if this proc pointer matches proc_pointer
*/
if (ompi_group_peer_lookup(group,proc) == proc_pointer) {
if (ompi_group_peer_lookup_existing (group, proc) == proc_pointer) {
group->grp_my_rank = proc;
}
break;
}
} /* end proc loop */
}
/* return */
return;
}

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -25,6 +28,7 @@
#include "ompi/mca/mca.h"
#include "opal/mca/base/mca_base_framework.h"
#include "ompi/mca/bml/bml.h"
#include "ompi/proc/proc.h"
/*
@ -60,6 +64,14 @@ OMPI_DECLSPEC extern mca_bml_base_component_t mca_bml_component;
OMPI_DECLSPEC extern mca_bml_base_module_t mca_bml;
OMPI_DECLSPEC extern mca_base_framework_t ompi_bml_base_framework;
/**
 * @brief Return the BML endpoint cached on a proc, creating it on demand.
 *
 * If no endpoint is stored under OMPI_PROC_ENDPOINT_TAG_BML yet, the
 * active BML module's bml_add_proc() is invoked first.
 *
 * NOTE(review): the return value of bml_add_proc() is ignored; if it fails
 * (or does not populate the endpoint slot) this returns NULL, so callers
 * presumably must tolerate a NULL endpoint -- TODO confirm.
 */
static inline struct mca_bml_base_endpoint_t *mca_bml_base_get_endpoint (struct ompi_proc_t *proc) {
if (OPAL_UNLIKELY(NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML])) {
/* lazily create the endpoint on first access */
mca_bml.bml_add_proc (proc);
}
return (struct mca_bml_base_endpoint_t *) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
}
END_C_DECLS
#endif /* MCA_BML_BASE_H */

Просмотреть файл

@ -160,14 +160,11 @@ static inline bool mca_bml_base_btl_array_remove( mca_bml_base_btl_array_t* arra
*/
/**
 * Return the bml_btl stored at item_index, or NULL when the index is out
 * of range.
 *
 * BUG FIX: this span interleaved the old debug-only bounds check with the
 * new unconditional range check, producing unbalanced braces and an
 * unreachable trailing return. Only the coherent new version is kept:
 * the range check is now unconditional and failure yields NULL (the old
 * code returned 0 only in debug builds).
 */
static inline mca_bml_base_btl_t* mca_bml_base_btl_array_get_index(mca_bml_base_btl_array_t* array, size_t item_index)
{
    if (item_index < array->arr_size) {
        return &array->bml_btls[item_index];
    }

    return NULL;
}
/**
@ -441,7 +438,7 @@ typedef int (*mca_bml_base_module_finalize_fn_t)( void );
* @return OMPI_SUCCESS or error status on failure.
*
* The mca_bml_base_module_add_procs_fn_t() is called by the PML to
* determine the set of BMLs that should be used to reach each process.
* determine the set of BTLs that should be used to reach each process.
* Any addressing information exported by the peer via the mca_base_modex_send()
* function should be available during this call via the corresponding
* mca_base_modex_recv() function. The BML may utilize this information to
@ -465,6 +462,25 @@ typedef int (*mca_bml_base_module_add_procs_fn_t)(
struct opal_bitmap_t* reachable
);
/**
* PML->BML notification of change in the process list.
*
* @param proc (IN) Process
* @return OMPI_SUCCESS or error status on failure.
*
* The mca_bml_base_module_add_proc_fn_t() is called by the PML to
* determine the set of BTLs that should be used to reach each process.
* Any addressing information exported by the peer via the mca_base_modex_send()
* function should be available during this call via the corresponding
* mca_base_modex_recv() function. The BML may utilize this information to
* determine reachability of each peer process.
*
* \note This function will return OMPI_ERR_UNREACH if the process can not
* be reached by a currently active BTL. This is not a fatal error, and the
* calling layer is free to continue using the BML interface.
*/
typedef int (*mca_bml_base_module_add_proc_fn_t) (struct ompi_proc_t *proc);
/**
* Notification of change to the process list.
*
@ -559,6 +575,7 @@ struct mca_bml_base_module_t {
mca_bml_base_component_t* bml_component; /**< pointer back to the BML component structure */
/* BML function table */
mca_bml_base_module_add_proc_fn_t bml_add_proc;
mca_bml_base_module_add_procs_fn_t bml_add_procs;
mca_bml_base_module_del_procs_fn_t bml_del_procs;
mca_bml_base_module_add_btl_fn_t bml_add_btl;

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved
@ -144,6 +144,293 @@ static void mca_bml_r2_calculate_bandwidth_latency (mca_bml_base_btl_array_t *bt
}
}
/* Create and initialize a fresh BML endpoint for @proc.
 *
 * The endpoint's eager/send/rdma BTL arrays are pre-sized for the maximum
 * number of BTL modules, and the endpoint is cached on the proc's
 * OMPI_PROC_ENDPOINT_TAG_BML slot so subsequent lookups find it.
 *
 * Returns the new endpoint, or NULL if allocation failed.
 */
static mca_bml_base_endpoint_t *mca_bml_r2_allocate_endpoint (ompi_proc_t *proc) {
    mca_bml_base_endpoint_t *endpoint = OBJ_NEW(mca_bml_base_endpoint_t);

    if (NULL == endpoint) {
        opal_output(0, "mca_bml_r2_add_procs: unable to allocate resources");
        return NULL;
    }

    /* reserve room for one entry per available BTL module in each array */
    mca_bml_base_btl_array_reserve (&endpoint->btl_eager, mca_bml_r2.num_btl_modules);
    mca_bml_base_btl_array_reserve (&endpoint->btl_send,  mca_bml_r2.num_btl_modules);
    mca_bml_base_btl_array_reserve (&endpoint->btl_rdma,  mca_bml_r2.num_btl_modules);

    endpoint->btl_max_send_size = -1;
    endpoint->btl_flags_or = 0;
    endpoint->btl_proc = proc;

    /* cache the endpoint on the proc so later lookups can find it */
    proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = endpoint;

    return endpoint;
}
/* Register @btl's component progress function with opal_progress, unless the
 * component has no progress function or it is already registered. */
static void mca_bml_r2_register_progress (mca_btl_base_module_t *btl)
{
    if (NULL == btl->btl_component->btl_progress) {
        return;
    }

    /* skip registration if this progress function is already tracked */
    for (size_t i = 0 ; i < mca_bml_r2.num_btl_progress ; ++i) {
        if (mca_bml_r2.btl_progress[i] == btl->btl_component->btl_progress) {
            return;
        }
    }

    mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress++] =
        btl->btl_component->btl_progress;
    opal_progress_register (btl->btl_component->btl_progress);
}
/* Add a (btl, btl_endpoint) pair to @bml_endpoint for @proc.
 *
 * The BTL is added to the send array unless an already-registered BTL has a
 * strictly higher exclusivity ranking, and to the rdma array when it supports
 * RDMA (and, for heterogeneous peers, heterogeneous RDMA).
 *
 * Returns OMPI_SUCCESS if the BTL was added for either use, or
 * OMPI_ERR_NOT_AVAILABLE if it was not used at all (caller should then
 * release the BTL's endpoint via btl_del_procs).
 */
static int mca_bml_r2_endpoint_add_btl (struct ompi_proc_t *proc, mca_bml_base_endpoint_t *bml_endpoint,
                                        mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *btl_endpoint)
{
    mca_bml_base_btl_t* bml_btl = NULL;
    int btl_flags = btl->btl_flags;
    bool btl_in_use = false;
    size_t size;

    /* NTH: these flags should have been sanitized by the btl. Once that is verified these
     * checks can be safely removed. */
    if ((btl_flags & MCA_BTL_FLAGS_PUT) && (NULL == btl->btl_put)) {
        opal_output(0, "mca_bml_r2_add_procs: The PUT flag is specified for"
                    " the %s BTL without any PUT function attached. Discard the flag !",
                    btl->btl_component->btl_version.mca_component_name);
        btl_flags &= ~MCA_BTL_FLAGS_PUT;
    }

    if ((btl_flags & MCA_BTL_FLAGS_GET) && (NULL == btl->btl_get)) {
        opal_output(0, "mca_bml_r2_add_procs: The GET flag is specified for"
                    " the %s BTL without any GET function attached. Discard the flag !",
                    btl->btl_component->btl_version.mca_component_name);
        btl_flags &= ~MCA_BTL_FLAGS_GET;
    }

    if ((btl_flags & (MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_SEND)) == 0) {
        /* If no protocol specified, we have 2 choices: we ignore the BTL
         * as we don't know which protocol to use, or we suppose that all
         * BTLs support the send protocol. This is really a btl error as
         * these flags should have been sanitized by the btl. */
        btl_flags |= MCA_BTL_FLAGS_SEND;
    }

    if (btl_flags & MCA_BTL_FLAGS_SEND) {
        /* dont allow an additional BTL with a lower exclusivity ranking.
         * BUGFIX: the array size must be computed BEFORE indexing the array
         * (the previous code read 'size' uninitialized and could underflow
         * 'size - 1' on an empty array). */
        size = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_send);
        bml_btl = (size > 0) ?
            mca_bml_base_btl_array_get_index (&bml_endpoint->btl_send, size - 1) : NULL;

        if (NULL == bml_btl || bml_btl->btl->btl_exclusivity <= btl->btl_exclusivity) {
            /* this btl has at least the exclusivity of the existing btls (or
             * none exists) so add it for send */
            opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                "mca: bml: Using %s btl for send to %s on node %s",
                                btl->btl_component->btl_version.mca_component_name,
                                OMPI_NAME_PRINT(&proc->super.proc_name),
                                proc->super.proc_hostname);

            /* cache the endpoint on the proc */
            bml_btl = mca_bml_base_btl_array_insert (&bml_endpoint->btl_send);
            bml_btl->btl = btl;
            bml_btl->btl_endpoint = btl_endpoint;
            bml_btl->btl_weight = 0;
            bml_btl->btl_flags = btl_flags;

            /* calculate the bitwise OR of the btl flags */
            bml_endpoint->btl_flags_or |= bml_btl->btl_flags;

            btl_in_use = true;
        } else {
            opal_output_verbose(20, opal_btl_base_framework.framework_output,
                                "mca: bml: Not using %s btl for send to %s on node %s "
                                "because %s btl has higher exclusivity (%d > %d)",
                                btl->btl_component->btl_version.mca_component_name,
                                OMPI_NAME_PRINT(&proc->super.proc_name), proc->super.proc_hostname,
                                bml_btl->btl->btl_component->btl_version.mca_component_name,
                                bml_btl->btl->btl_exclusivity,
                                btl->btl_exclusivity);
        }
    }

    /* always add rdma endpoints (heterogeneous peers only when the btl
     * supports heterogeneous rdma) */
    if ((btl_flags & MCA_BTL_FLAGS_RDMA) &&
        !((proc->super.proc_arch != ompi_proc_local_proc->super.proc_arch) &&
          (0 == (btl->btl_flags & MCA_BTL_FLAGS_HETEROGENEOUS_RDMA)))) {
        mca_bml_base_btl_t *bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma);

        bml_btl_rdma->btl = btl;
        bml_btl_rdma->btl_endpoint = btl_endpoint;
        bml_btl_rdma->btl_weight = 0;
        bml_btl_rdma->btl_flags = btl_flags;

        /* grow the endpoint rdma pipeline parameters to cover this btl */
        if (bml_endpoint->btl_pipeline_send_length < btl->btl_rdma_pipeline_send_length) {
            bml_endpoint->btl_pipeline_send_length = btl->btl_rdma_pipeline_send_length;
        }

        if (bml_endpoint->btl_send_limit < btl->btl_min_rdma_pipeline_size) {
            bml_endpoint->btl_send_limit = btl->btl_min_rdma_pipeline_size;
        }

        btl_in_use = true;
    }

    return btl_in_use ? OMPI_SUCCESS : OMPI_ERR_NOT_AVAILABLE;
}
/* Compute per-BTL scheduling metrics for an endpoint after its send/rdma BTL
 * arrays have been populated: sorts both arrays by bandwidth, assigns each
 * BTL a bandwidth-proportional weight, populates the eager array with the
 * lowest-latency send BTLs, and sets the endpoint max send size to the
 * minimum over the send BTLs. */
static void mca_bml_r2_compute_endpoint_metrics (mca_bml_base_endpoint_t *bml_endpoint)
{
    double total_bandwidth = 0;
    uint32_t latency;
    size_t n_send, n_rdma;

    /* (1) determine the total bandwidth available across all btls
     *     note that we need to do this here, as we may already have btls configured
     * (2) determine the highest priority ranking for latency
     * (3) compute the maximum amount of bytes that can be send without any
     *     weighting. Once the left over is smaller than this number we will
     *     start using the weight to compute the correct amount.
     */
    n_send = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_send);
    n_rdma = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_rdma);

    /* sort BTLs in descending order according to bandwidth value */
    qsort (bml_endpoint->btl_send.bml_btls, n_send,
           sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);

    bml_endpoint->btl_rdma_index = 0;

    /* NOTE(review): assumes this helper fills total_bandwidth with the sum and
     * latency with the best (lowest) ranking over the array -- confirm against
     * mca_bml_r2_calculate_bandwidth_latency's definition. */
    mca_bml_r2_calculate_bandwidth_latency (&bml_endpoint->btl_send, &total_bandwidth, &latency);

    /* (1) set the weight of each btl as a percentage of overall bandwidth
     * (2) copy all btl instances at the highest priority ranking into the
     *     list of btls used for first fragments
     */
    for (size_t n_index = 0 ; n_index < n_send ; ++n_index) {
        mca_bml_base_btl_t *bml_btl =
            mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
        mca_btl_base_module_t *btl = bml_btl->btl;

        /* compute weighting factor for this r2; BTLs that advertise no
         * bandwidth share weight equally */
        if(btl->btl_bandwidth > 0) {
            bml_btl->btl_weight = (float)(btl->btl_bandwidth / total_bandwidth);
        } else {
            bml_btl->btl_weight = (float)(1.0 / n_send);
        }

        /* check to see if this r2 is already in the array of r2s
         * used for first fragments - if not add it.
         */
        if(btl->btl_latency == latency) {
            mca_bml_base_btl_t* bml_btl_new =
                mca_bml_base_btl_array_insert(&bml_endpoint->btl_eager);
            *bml_btl_new = *bml_btl;
        }

        /* set endpoint max send size as min of available btls */
        if (bml_endpoint->btl_max_send_size > btl->btl_max_send_size)
            bml_endpoint->btl_max_send_size = btl->btl_max_send_size;
    }

    /* sort BTLs in descending order according to bandwidth value */
    qsort(bml_endpoint->btl_rdma.bml_btls, n_rdma,
          sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);

    mca_bml_r2_calculate_bandwidth_latency (&bml_endpoint->btl_rdma, &total_bandwidth, &latency);

    /* set rdma btl weights */
    for (size_t n_index = 0 ; n_index < n_rdma ; ++n_index) {
        mca_bml_base_btl_t *bml_btl =
            mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma, n_index);

        /* compute weighting factor for this r2 */
        if (bml_btl->btl->btl_bandwidth > 0.0) {
            bml_btl->btl_weight = (float)(bml_btl->btl->btl_bandwidth / total_bandwidth);
        } else {
            bml_btl->btl_weight = (float)(1.0 / n_rdma);
        }
    }
}
static int mca_bml_r2_add_proc (struct ompi_proc_t *proc)
{
mca_bml_base_endpoint_t *bml_endpoint;
/* at least one btl is in use */
bool btl_in_use;
int rc;
if (OPAL_UNLIKELY(NULL == proc)) {
return OMPI_ERR_BAD_PARAM;
}
/* check if this endpoint is already set up */
if (NULL != proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
OBJ_RETAIN(proc);
return OMPI_SUCCESS;
}
/* add btls if not already done */
if (OMPI_SUCCESS != (rc = mca_bml_r2_add_btls())) {
return rc;
}
bml_endpoint = mca_bml_r2_allocate_endpoint (proc);
if (OPAL_UNLIKELY(NULL == bml_endpoint)) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
for (int p_index = 0 ; p_index < mca_bml_r2.num_btl_modules ; ++p_index) {
mca_btl_base_module_t *btl = mca_bml_r2.btl_modules[p_index];
struct mca_btl_base_endpoint_t *btl_endpoint = NULL;
/* if the r2 can reach the destination proc it sets the
* corresponding bit (proc index) in the reachable bitmap
* and can return addressing information for each proc
* that is passed back to the r2 on data transfer calls
*/
rc = btl->btl_add_procs (btl, 1, (opal_proc_t **) &proc, &btl_endpoint, NULL);
if (OMPI_SUCCESS != rc || NULL == btl_endpoint) {
/* This BTL has troubles adding the nodes. Let's continue maybe some other BTL
* can take care of this task. */
continue;
}
rc = mca_bml_r2_endpoint_add_btl (proc, bml_endpoint, btl, btl_endpoint);
if (OMPI_SUCCESS != rc) {
btl->btl_del_procs (btl, 1, (opal_proc_t **) &proc, &btl_endpoint);
} else {
mca_bml_r2_register_progress (btl);
btl_in_use = true;
}
}
if (!btl_in_use) {
/* no btl is available for this proc */
if (mca_bml_r2.show_unreach_errors) {
opal_show_help ("help-mca-bml-r2.txt", "unreachable proc", true,
OMPI_NAME_PRINT(&(ompi_proc_local_proc->super.proc_name)),
(NULL != ompi_proc_local_proc->super.proc_hostname ?
ompi_proc_local_proc->super.proc_hostname : "unknown!"),
OMPI_NAME_PRINT(&(proc->super.proc_name)),
(NULL != proc->super.proc_hostname ?
proc->super.proc_hostname : "unknown!"),
btl_names);
}
return OMPI_ERR_UNREACH;
}
/* compute metrics for registered btls */
mca_bml_r2_compute_endpoint_metrics (bml_endpoint);
return OMPI_SUCCESS;
}
/*
* For each proc setup a datastructure that indicates the BTLs
* that can be used to reach the destination.
@ -154,7 +441,7 @@ static int mca_bml_r2_add_procs( size_t nprocs,
struct ompi_proc_t** procs,
struct opal_bitmap_t* reachable )
{
size_t p, p_index, n_new_procs = 0;
size_t n_new_procs = 0;
struct mca_btl_base_endpoint_t ** btl_endpoints = NULL;
struct ompi_proc_t** new_procs = NULL;
int rc, ret = OMPI_SUCCESS;
@ -170,7 +457,7 @@ static int mca_bml_r2_add_procs( size_t nprocs,
/* Select only the procs that don't yet have the BML proc struct. This prevent
* us from calling btl->add_procs several times on the same destination proc.
*/
for(p_index = 0; p_index < nprocs; p_index++) {
for (size_t p_index = 0 ; p_index < nprocs ; ++p_index) {
struct ompi_proc_t* proc = procs[p_index];
if(NULL != proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
@ -203,10 +490,9 @@ static int mca_bml_r2_add_procs( size_t nprocs,
return OMPI_ERR_OUT_OF_RESOURCE;
}
for(p_index = 0; p_index < mca_bml_r2.num_btl_modules; p_index++) {
mca_btl_base_module_t* btl = mca_bml_r2.btl_modules[p_index];
for (size_t p_index = 0 ; p_index < mca_bml_r2.num_btl_modules ; ++p_index) {
mca_btl_base_module_t *btl = mca_bml_r2.btl_modules[p_index];
int btl_inuse = 0;
int btl_flags;
/* if the r2 can reach the destination proc it sets the
* corresponding bit (proc index) in the reachable bitmap
@ -217,240 +503,69 @@ static int mca_bml_r2_add_procs( size_t nprocs,
memset(btl_endpoints, 0, nprocs *sizeof(struct mca_btl_base_endpoint_t*));
rc = btl->btl_add_procs(btl, n_new_procs, (opal_proc_t**)new_procs, btl_endpoints, reachable);
if(OMPI_SUCCESS != rc) {
/* This BTL has troubles adding the nodes. Let's continue maybe some other BTL
* can take care of this task.
*/
if (OMPI_SUCCESS != rc) {
/* This BTL encountered an error while adding procs. Continue in case some other
* BTL(s) can be used. */
continue;
}
/* for each proc that is reachable */
for( p = 0; p < n_new_procs; p++ ) {
if(opal_bitmap_is_set_bit(reachable, p)) {
ompi_proc_t *proc = new_procs[p];
mca_bml_base_endpoint_t * bml_endpoint =
(mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
mca_bml_base_btl_t* bml_btl = NULL;
size_t size;
if(NULL == bml_endpoint) {
/* allocate bml specific proc data */
bml_endpoint = OBJ_NEW(mca_bml_base_endpoint_t);
if (NULL == bml_endpoint) {
opal_output(0, "mca_bml_r2_add_procs: unable to allocate resources");
free(btl_endpoints);
free(new_procs);
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* preallocate space in array for max number of r2s */
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_eager, mca_bml_r2.num_btl_modules);
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_send, mca_bml_r2.num_btl_modules);
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_rdma, mca_bml_r2.num_btl_modules);
bml_endpoint->btl_max_send_size = -1;
bml_endpoint->btl_proc = proc;
proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = bml_endpoint;
bml_endpoint->btl_flags_or = 0;
}
btl_flags = btl->btl_flags;
if( (btl_flags & MCA_BTL_FLAGS_PUT) && (NULL == btl->btl_put) ) {
opal_output(0, "mca_bml_r2_add_procs: The PUT flag is specified for"
" the %s BTL without any PUT function attached. Discard the flag !",
btl->btl_component->btl_version.mca_component_name);
btl_flags ^= MCA_BTL_FLAGS_PUT;
}
if( (btl_flags & MCA_BTL_FLAGS_GET) && (NULL == btl->btl_get) ) {
opal_output(0, "mca_bml_r2_add_procs: The GET flag is specified for"
" the %s BTL without any GET function attached. Discard the flag !",
btl->btl_component->btl_version.mca_component_name);
btl_flags ^= MCA_BTL_FLAGS_GET;
}
if( (btl_flags & (MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_SEND)) == 0 ) {
/**
* If no protocol specified, we have 2 choices: we ignore the BTL
* as we don't know which protocl to use, or we suppose that all
* BTLs support the send protocol.
*/
btl_flags |= MCA_BTL_FLAGS_SEND;
}
/* dont allow an additional BTL with a lower exclusivity ranking */
size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
if(size > 0) {
bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, size-1);
/* skip this btl if the exclusivity is less than the previous only if the btl does not provide full rdma (for one-sided) */
if(bml_btl->btl->btl_exclusivity > btl->btl_exclusivity && ((btl_flags & MCA_BTL_FLAGS_RDMA) != MCA_BTL_FLAGS_RDMA)) {
btl->btl_del_procs(btl, 1, (opal_proc_t**)&proc, &btl_endpoints[p]);
opal_output_verbose(MCA_BASE_VERBOSE_INFO, ompi_bml_base_framework.framework_output,
"mca: bml: Not using %s btl to %s on node %s "
"because %s btl has higher exclusivity (%d > %d)",
btl->btl_component->btl_version.mca_component_name,
OMPI_NAME_PRINT(&proc->super.proc_name), proc->super.proc_hostname,
bml_btl->btl->btl_component->btl_version.mca_component_name,
bml_btl->btl->btl_exclusivity,
btl->btl_exclusivity);
continue;
}
}
opal_output_verbose(MCA_BASE_VERBOSE_INFO, ompi_bml_base_framework.framework_output,
"mca: bml: Using %s btl to %s on node %s",
btl->btl_component->btl_version.mca_component_name,
OMPI_NAME_PRINT(&proc->super.proc_name),
proc->super.proc_hostname);
/* cache the endpoint on the proc */
if (NULL == bml_btl || (bml_btl->btl->btl_exclusivity <= btl->btl_exclusivity)) {
bml_btl = mca_bml_base_btl_array_insert(&bml_endpoint->btl_send);
bml_btl->btl = btl;
bml_btl->btl_endpoint = btl_endpoints[p];
bml_btl->btl_weight = 0;
bml_btl->btl_flags = btl_flags;
/**
* calculate the bitwise OR of the btl flags
*/
bml_endpoint->btl_flags_or |= bml_btl->btl_flags;
}
/* always add rdma endpoints */
if ((btl_flags & MCA_BTL_FLAGS_RDMA) &&
!((proc->super.proc_arch != ompi_proc_local_proc->super.proc_arch) &&
(0 == (btl->btl_flags & MCA_BTL_FLAGS_HETEROGENEOUS_RDMA)))) {
mca_bml_base_btl_t *bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma);
bml_btl_rdma->btl = btl;
bml_btl_rdma->btl_endpoint = btl_endpoints[p];
bml_btl_rdma->btl_weight = 0;
bml_btl_rdma->btl_flags = btl_flags;
if (bml_endpoint->btl_pipeline_send_length < btl->btl_rdma_pipeline_send_length) {
bml_endpoint->btl_pipeline_send_length = btl->btl_rdma_pipeline_send_length;
}
if (bml_endpoint->btl_send_limit < btl->btl_min_rdma_pipeline_size) {
bml_endpoint->btl_send_limit = btl->btl_min_rdma_pipeline_size;
}
}
/* This BTL is in use, allow the progress registration */
btl_inuse++;
for (size_t p = 0 ; p < n_new_procs ; ++p) {
if (!opal_bitmap_is_set_bit(reachable, p)) {
continue;
}
ompi_proc_t *proc = new_procs[p];
mca_bml_base_endpoint_t *bml_endpoint =
(mca_bml_base_endpoint_t *) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
mca_bml_base_btl_t *bml_btl = NULL;
size_t size;
if (NULL == bml_endpoint) {
bml_endpoint = mca_bml_r2_allocate_endpoint (proc);
if (NULL == bml_endpoint) {
free(btl_endpoints);
free(new_procs);
return OPAL_ERR_OUT_OF_RESOURCE;
}
}
rc = mca_bml_r2_endpoint_add_btl (proc, bml_endpoint, btl, btl_endpoints[p]);
if (OMPI_SUCCESS != rc) {
btl->btl_del_procs(btl, 1, (opal_proc_t**)&proc, &btl_endpoints[p]);
continue;
}
/* This BTL is in use, allow the progress registration */
btl_inuse++;
}
if(btl_inuse > 0 && NULL != btl->btl_component->btl_progress) {
size_t p;
bool found = false;
for( p = 0; p < mca_bml_r2.num_btl_progress; p++ ) {
if(mca_bml_r2.btl_progress[p] == btl->btl_component->btl_progress) {
found = true;
break;
}
}
if(found == false) {
mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress] =
btl->btl_component->btl_progress;
mca_bml_r2.num_btl_progress++;
opal_progress_register( btl->btl_component->btl_progress );
}
if (btl_inuse) {
mca_bml_r2_register_progress (btl);
}
}
free(btl_endpoints);
/* iterate back through procs and compute metrics for registered r2s */
for(p=0; p<n_new_procs; p++) {
ompi_proc_t *proc = new_procs[p];
mca_bml_base_endpoint_t* bml_endpoint =
(mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
double total_bandwidth = 0;
uint32_t latency;
size_t n_send, n_rdma;
for (size_t p = 0; p < n_new_procs ; ++p) {
mca_bml_base_endpoint_t *bml_endpoint =
(mca_bml_base_endpoint_t *) new_procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
/* skip over procs w/ no btl's registered */
if(NULL == bml_endpoint) {
continue;
}
/* (1) determine the total bandwidth available across all btls
* note that we need to do this here, as we may already have btls configured
* (2) determine the highest priority ranking for latency
* (3) compute the maximum amount of bytes that can be send without any
* weighting. Once the left over is smaller than this number we will
* start using the weight to compute the correct amount.
*/
n_send = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
n_rdma = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
/* sort BTLs in descending order according to bandwidth value */
qsort(bml_endpoint->btl_send.bml_btls, n_send,
sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);
bml_endpoint->btl_rdma_index = 0;
mca_bml_r2_calculate_bandwidth_latency (&bml_endpoint->btl_send, &total_bandwidth, &latency);
/* (1) set the weight of each btl as a percentage of overall bandwidth
* (2) copy all btl instances at the highest priority ranking into the
* list of btls used for first fragments
*/
for (size_t n_index = 0 ; n_index < n_send ; ++n_index) {
mca_bml_base_btl_t* bml_btl =
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
mca_btl_base_module_t *btl = bml_btl->btl;
/* compute weighting factor for this r2 */
if(btl->btl_bandwidth > 0) {
bml_btl->btl_weight = (float)(btl->btl_bandwidth / total_bandwidth);
} else {
bml_btl->btl_weight = (float)(1.0 / n_send);
}
/* check to see if this r2 is already in the array of r2s
* used for first fragments - if not add it.
*/
if(btl->btl_latency == latency) {
mca_bml_base_btl_t* bml_btl_new =
mca_bml_base_btl_array_insert(&bml_endpoint->btl_eager);
*bml_btl_new = *bml_btl;
}
/* set endpoint max send size as min of available btls */
if(bml_endpoint->btl_max_send_size > btl->btl_max_send_size)
bml_endpoint->btl_max_send_size = btl->btl_max_send_size;
}
/* sort BTLs in descending order according to bandwidth value */
qsort(bml_endpoint->btl_rdma.bml_btls, n_rdma,
sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);
mca_bml_r2_calculate_bandwidth_latency (&bml_endpoint->btl_rdma, &total_bandwidth, &latency);
/* set rdma btl weights */
for (size_t n_index = 0 ; n_index < n_rdma ; ++n_index) {
mca_bml_base_btl_t *bml_btl =
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma, n_index);
/* compute weighting factor for this r2 */
if (bml_btl->btl->btl_bandwidth > 0.0) {
bml_btl->btl_weight = (float)(bml_btl->btl->btl_bandwidth / total_bandwidth);
} else {
bml_btl->btl_weight = (float)(1.0 / n_rdma);
}
if (NULL != bml_endpoint) {
mca_bml_r2_compute_endpoint_metrics (bml_endpoint);
}
}
/* see if we have a connection to everyone else */
for(p = 0; p < n_new_procs; p++) {
for(size_t p = 0; p < n_new_procs ; ++p) {
ompi_proc_t *proc = new_procs[p];
if (NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
ret = OMPI_ERR_UNREACH;
if (mca_bml_r2.show_unreach_errors) {
opal_show_help("help-mca-bml-r2.txt",
"unreachable proc",
true,
opal_show_help("help-mca-bml-r2.txt", "unreachable proc", true,
OMPI_NAME_PRINT(&(ompi_proc_local_proc->super.proc_name)),
(NULL != ompi_proc_local_proc->super.proc_hostname ?
ompi_proc_local_proc->super.proc_hostname : "unknown!"),
@ -459,6 +574,7 @@ static int mca_bml_r2_add_procs( size_t nprocs,
proc->super.proc_hostname : "unknown!"),
btl_names);
}
break;
}
}
@ -476,7 +592,6 @@ static int mca_bml_r2_add_procs( size_t nprocs,
static int mca_bml_r2_del_procs(size_t nprocs,
struct ompi_proc_t** procs)
{
size_t p;
int rc;
struct ompi_proc_t** del_procs = (struct ompi_proc_t**)
malloc(nprocs * sizeof(struct ompi_proc_t*));
@ -486,26 +601,27 @@ static int mca_bml_r2_del_procs(size_t nprocs,
return OMPI_ERR_OUT_OF_RESOURCE;
}
for(p = 0; p < nprocs; p++) {
for (size_t p = 0 ; p < nprocs ; ++p) {
ompi_proc_t *proc = procs[p];
/* We much check that there are 2 references to the proc (not 1). The
* first reference belongs to ompi/proc the second belongs to the bml
* since we retained it. We will release that reference at the end of
* the loop below. */
if(((opal_object_t*)proc)->obj_reference_count == 2) {
if (((opal_object_t*)proc)->obj_reference_count == 2 &&
NULL != proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
del_procs[n_del_procs++] = proc;
}
}
for(p = 0; p < n_del_procs; p++) {
for (size_t p = 0 ; p < n_del_procs ; ++p) {
ompi_proc_t *proc = del_procs[p];
mca_bml_base_endpoint_t* bml_endpoint =
(mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
size_t f_index, f_size;
size_t f_size;
/* notify each btl that the proc is going away */
f_size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
for(f_index = 0; f_index < f_size; f_index++) {
for (size_t f_index = 0 ; f_index < f_size ; ++f_index) {
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, f_index);
mca_btl_base_module_t* btl = bml_btl->btl;
@ -521,10 +637,12 @@ static int mca_bml_r2_del_procs(size_t nprocs,
*/
}
proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
OBJ_RELEASE(proc);
/* do any required cleanup */
OBJ_RELEASE(bml_endpoint);
proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
}
free(del_procs);
@ -835,6 +953,7 @@ int mca_bml_r2_component_fini(void)
mca_bml_r2_module_t mca_bml_r2 = {
.super = {
.bml_component = &mca_bml_r2_component,
.bml_add_proc = mca_bml_r2_add_proc,
.bml_add_procs = mca_bml_r2_add_procs,
.bml_del_procs = mca_bml_r2_del_procs,
.bml_add_btl = mca_bml_r2_add_btl,
@ -843,8 +962,7 @@ mca_bml_r2_module_t mca_bml_r2 = {
.bml_register = mca_bml_r2_register,
.bml_register_error = mca_bml_r2_register_error,
.bml_finalize = mca_bml_r2_finalize,
.bml_ft_event = mca_bml_r2_ft_event
}
.bml_ft_event = mca_bml_r2_ft_event,
},
};

Просмотреть файл

@ -35,25 +35,6 @@ int mca_coll_fca_init_query(bool enable_progress_threads,
return OMPI_SUCCESS;
}
/* Return 1 if any of the first @size members of @group lives on a remote
 * node, 0 when they are all local.  On return *local_peers holds the number
 * of members residing on the local node. */
static int have_remote_peers(ompi_group_t *group, size_t size, int *local_peers)
{
    int remote = 0;

    *local_peers = 0;
    for (size_t rank = 0; rank < size; ++rank) {
        ompi_proc_t *peer = ompi_group_peer_lookup(group, rank);

        if (OPAL_PROC_ON_LOCAL_NODE(peer->super.proc_flags)) {
            ++(*local_peers);
        } else {
            remote = 1;
        }
    }

    return remote;
}
static inline ompi_proc_t* __local_rank_lookup(ompi_communicator_t *comm, int rank)
{
return ompi_group_peer_lookup(comm->c_local_group, rank);
@ -618,7 +599,7 @@ mca_coll_fca_comm_query(struct ompi_communicator_t *comm, int *priority)
if (size < mca_coll_fca_component.fca_np)
goto exit;
if (!have_remote_peers(comm->c_local_group, size, &local_peers) || OMPI_COMM_IS_INTER(comm))
if (!ompi_group_have_remote_peers(comm->c_local_group) || OMPI_COMM_IS_INTER(comm))
goto exit;
fca_module = OBJ_NEW(mca_coll_fca_module_t);

Просмотреть файл

@ -74,7 +74,6 @@ uint32_t mca_coll_sm_one = 1;
*/
static int sm_module_enable(mca_coll_base_module_t *module,
struct ompi_communicator_t *comm);
static bool have_local_peers(ompi_group_t *group, size_t size);
static int bootstrap_comm(ompi_communicator_t *comm,
mca_coll_sm_module_t *module);
static int mca_coll_sm_module_disable(mca_coll_base_module_t *module,
@ -172,8 +171,7 @@ mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority)
/* If we're intercomm, or if there's only one process in the
communicator, or if not all the processes in the communicator
are not on this node, then we don't want to run */
if (OMPI_COMM_IS_INTER(comm) || 1 == ompi_comm_size(comm) ||
!have_local_peers(comm->c_local_group, ompi_comm_size(comm))) {
if (OMPI_COMM_IS_INTER(comm) || 1 == ompi_comm_size(comm) || ompi_group_have_remote_peers (comm->c_local_group)) {
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
"coll:sm:comm_query (%d/%s): intercomm, comm is too small, or not all peers local; disqualifying myself", comm->c_contextid, comm->c_name);
return NULL;
@ -490,23 +488,6 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module,
return OMPI_SUCCESS;
}
/* Return true when every one of the first @size processes in @group is
 * located on the local node, false as soon as a remote peer is found. */
static bool have_local_peers(ompi_group_t *group, size_t size)
{
    for (size_t rank = 0; rank < size; ++rank) {
        ompi_proc_t *peer = ompi_group_peer_lookup(group, rank);

        if (!OPAL_PROC_ON_LOCAL_NODE(peer->super.proc_flags)) {
            return false;
        }
    }

    return true;
}
static int bootstrap_comm(ompi_communicator_t *comm,
mca_coll_sm_module_t *module)
{

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -23,6 +26,7 @@
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/mtl/mtl.h"
#include "ompi/mca/mtl/base/base.h"
#include "ompi/proc/proc.h"
#include "opal/datatype/opal_convertor.h"
#include <psm.h>
#include <psm_mq.h>

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -54,5 +57,14 @@ struct mca_mtl_psm_endpoint_t {
typedef struct mca_mtl_psm_endpoint_t mca_mtl_psm_endpoint_t;
OBJ_CLASS_DECLARATION(mca_mtl_psm_endpoint);
/* Return the cached PSM MTL endpoint for @ompi_proc, lazily creating it via
 * ompi_mtl_psm_add_procs() on first use.
 *
 * NOTE(review): the return value of ompi_mtl_psm_add_procs() is ignored; if
 * it fails this function returns NULL -- verify that all callers can cope
 * with a NULL endpoint. */
static inline mca_mtl_psm_endpoint_t *ompi_mtl_psm_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc)
{
    if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL])) {
        ompi_mtl_psm_add_procs (mtl, 1, &ompi_proc);
    }

    return ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
}
END_C_DECLS
#endif

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -42,7 +45,7 @@ ompi_mtl_psm_send(struct mca_mtl_base_module_t* mtl,
int ret;
size_t length;
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
mca_mtl_psm_endpoint_t* psm_endpoint = (mca_mtl_psm_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
mca_mtl_psm_endpoint_t* psm_endpoint = ompi_mtl_psm_get_endpoint (mtl, ompi_proc);
assert(mtl == &ompi_mtl_psm.super);
@ -94,7 +97,7 @@ ompi_mtl_psm_isend(struct mca_mtl_base_module_t* mtl,
mca_mtl_psm_request_t * mtl_psm_request = (mca_mtl_psm_request_t*) mtl_request;
size_t length;
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
mca_mtl_psm_endpoint_t* psm_endpoint = (mca_mtl_psm_endpoint_t*)ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
mca_mtl_psm_endpoint_t* psm_endpoint = ompi_mtl_psm_get_endpoint (mtl, ompi_proc);
assert(mtl == &ompi_mtl_psm.super);

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -11,6 +12,8 @@
* All rights reserved.
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -24,6 +27,7 @@
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/mtl/mtl.h"
#include "ompi/mca/mtl/base/base.h"
#include "ompi/proc/proc.h"
#include "opal/datatype/opal_convertor.h"
#include <psm2.h>
#include <psm2_mq.h>

Просмотреть файл

@ -55,5 +55,14 @@ struct mca_mtl_psm2_endpoint_t {
typedef struct mca_mtl_psm2_endpoint_t mca_mtl_psm2_endpoint_t;
OBJ_CLASS_DECLARATION(mca_mtl_psm2_endpoint);
static inline mca_mtl_psm_endpoint_t *ompi_mtl_psm2_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc)
{
if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL])) {
ompi_mtl_psm2_add_procs (mtl, 1, &ompi_proc);
}
return ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
}
END_C_DECLS
#endif

Просмотреть файл

@ -43,7 +43,7 @@ ompi_mtl_psm2_send(struct mca_mtl_base_module_t* mtl,
int ret;
size_t length;
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
mca_mtl_psm2_endpoint_t* psm_endpoint = (mca_mtl_psm2_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
mca_mtl_psm2_endpoint_t* psm_endpoint = ompi_mtl_psm2_get_endpoint (mtl, ompi_proc);
assert(mtl == &ompi_mtl_psm2.super);
@ -95,7 +95,7 @@ ompi_mtl_psm2_isend(struct mca_mtl_base_module_t* mtl,
mca_mtl_psm2_request_t * mtl_psm2_request = (mca_mtl_psm2_request_t*) mtl_request;
size_t length;
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
mca_mtl_psm2_endpoint_t* psm_endpoint = (mca_mtl_psm2_endpoint_t*)ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
mca_mtl_psm2_endpoint_t* psm_endpoint = ompi_mtl_psm2_get_endpoint (mtl, ompi_proc);
assert(mtl == &ompi_mtl_psm2.super);

Просмотреть файл

@ -299,7 +299,7 @@ ompi_osc_portals4_get_peer(ompi_osc_portals4_module_t *module, int rank)
static inline ptl_process_t
ompi_osc_portals4_get_peer_group(struct ompi_group_t *group, int rank)
{
ompi_proc_t *proc = ompi_group_get_proc_ptr(group, rank);
ompi_proc_t *proc = ompi_group_get_proc_ptr(group, rank, true);
return *((ptl_process_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
}

Просмотреть файл

@ -134,10 +134,8 @@ check_win_ok(ompi_communicator_t *comm, int flavor)
return OMPI_ERR_NOT_SUPPORTED;
}
for (i = 0 ; i < ompi_comm_size(comm) ; ++i) {
if (!OPAL_PROC_ON_LOCAL_NODE(ompi_comm_peer_lookup(comm, i)->super.proc_flags)) {
return OMPI_ERR_RMA_SHARED;
}
if (ompi_group_have_remote_peers (comm->c_local_group)) {
return OMPI_ERR_RMA_SHARED;
}
return OMPI_SUCCESS;

Просмотреть файл

@ -191,11 +191,9 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
{
/* allocate pml specific comm data */
mca_pml_ob1_comm_t* pml_comm = OBJ_NEW(mca_pml_ob1_comm_t);
opal_list_item_t *item, *next_item;
mca_pml_ob1_recv_frag_t* frag;
mca_pml_ob1_recv_frag_t *frag, *next_frag;
mca_pml_ob1_comm_proc_t* pml_proc;
mca_pml_ob1_match_hdr_t* hdr;
int i;
if (NULL == pml_comm) {
return OMPI_ERR_OUT_OF_RESOURCE;
@ -210,16 +208,8 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
mca_pml_ob1_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count);
comm->c_pml_comm = pml_comm;
for( i = 0; i < comm->c_remote_group->grp_proc_count; i++ ) {
pml_comm->procs[i].ompi_proc = ompi_group_peer_lookup(comm->c_remote_group,i);
OBJ_RETAIN(pml_comm->procs[i].ompi_proc);
}
/* Grab all related messages from the non_existing_communicator pending queue */
for( item = opal_list_get_first(&mca_pml_ob1.non_existing_communicator_pending);
item != opal_list_get_end(&mca_pml_ob1.non_existing_communicator_pending);
item = next_item ) {
frag = (mca_pml_ob1_recv_frag_t*)item;
next_item = opal_list_get_next(item);
OPAL_LIST_FOREACH_SAFE(frag, next_frag, &mca_pml_ob1.non_existing_communicator_pending, mca_pml_ob1_recv_frag_t) {
hdr = &frag->hdr.hdr_match;
/* Is this fragment for the current communicator ? */
@ -229,8 +219,8 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
/* As we now know we work on a fragment for this communicator
* we should remove it from the
* non_existing_communicator_pending list. */
opal_list_remove_item( &mca_pml_ob1.non_existing_communicator_pending,
item );
opal_list_remove_item (&mca_pml_ob1.non_existing_communicator_pending,
(opal_list_item_t *) frag);
add_fragment_to_unexpected:
@ -249,7 +239,7 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
* We just have to push the fragment into the unexpected list of the corresponding
* proc, or into the out-of-order (cant_match) list.
*/
pml_proc = &(pml_comm->procs[hdr->hdr_src]);
pml_proc = mca_pml_ob1_peer_lookup(comm, hdr->hdr_src);
if( ((uint16_t)hdr->hdr_seq) == ((uint16_t)pml_proc->expected_sequence) ) {
/* We're now expecting the next sequence number. */
@ -283,12 +273,6 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
int mca_pml_ob1_del_comm(ompi_communicator_t* comm)
{
mca_pml_ob1_comm_t* pml_comm = comm->c_pml_comm;
int i;
for( i = 0; i < comm->c_remote_group->grp_proc_count; i++ ) {
OBJ_RELEASE(pml_comm->procs[i].ompi_proc);
}
OBJ_RELEASE(comm->c_pml_comm);
comm->c_pml_comm = NULL;
return OMPI_SUCCESS;
@ -303,9 +287,9 @@ int mca_pml_ob1_del_comm(ompi_communicator_t* comm)
int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs)
{
mca_btl_base_selected_module_t *sm;
opal_bitmap_t reachable;
int rc;
opal_list_item_t *item;
if(nprocs == 0)
return OMPI_SUCCESS;
@ -347,11 +331,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs)
BTLs requires iterating over the procs, as the BML does not
expose all currently in use btls. */
for (item = opal_list_get_first(&mca_btl_base_modules_initialized) ;
item != opal_list_get_end(&mca_btl_base_modules_initialized) ;
item = opal_list_get_next(item)) {
mca_btl_base_selected_module_t *sm =
(mca_btl_base_selected_module_t*) item;
OPAL_LIST_FOREACH(sm, &mca_btl_base_modules_initialized, mca_btl_base_selected_module_t) {
if (sm->btl_module->btl_eager_limit < sizeof(mca_pml_ob1_hdr_t)) {
opal_show_help("help-mpi-pml-ob1.txt", "eager_limit_too_small",
true,
@ -589,13 +569,19 @@ int mca_pml_ob1_dump(struct ompi_communicator_t* comm, int verbose)
/* iterate through all procs on communicator */
for( i = 0; i < (int)pml_comm->num_procs; i++ ) {
mca_pml_ob1_comm_proc_t* proc = &pml_comm->procs[i];
mca_pml_ob1_comm_proc_t* proc = pml_comm->procs[i];
if (NULL == proc) {
continue;
}
mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
size_t n;
opal_output(0, "[Rank %d] expected_seq %d ompi_proc %p send_seq %d\n",
i, proc->expected_sequence, (void*) proc->ompi_proc,
proc->send_sequence);
/* dump all receive queues */
if( opal_list_get_size(&proc->specific_receives) ) {
opal_output(0, "expected specific receives\n");

Просмотреть файл

@ -40,14 +40,15 @@ static void mca_pml_ob1_comm_proc_destruct(mca_pml_ob1_comm_proc_t* proc)
OBJ_DESTRUCT(&proc->frags_cant_match);
OBJ_DESTRUCT(&proc->specific_receives);
OBJ_DESTRUCT(&proc->unexpected_frags);
if (proc->ompi_proc) {
OBJ_RELEASE(proc->ompi_proc);
}
}
static OBJ_CLASS_INSTANCE(
mca_pml_ob1_comm_proc_t,
opal_object_t,
mca_pml_ob1_comm_proc_construct,
mca_pml_ob1_comm_proc_destruct);
OBJ_CLASS_INSTANCE(mca_pml_ob1_comm_proc_t, opal_object_t,
mca_pml_ob1_comm_proc_construct,
mca_pml_ob1_comm_proc_destruct);
static void mca_pml_ob1_comm_construct(mca_pml_ob1_comm_t* comm)
@ -63,11 +64,16 @@ static void mca_pml_ob1_comm_construct(mca_pml_ob1_comm_t* comm)
static void mca_pml_ob1_comm_destruct(mca_pml_ob1_comm_t* comm)
{
size_t i;
for(i=0; i<comm->num_procs; i++)
OBJ_DESTRUCT((&comm->procs[i]));
if(NULL != comm->procs)
if (NULL != comm->procs) {
for (size_t i = 0; i < comm->num_procs; ++i) {
if (comm->procs[i]) {
OBJ_RELEASE(comm->procs[i]);
}
}
free(comm->procs);
}
OBJ_DESTRUCT(&comm->wild_receives);
OBJ_DESTRUCT(&comm->matching_lock);
}
@ -80,18 +86,13 @@ OBJ_CLASS_INSTANCE(
mca_pml_ob1_comm_destruct);
int mca_pml_ob1_comm_init_size(mca_pml_ob1_comm_t* comm, size_t size)
int mca_pml_ob1_comm_init_size (mca_pml_ob1_comm_t* comm, size_t size)
{
size_t i;
/* send message sequence-number support - sender side */
comm->procs = (mca_pml_ob1_comm_proc_t*)malloc(sizeof(mca_pml_ob1_comm_proc_t)*size);
comm->procs = (mca_pml_ob1_comm_proc_t **) calloc(size, sizeof (mca_pml_ob1_comm_proc_t *));
if(NULL == comm->procs) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
for(i=0; i<size; i++) {
OBJ_CONSTRUCT(comm->procs+i, mca_pml_ob1_comm_proc_t);
}
comm->num_procs = size;
return OMPI_SUCCESS;
}

Просмотреть файл

@ -24,6 +24,7 @@
#include "opal/threads/mutex.h"
#include "opal/class/opal_list.h"
#include "ompi/proc/proc.h"
#include "ompi/communicator/communicator.h"
BEGIN_C_DECLS
@ -42,6 +43,7 @@ struct mca_pml_ob1_comm_proc_t {
};
typedef struct mca_pml_ob1_comm_proc_t mca_pml_ob1_comm_proc_t;
OBJ_CLASS_DECLARATION(mca_pml_ob1_comm_proc_t);
/**
* Cached on ompi_communicator_t to hold queues/state
@ -56,7 +58,7 @@ struct mca_pml_comm_t {
#endif
opal_mutex_t matching_lock; /**< matching lock */
opal_list_t wild_receives; /**< queue of unmatched wild (source process not specified) receives */
mca_pml_ob1_comm_proc_t* procs;
mca_pml_ob1_comm_proc_t **procs;
size_t num_procs;
size_t last_probed;
};
@ -64,6 +66,18 @@ typedef struct mca_pml_comm_t mca_pml_ob1_comm_t;
OBJ_CLASS_DECLARATION(mca_pml_ob1_comm_t);
/**
 * Look up (and lazily create) the ob1 per-peer bookkeeping structure.
 *
 * @param[in] comm  communicator the peer belongs to
 * @param[in] rank  peer rank within the communicator
 *
 * @returns the cached mca_pml_ob1_comm_proc_t for the peer
 *
 * On first lookup the per-peer structure is allocated and a reference
 * on the peer's ompi_proc_t is retained (released in the comm_proc
 * destructor, per the destruct code elsewhere in this change set).
 *
 * NOTE(review): the lazy allocation here is not visibly protected by a
 * lock — presumably callers serialize via the communicator's matching
 * lock or single-threaded setup; confirm.  OBJ_NEW's result is also
 * dereferenced without a NULL (out-of-memory) check.
 */
static inline mca_pml_ob1_comm_proc_t *mca_pml_ob1_peer_lookup (struct ompi_communicator_t *comm, int rank)
{
    mca_pml_ob1_comm_t *pml_comm = (mca_pml_ob1_comm_t *)comm->c_pml_comm;

    if (OPAL_UNLIKELY(NULL == pml_comm->procs[rank])) {
        /* first communication with this peer on this communicator */
        pml_comm->procs[rank] = OBJ_NEW(mca_pml_ob1_comm_proc_t);
        pml_comm->procs[rank]->ompi_proc = ompi_comm_peer_lookup (comm, rank);
        OBJ_RETAIN(pml_comm->procs[rank]->ompi_proc);
    }

    return pml_comm->procs[rank];
}
/**
* Initialize an instance of mca_pml_ob1_comm_t based on the communicator size.

Просмотреть файл

@ -144,9 +144,12 @@ static int mca_pml_ob1_get_unex_msgq_size (const struct mca_base_pvar_t *pvar, v
int i;
for (i = 0 ; i < comm_size ; ++i) {
pml_proc = pml_comm->procs + i;
values[i] = opal_list_get_size (&pml_proc->unexpected_frags);
pml_proc = pml_comm->procs[i];
if (pml_proc) {
values[i] = opal_list_get_size (&pml_proc->unexpected_frags);
} else {
values[i] = 0;
}
}
return OMPI_SUCCESS;
@ -162,9 +165,13 @@ static int mca_pml_ob1_get_posted_recvq_size (const struct mca_base_pvar_t *pvar
int i;
for (i = 0 ; i < comm_size ; ++i) {
pml_proc = pml_comm->procs + i;
pml_proc = pml_comm->procs[i];
values[i] = opal_list_get_size (&pml_proc->specific_receives);
if (pml_proc) {
values[i] = opal_list_get_size (&pml_proc->specific_receives);
} else {
values[i] = 0;
}
}
return OMPI_SUCCESS;

Просмотреть файл

@ -148,7 +148,6 @@ mca_pml_ob1_imrecv( void *buf,
int src, tag;
ompi_communicator_t *comm;
mca_pml_ob1_comm_proc_t* proc;
mca_pml_ob1_comm_t* ob1_comm;
uint64_t seq;
/* get the request from the message and the frag from the request
@ -158,7 +157,6 @@ mca_pml_ob1_imrecv( void *buf,
src = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE;
tag = recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG;
comm = (*message)->comm;
ob1_comm = recvreq->req_recv.req_base.req_comm->c_pml_comm;
seq = recvreq->req_recv.req_base.req_sequence;
/* make the request a recv request again */
@ -196,7 +194,7 @@ mca_pml_ob1_imrecv( void *buf,
/* Note - sequence number already assigned */
recvreq->req_recv.req_base.req_sequence = seq;
proc = &ob1_comm->procs[recvreq->req_recv.req_base.req_peer];
proc = mca_pml_ob1_peer_lookup (comm, recvreq->req_recv.req_base.req_peer);
recvreq->req_recv.req_base.req_proc = proc->ompi_proc;
prepare_recv_req_converter(recvreq);
@ -243,7 +241,6 @@ mca_pml_ob1_mrecv( void *buf,
int src, tag, rc;
ompi_communicator_t *comm;
mca_pml_ob1_comm_proc_t* proc;
mca_pml_ob1_comm_t* ob1_comm;
uint64_t seq;
/* get the request from the message and the frag from the request
@ -254,7 +251,6 @@ mca_pml_ob1_mrecv( void *buf,
src = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE;
tag = recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG;
seq = recvreq->req_recv.req_base.req_sequence;
ob1_comm = recvreq->req_recv.req_base.req_comm->c_pml_comm;
/* make the request a recv request again */
/* The old request kept pointers to comm and the char datatype.
@ -290,7 +286,7 @@ mca_pml_ob1_mrecv( void *buf,
/* Note - sequence number already assigned */
recvreq->req_recv.req_base.req_sequence = seq;
proc = &ob1_comm->procs[recvreq->req_recv.req_base.req_peer];
proc = mca_pml_ob1_peer_lookup (comm, recvreq->req_recv.req_base.req_peer);
recvreq->req_recv.req_base.req_proc = proc->ompi_proc;
prepare_recv_req_converter(recvreq);

Просмотреть файл

@ -126,15 +126,14 @@ int mca_pml_ob1_isend(const void *buf,
ompi_communicator_t * comm,
ompi_request_t ** request)
{
mca_pml_ob1_comm_t* ob1_comm = comm->c_pml_comm;
mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup (comm, dst);
mca_pml_ob1_send_request_t *sendreq = NULL;
ompi_proc_t *dst_proc = ompi_comm_peer_lookup (comm, dst);
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*)
dst_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
ompi_proc_t *dst_proc = ob1_proc->ompi_proc;
mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (dst_proc);
int16_t seqn;
int rc;
seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_comm->procs[dst].send_sequence, 1);
seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1);
if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) {
rc = mca_pml_ob1_send_inline (buf, count, datatype, dst, tag, seqn, dst_proc,
@ -176,10 +175,9 @@ int mca_pml_ob1_send(const void *buf,
mca_pml_base_send_mode_t sendmode,
ompi_communicator_t * comm)
{
mca_pml_ob1_comm_t* ob1_comm = comm->c_pml_comm;
ompi_proc_t *dst_proc = ompi_comm_peer_lookup (comm, dst);
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*)
dst_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup (comm, dst);
ompi_proc_t *dst_proc = ob1_proc->ompi_proc;
mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (dst_proc);
mca_pml_ob1_send_request_t *sendreq = NULL;
int16_t seqn;
int rc;
@ -202,7 +200,7 @@ int mca_pml_ob1_send(const void *buf,
return OMPI_ERR_UNREACH;
}
seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_comm->procs[dst].send_sequence, 1);
seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1);
/**
* The immediate send will not have a request, so they are

Просмотреть файл

@ -143,7 +143,7 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl,
comm = (mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm;
/* source sequence number */
proc = &comm->procs[hdr->hdr_src];
proc = mca_pml_ob1_peer_lookup (comm_ptr, hdr->hdr_src);
/* We generate the MSG_ARRIVED event as soon as the PML is aware
* of a matching fragment arrival. Independing if it is received
@ -650,7 +650,7 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl,
/* source sequence number */
frag_msg_seq = hdr->hdr_seq;
proc = &comm->procs[hdr->hdr_src];
proc = mca_pml_ob1_peer_lookup (comm_ptr, hdr->hdr_src);
/**
* We generate the MSG_ARRIVED event as soon as the PML is aware of a matching

Просмотреть файл

@ -100,7 +100,8 @@ static int mca_pml_ob1_recv_request_free(struct ompi_request_t** request)
static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request, int complete)
{
mca_pml_ob1_recv_request_t* request = (mca_pml_ob1_recv_request_t*)ompi_request;
mca_pml_ob1_comm_t* comm = request->req_recv.req_base.req_comm->c_pml_comm;
ompi_communicator_t *comm = request->req_recv.req_base.req_comm;
mca_pml_ob1_comm_t *ob1_comm = comm->c_pml_comm;
if( true == request->req_match_received ) { /* way to late to cancel this one */
assert( OMPI_ANY_TAG != ompi_request->req_status.MPI_TAG ); /* not matched isn't it */
@ -108,11 +109,11 @@ static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request,
}
/* The rest should be protected behind the match logic lock */
OPAL_THREAD_LOCK(&comm->matching_lock);
OPAL_THREAD_LOCK(&ob1_comm->matching_lock);
if( request->req_recv.req_base.req_peer == OMPI_ANY_SOURCE ) {
opal_list_remove_item( &comm->wild_receives, (opal_list_item_t*)request );
opal_list_remove_item( &ob1_comm->wild_receives, (opal_list_item_t*)request );
} else {
mca_pml_ob1_comm_proc_t* proc = comm->procs + request->req_recv.req_base.req_peer;
mca_pml_ob1_comm_proc_t* proc = mca_pml_ob1_peer_lookup (comm, request->req_recv.req_base.req_peer);
opal_list_remove_item(&proc->specific_receives, (opal_list_item_t*)request);
}
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q,
@ -122,7 +123,7 @@ static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request,
* to true. Otherwise, the request will never be freed.
*/
request->req_recv.req_base.req_pml_complete = true;
OPAL_THREAD_UNLOCK(&comm->matching_lock);
OPAL_THREAD_UNLOCK(&ob1_comm->matching_lock);
OPAL_THREAD_LOCK(&ompi_request_lock);
ompi_request->req_status._cancelled = true;
@ -260,7 +261,7 @@ static int mca_pml_ob1_recv_request_ack(
ompi_proc_t* proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc;
mca_bml_base_endpoint_t* bml_endpoint = NULL;
bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
bml_endpoint = mca_bml_base_get_endpoint (proc);
/* by default copy everything */
recvreq->req_send_offset = bytes_received;
@ -654,7 +655,7 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
}
/* lookup bml datastructures */
bml_endpoint = (mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
bml_endpoint = mca_bml_base_get_endpoint (recvreq->req_recv.req_base.req_proc);
rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
#if OPAL_CUDA_SUPPORT
@ -1079,8 +1080,11 @@ static mca_pml_ob1_recv_frag_t*
recv_req_match_specific_proc( const mca_pml_ob1_recv_request_t *req,
mca_pml_ob1_comm_proc_t *proc )
{
if (NULL == proc) {
return NULL;
}
opal_list_t* unexpected_frags = &proc->unexpected_frags;
opal_list_item_t *i;
mca_pml_ob1_recv_frag_t* frag;
int tag = req->req_recv.req_base.req_tag;
@ -1088,20 +1092,12 @@ recv_req_match_specific_proc( const mca_pml_ob1_recv_request_t *req,
return NULL;
if( OMPI_ANY_TAG == tag ) {
for (i = opal_list_get_first(unexpected_frags);
i != opal_list_get_end(unexpected_frags);
i = opal_list_get_next(i)) {
frag = (mca_pml_ob1_recv_frag_t*)i;
OPAL_LIST_FOREACH(frag, unexpected_frags, mca_pml_ob1_recv_frag_t) {
if( frag->hdr.hdr_match.hdr_tag >= 0 )
return frag;
}
} else {
for (i = opal_list_get_first(unexpected_frags);
i != opal_list_get_end(unexpected_frags);
i = opal_list_get_next(i)) {
frag = (mca_pml_ob1_recv_frag_t*)i;
OPAL_LIST_FOREACH(frag, unexpected_frags, mca_pml_ob1_recv_frag_t) {
if( frag->hdr.hdr_match.hdr_tag == tag )
return frag;
}
@ -1118,7 +1114,7 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req,
mca_pml_ob1_comm_proc_t **p)
{
mca_pml_ob1_comm_t* comm = req->req_recv.req_base.req_comm->c_pml_comm;
mca_pml_ob1_comm_proc_t* proc = comm->procs;
mca_pml_ob1_comm_proc_t **procp = comm->procs;
size_t i;
/*
@ -1133,10 +1129,10 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req,
mca_pml_ob1_recv_frag_t* frag;
/* loop over messages from the current proc */
if((frag = recv_req_match_specific_proc(req, &proc[i]))) {
*p = &proc[i];
if((frag = recv_req_match_specific_proc(req, procp[i]))) {
*p = procp[i];
comm->last_probed = i;
req->req_recv.req_base.req_proc = proc[i].ompi_proc;
req->req_recv.req_base.req_proc = procp[i]->ompi_proc;
prepare_recv_req_converter(req);
return frag; /* match found */
}
@ -1145,10 +1141,10 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req,
mca_pml_ob1_recv_frag_t* frag;
/* loop over messages from the current proc */
if((frag = recv_req_match_specific_proc(req, &proc[i]))) {
*p = &proc[i];
if((frag = recv_req_match_specific_proc(req, procp[i]))) {
*p = procp[i];
comm->last_probed = i;
req->req_recv.req_base.req_proc = proc[i].ompi_proc;
req->req_recv.req_base.req_proc = procp[i]->ompi_proc;
prepare_recv_req_converter(req);
return frag; /* match found */
}
@ -1161,7 +1157,8 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req,
void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
{
mca_pml_ob1_comm_t* comm = req->req_recv.req_base.req_comm->c_pml_comm;
ompi_communicator_t *comm = req->req_recv.req_base.req_comm;
mca_pml_ob1_comm_t *ob1_comm = comm->c_pml_comm;
mca_pml_ob1_comm_proc_t* proc;
mca_pml_ob1_recv_frag_t* frag;
opal_list_t *queue;
@ -1179,7 +1176,7 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
MCA_PML_BASE_RECV_START(&req->req_recv.req_base);
OPAL_THREAD_LOCK(&comm->matching_lock);
OPAL_THREAD_LOCK(&ob1_comm->matching_lock);
/**
* The laps of time between the ACTIVATE event and the SEARCH_UNEX one include
* the cost of the request lock.
@ -1188,12 +1185,12 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
&(req->req_recv.req_base), PERUSE_RECV);
/* assign sequence number */
req->req_recv.req_base.req_sequence = comm->recv_sequence++;
req->req_recv.req_base.req_sequence = ob1_comm->recv_sequence++;
/* attempt to match posted recv */
if(req->req_recv.req_base.req_peer == OMPI_ANY_SOURCE) {
frag = recv_req_match_wild(req, &proc);
queue = &comm->wild_receives;
queue = &ob1_comm->wild_receives;
#if !OPAL_ENABLE_HETEROGENEOUS_SUPPORT
/* As we are in a homogeneous environment we know that all remote
* architectures are exactly the same as the local one. Therefore,
@ -1206,7 +1203,7 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
}
#endif /* !OPAL_ENABLE_HETEROGENEOUS_SUPPORT */
} else {
proc = &comm->procs[req->req_recv.req_base.req_peer];
proc = mca_pml_ob1_peer_lookup (comm, req->req_recv.req_base.req_peer);
req->req_recv.req_base.req_proc = proc->ompi_proc;
frag = recv_req_match_specific_proc(req, proc);
queue = &proc->specific_receives;
@ -1221,7 +1218,7 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
it when the message comes in. */
append_recv_req_to_queue(queue, req);
req->req_match_received = false;
OPAL_THREAD_UNLOCK(&comm->matching_lock);
OPAL_THREAD_UNLOCK(&ob1_comm->matching_lock);
} else {
if(OPAL_LIKELY(!IS_PROB_REQ(req))) {
PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_MATCH_UNEX,
@ -1239,7 +1236,7 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
opal_list_remove_item(&proc->unexpected_frags,
(opal_list_item_t*)frag);
OPAL_THREAD_UNLOCK(&comm->matching_lock);
OPAL_THREAD_UNLOCK(&ob1_comm->matching_lock);
switch(hdr->hdr_common.hdr_type) {
case MCA_PML_OB1_HDR_TYPE_MATCH:
@ -1269,14 +1266,14 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
restarted with this request during mrecv */
opal_list_remove_item(&proc->unexpected_frags,
(opal_list_item_t*)frag);
OPAL_THREAD_UNLOCK(&comm->matching_lock);
OPAL_THREAD_UNLOCK(&ob1_comm->matching_lock);
req->req_recv.req_base.req_addr = frag;
mca_pml_ob1_recv_request_matched_probe(req, frag->btl,
frag->segments, frag->num_segments);
} else {
OPAL_THREAD_UNLOCK(&comm->matching_lock);
OPAL_THREAD_UNLOCK(&ob1_comm->matching_lock);
mca_pml_ob1_recv_request_matched_probe(req, frag->btl,
frag->segments, frag->num_segments);
}

Просмотреть файл

@ -433,8 +433,7 @@ static inline int mca_pml_ob1_recv_request_ack_send(ompi_proc_t* proc,
{
size_t i;
mca_bml_base_btl_t* bml_btl;
mca_bml_base_endpoint_t* endpoint =
(mca_bml_base_endpoint_t*)proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (proc);
for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) {
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);

Просмотреть файл

@ -480,16 +480,16 @@ mca_pml_ob1_send_request_start_seq (mca_pml_ob1_send_request_t* sendreq, mca_bml
static inline int
mca_pml_ob1_send_request_start( mca_pml_ob1_send_request_t* sendreq )
{
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*)
sendreq->req_send.req_base.req_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
mca_pml_ob1_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm;
mca_bml_base_endpoint_t *endpoint = mca_bml_base_get_endpoint (sendreq->req_send.req_base.req_proc);
ompi_communicator_t *comm = sendreq->req_send.req_base.req_comm;
mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup (comm, sendreq->req_send.req_base.req_peer);
int32_t seqn;
if (OPAL_UNLIKELY(NULL == endpoint)) {
return OMPI_ERR_UNREACH;
}
seqn = OPAL_THREAD_ADD32(&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence, 1);
seqn = OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1);
return mca_pml_ob1_send_request_start_seq (sendreq, endpoint, seqn);
}

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,7 +11,7 @@
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2014 Research Organization for Information Science
@ -43,6 +44,8 @@
static opal_list_t ompi_proc_list;
static opal_mutex_t ompi_proc_lock;
static opal_hash_table_t ompi_proc_hash;
ompi_proc_t* ompi_proc_local_proc = NULL;
static void ompi_proc_construct(ompi_proc_t* proc);
@ -83,49 +86,223 @@ void ompi_proc_destruct(ompi_proc_t* proc)
}
OPAL_THREAD_LOCK(&ompi_proc_lock);
opal_list_remove_item(&ompi_proc_list, (opal_list_item_t*)proc);
opal_hash_table_remove_value_ptr (&ompi_proc_hash, &proc->super.proc_name, sizeof (proc->super.proc_name));
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
}
/**
* Allocate a new ompi_proc_T for the given jobid/vpid
*
* @param[in] jobid Job identifier
* @param[in] vpid Process identifier
* @param[out] procp New ompi_proc_t structure
*
* This function allocates a new ompi_proc_t and inserts it into
* the process list and hash table.
*/
/**
 * Allocate a new ompi_proc_t for the given jobid/vpid
 *
 * @param[in]  jobid  Job identifier
 * @param[in]  vpid   Process identifier
 * @param[out] procp  New ompi_proc_t structure (NULL on failure)
 *
 * @returns OMPI_SUCCESS or OMPI_ERR_OUT_OF_RESOURCE
 *
 * Allocates a new ompi_proc_t and inserts it into the global process
 * list and hash table.
 *
 * NOTE(review): callers appear to hold ompi_proc_lock around this call
 * (see ompi_proc_for_name); confirm for all call sites, since the list
 * and hash table are not otherwise protected here.
 *
 * Fix: check the OBJ_NEW result before use instead of appending and
 * dereferencing a potentially NULL pointer on allocation failure.
 */
static int ompi_proc_allocate (ompi_jobid_t jobid, ompi_vpid_t vpid, ompi_proc_t **procp) {
    ompi_proc_t *proc = OBJ_NEW(ompi_proc_t);
    if (OPAL_UNLIKELY(NULL == proc)) {
        *procp = NULL;
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    opal_list_append(&ompi_proc_list, (opal_list_item_t*)proc);

    OMPI_CAST_RTE_NAME(&proc->super.proc_name)->jobid = jobid;
    OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid = vpid;

    /* index the proc by its full process name for O(1) lookup */
    opal_hash_table_set_value_ptr (&ompi_proc_hash, &proc->super.proc_name, sizeof (proc->super.proc_name),
                                   proc);

    *procp = proc;

    return OMPI_SUCCESS;
}
/**
* Finish setting up an ompi_proc_t
*
* @param[in] proc ompi process structure
*
* This function contains the core code of ompi_proc_complete_init() and
* ompi_proc_refresh(). The tasks performed by this function include
* retrieving the hostname (if below the modex cutoff), determining the
* remote architecture, and calculating the locality of the process.
*/
static int ompi_proc_complete_init_single (ompi_proc_t *proc)
{
    uint16_t u16, *u16ptr;
    int ret;

    u16ptr = &u16;

    /* the local proc was fully populated at creation time */
    if (OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid == OMPI_PROC_MY_NAME->vpid) {
        /* nothing else to do */
        return OMPI_SUCCESS;
    }

    /* get the locality information - all RTEs are required
     * to provide this information at startup */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc->super.proc_name, &u16ptr, OPAL_UINT16);
    if (OPAL_SUCCESS != ret) {
        /* no locality available: conservatively treat as remote */
        proc->super.proc_flags = OPAL_PROC_NON_LOCAL;
    } else {
        proc->super.proc_flags = u16;
    }

    /* we can retrieve the hostname at no cost because it
     * was provided at startup */
    OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_HOSTNAME, &proc->super.proc_name,
                          (char**)&(proc->super.proc_hostname), OPAL_STRING);
    if (OPAL_SUCCESS != ret) {
        /* NOTE(review): this makes a missing hostname fatal, while the
         * replaced code treated it as "we can live without it" and set
         * proc_hostname = NULL — confirm this stricter behavior is
         * intentional */
        return ret;
    }

#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
    /* get the remote architecture - this might force a modex except
     * for those environments where the RM provides it */
    {
        uint32_t *ui32ptr;
        ui32ptr = &(proc->super.proc_arch);
        OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_ARCH, &proc->super.proc_name,
                              (void**)&ui32ptr, OPAL_UINT32);
        if (OPAL_SUCCESS == ret) {
            /* if arch is different than mine, create a new convertor for this proc */
            if (proc->super.proc_arch != opal_local_arch) {
                /* drop the default (same-arch) convertor before replacing it */
                OBJ_RELEASE(proc->super.proc_convertor);
                proc->super.proc_convertor = opal_convertor_create(proc->super.proc_arch, 0);
            }
        } else if (OMPI_ERR_NOT_IMPLEMENTED == ret) {
            /* runtime cannot provide arch info: assume homogeneous */
            proc->super.proc_arch = opal_local_arch;
        } else {
            return ret;
        }
    }
#else
    /* must be same arch as my own */
    proc->super.proc_arch = opal_local_arch;
#endif

    return OMPI_SUCCESS;
}
opal_proc_t *ompi_proc_lookup (const opal_process_name_t proc_name)
{
ompi_proc_t *proc = NULL;
int ret;
/* try to lookup the value in the hash table */
ret = opal_hash_table_get_value_ptr (&ompi_proc_hash, &proc_name, sizeof (proc_name), (void **) &proc);
if (OPAL_SUCCESS == ret) {
return &proc->super;
}
return NULL;
}
/**
 * Look up a process by name, creating the proc structure on demand.
 *
 * @param[in] proc_name  opal process name (jobid + vpid)
 *
 * @returns pointer to the opal_proc_t superclass of the (possibly newly
 *          created) ompi_proc_t, or NULL on allocation/init failure
 *
 * Fast path: lock-free hash lookup.  Slow path: take ompi_proc_lock,
 * re-check the table (double-checked locking against a racing thread),
 * then allocate and initialize a new proc.
 */
opal_proc_t *ompi_proc_for_name (const opal_process_name_t proc_name)
{
    ompi_proc_t *proc = NULL;
    int ret;

    /* try to lookup the value in the hash table */
    ret = opal_hash_table_get_value_ptr (&ompi_proc_hash, &proc_name, sizeof (proc_name), (void **) &proc);
    if (OPAL_SUCCESS == ret) {
        return &proc->super;
    }

    OPAL_THREAD_LOCK(&ompi_proc_lock);
    do {
        /* double-check that another competing thread has not added this proc */
        ret = opal_hash_table_get_value_ptr (&ompi_proc_hash, &proc_name, sizeof (proc_name), (void **) &proc);
        if (OPAL_SUCCESS == ret) {
            break;
        }

        /* allocate a new ompi_proc_t object for the process and insert it into the process table */
        ret = ompi_proc_allocate (proc_name.jobid, proc_name.vpid, &proc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            /* allocation fail */
            break;
        }

        /* finish filling in the important proc data fields */
        ret = ompi_proc_complete_init_single (proc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            /* NOTE(review): the allocated proc stays in the global list
             * and hash table here even though NULL is returned, leaving
             * a partially-initialized entry behind — confirm this is
             * acceptable or whether it should be removed/released */
            proc = NULL;
            break;
        }
    } while (0);
    OPAL_THREAD_UNLOCK(&ompi_proc_lock);

    return (opal_proc_t *) proc;
}
int ompi_proc_init(void)
{
ompi_vpid_t i;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
int opal_proc_hash_init_size = (ompi_process_info.num_procs < ompi_add_procs_cutoff) ? ompi_process_info.num_procs :
1024;
ompi_proc_t *proc;
int ret;
#endif
OBJ_CONSTRUCT(&ompi_proc_list, opal_list_t);
OBJ_CONSTRUCT(&ompi_proc_lock, opal_mutex_t);
OBJ_CONSTRUCT(&ompi_proc_hash, opal_hash_table_t);
/* create proc structures and find self */
for( i = 0; i < ompi_process_info.num_procs; i++ ) {
ompi_proc_t *proc = OBJ_NEW(ompi_proc_t);
opal_list_append(&ompi_proc_list, (opal_list_item_t*)proc);
ret = opal_hash_table_init (&ompi_proc_hash, opal_proc_hash_init_size);
if (OPAL_SUCCESS != ret) {
return ret;
}
OMPI_CAST_RTE_NAME(&proc->super.proc_name)->jobid = OMPI_PROC_MY_NAME->jobid;
OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid = i;
/* create a proc for the local process */
ret = ompi_proc_allocate (OMPI_PROC_MY_NAME->jobid, OMPI_PROC_MY_NAME->vpid, &proc);
if (OMPI_SUCCESS != ret) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
if (i == OMPI_PROC_MY_NAME->vpid) {
ompi_proc_local_proc = proc;
proc->super.proc_flags = OPAL_PROC_ALL_LOCAL;
proc->super.proc_hostname = strdup(ompi_process_info.nodename);
proc->super.proc_arch = opal_local_arch;
/* Register the local proc with OPAL */
opal_proc_local_set(&proc->super);
/* set local process data */
ompi_proc_local_proc = proc;
proc->super.proc_flags = OPAL_PROC_ALL_LOCAL;
proc->super.proc_hostname = strdup(ompi_process_info.nodename);
proc->super.proc_arch = opal_local_arch;
/* Register the local proc with OPAL */
opal_proc_local_set(&proc->super);
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
/* add our arch to the modex */
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL,
OPAL_PMIX_ARCH, &opal_local_arch, OPAL_UINT32);
if (OPAL_SUCCESS != ret) {
/* add our arch to the modex */
OPAL_MODEX_SEND_VALUE(ret, PMIX_GLOBAL,
OPAL_PMIX_ARCH, &opal_local_arch, OPAL_UINT32);
if (OPAL_SUCCESS != ret) {
return ret;
}
#endif
if (ompi_process_info.num_procs < ompi_add_procs_cutoff) {
/* create proc structures and find self */
for (ompi_vpid_t i = 0 ; i < ompi_process_info.num_procs ; ++i ) {
if (i == OMPI_PROC_MY_NAME->vpid) {
continue;
}
ret = ompi_proc_allocate (OMPI_PROC_MY_NAME->jobid, i, &proc);
if (OMPI_SUCCESS != ret) {
return ret;
}
#endif
}
}
return OMPI_SUCCESS;
}
/**
 * List-sort comparison function ordering procs by ascending vpid.
 *
 * @param[in] a  first list item (an ompi_proc_t)
 * @param[in] b  second list item (an ompi_proc_t)
 *
 * @returns 1 if a's vpid is greater, -1 if smaller, 0 if equal
 *
 * Fix: the previous version never returned 0, relying on the stated
 * assumption that vpids "should never be equal".  Returning -1 for
 * equal keys violates the standard comparator contract (antisymmetry),
 * so handle equality explicitly; ordering of unequal elements is
 * unchanged.
 */
static int ompi_proc_compare_vid (opal_list_item_t **a, opal_list_item_t **b)
{
    ompi_proc_t *proca = (ompi_proc_t *) *a;
    ompi_proc_t *procb = (ompi_proc_t *) *b;

    if (proca->super.proc_name.vpid > procb->super.proc_name.vpid) {
        return 1;
    }
    if (proca->super.proc_name.vpid < procb->super.proc_name.vpid) {
        return -1;
    }

    /* vpids are expected to be unique in this list, but obey the
     * comparator contract anyway */
    return 0;
}
/**
* The process creation is split into two steps. The second step
@ -140,58 +317,47 @@ int ompi_proc_complete_init(void)
{
ompi_proc_t *proc;
int ret, errcode = OMPI_SUCCESS;
uint16_t u16, *u16ptr;
OPAL_THREAD_LOCK(&ompi_proc_lock);
u16ptr = &u16;
OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
if (OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid != OMPI_PROC_MY_NAME->vpid) {
/* get the locality information - all RTEs are required
* to provide this information at startup */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc->super.proc_name, &u16ptr, OPAL_UINT16);
if (OPAL_SUCCESS != ret) {
proc->super.proc_flags = OPAL_PROC_NON_LOCAL;
} else {
proc->super.proc_flags = u16;
}
/* we can retrieve the hostname at no cost because it
* was provided at startup */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_HOSTNAME, &proc->super.proc_name,
(char**)&(proc->super.proc_hostname), OPAL_STRING);
if (OPAL_SUCCESS != ret) {
/* we can live without it */
proc->super.proc_hostname = NULL;
}
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
/* get the remote architecture - this might force a modex except
* for those environments where the RM provides it */
{
uint32_t *ui32ptr;
ui32ptr = &(proc->super.proc_arch);
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_ARCH, &proc->super.proc_name,
(void**)&ui32ptr, OPAL_UINT32);
if (OPAL_SUCCESS == ret) {
/* if arch is different than mine, create a new convertor for this proc */
if (proc->super.proc_arch != opal_local_arch) {
OBJ_RELEASE(proc->super.proc_convertor);
proc->super.proc_convertor = opal_convertor_create(proc->super.proc_arch, 0);
}
} else if (OMPI_ERR_NOT_IMPLEMENTED == ret) {
proc->super.proc_arch = opal_local_arch;
} else {
errcode = ret;
break;
}
}
#else
/* must be same arch as my own */
proc->super.proc_arch = opal_local_arch;
#endif
ret = ompi_proc_complete_init_single (proc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
errcode = ret;
break;
}
}
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
if (ompi_process_info.num_procs >= ompi_add_procs_cutoff) {
uint16_t u16, *u16ptr;
u16ptr = &u16;
/* find and add all local processes */
for (ompi_vpid_t i = 0 ; i < ompi_process_info.num_procs ; ++i ) {
opal_process_name_t proc_name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid};
uint16_t locality = OPAL_PROC_NON_LOCAL;
if (OMPI_PROC_MY_NAME->vpid == i) {
continue;
}
/* the runtime is required to fill in locality for all local processes by this
* point. only local processes will have locality set */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc_name, &u16ptr, OPAL_UINT16);
if (OPAL_SUCCESS == ret) {
locality = u16;
}
if (OPAL_PROC_NON_LOCAL != locality) {
(void) ompi_proc_for_name (proc_name);
}
}
}
opal_list_sort (&ompi_proc_list, ompi_proc_compare_vid);
return errcode;
}
@ -227,6 +393,7 @@ int ompi_proc_finalize (void)
/* now destruct the list and thread lock */
OBJ_DESTRUCT(&ompi_proc_list);
OBJ_DESTRUCT(&ompi_proc_lock);
OBJ_DESTRUCT(&ompi_proc_hash);
return OMPI_SUCCESS;
}
@ -248,9 +415,7 @@ ompi_proc_t** ompi_proc_world(size_t *size)
/* First count how many match this jobid */
OPAL_THREAD_LOCK(&ompi_proc_lock);
for (proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list);
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, OMPI_CAST_RTE_NAME(&proc->super.proc_name), &my_name)) {
++count;
}
@ -265,9 +430,7 @@ ompi_proc_t** ompi_proc_world(size_t *size)
/* now save only the procs that match this jobid */
count = 0;
for (proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list);
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, &proc->super.proc_name, &my_name)) {
/* DO NOT RETAIN THIS OBJECT - the reference count on this
* object will be adjusted by external callers. The intent
@ -305,9 +468,7 @@ ompi_proc_t** ompi_proc_all(size_t* size)
}
OPAL_THREAD_LOCK(&ompi_proc_lock);
for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list);
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
/* We know this isn't consistent with the behavior in ompi_proc_world,
* but we are leaving the RETAIN for now because the code using this function
* assumes that the results need to be released when done. It will
@ -349,9 +510,7 @@ ompi_proc_t * ompi_proc_find ( const ompi_process_name_t * name )
/* return the proc-struct which matches this jobid+process id */
mask = OMPI_RTE_CMP_JOBID | OMPI_RTE_CMP_VPID;
OPAL_THREAD_LOCK(&ompi_proc_lock);
for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list);
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, &proc->super.proc_name, name)) {
rproc = proc;
break;
@ -366,21 +525,14 @@ ompi_proc_t * ompi_proc_find ( const ompi_process_name_t * name )
int ompi_proc_refresh(void)
{
ompi_proc_t *proc = NULL;
opal_list_item_t *item = NULL;
ompi_vpid_t i = 0;
int ret=OMPI_SUCCESS;
uint16_t u16, *u16ptr;
OPAL_THREAD_LOCK(&ompi_proc_lock);
for( item = opal_list_get_first(&ompi_proc_list), i = 0;
item != opal_list_get_end(&ompi_proc_list);
item = opal_list_get_next(item), ++i ) {
proc = (ompi_proc_t*)item;
OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
/* Does not change: proc->super.proc_name.vpid */
OMPI_CAST_RTE_NAME(&proc->super.proc_name)->jobid = OMPI_PROC_MY_NAME->jobid;
u16ptr = &u16;
/* Make sure to clear the local flag before we set it below */
proc->super.proc_flags = 0;
@ -392,56 +544,10 @@ int ompi_proc_refresh(void)
proc->super.proc_arch = opal_local_arch;
opal_proc_local_set(&proc->super);
} else {
/* get the locality information - all RTEs are required
* to provide this information at startup */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc->super.proc_name, &u16ptr, OPAL_UINT16);
if (OPAL_SUCCESS != ret) {
proc->super.proc_flags = OPAL_PROC_NON_LOCAL;
} else {
proc->super.proc_flags = u16;
ret = ompi_proc_complete_init_single (proc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
break;
}
if (ompi_process_info.num_procs < ompi_direct_modex_cutoff) {
/* IF the number of procs falls below the specified cutoff,
* then we assume the job is small enough that retrieving
* the hostname (which will typically cause retrieval of
* ALL modex info for this proc) will have no appreciable
* impact on launch scaling
*/
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_HOSTNAME, &proc->super.proc_name,
(char**)&(proc->super.proc_hostname), OPAL_STRING);
if (OMPI_SUCCESS != ret) {
break;
}
} else {
/* just set the hostname to NULL for now - we'll fill it in
* as modex_recv's are called for procs we will talk to, thus
* avoiding retrieval of ALL modex info for this proc until
* required. Transports that delay calling modex_recv until
* first message will therefore scale better than those that
* call modex_recv on all procs during init.
*/
proc->super.proc_hostname = NULL;
}
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
{
/* get the remote architecture */
uint32_t* uiptr = &(proc->super.proc_arch);
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_ARCH, &proc->super.proc_name,
(void**)&uiptr, OPAL_UINT32);
if (OMPI_SUCCESS != ret) {
break;
}
/* if arch is different than mine, create a new convertor for this proc */
if (proc->super.proc_arch != opal_local_arch) {
OBJ_RELEASE(proc->super.proc_convertor);
proc->super.proc_convertor = opal_convertor_create(proc->super.proc_arch, 0);
}
}
#else
/* must be same arch as my own */
proc->super.proc_arch = opal_local_arch;
#endif
}
}
@ -454,7 +560,7 @@ int
ompi_proc_pack(ompi_proc_t **proclist, int proclistsize,
opal_buffer_t* buf)
{
int i, rc;
int rc;
OPAL_THREAD_LOCK(&ompi_proc_lock);
@ -470,7 +576,7 @@ ompi_proc_pack(ompi_proc_t **proclist, int proclistsize,
* reduced. For now, just go ahead and pack the info so it
* can be sent.
*/
for (i=0; i<proclistsize; i++) {
for (int i = 0 ; i < proclistsize ; ++i) {
rc = opal_dss.pack(buf, &(proclist[i]->super.proc_name), 1, OMPI_NAME);
if(rc != OPAL_SUCCESS) {
OMPI_ERROR_LOG(rc);
@ -503,9 +609,7 @@ ompi_proc_find_and_add(const ompi_process_name_t * name, bool* isnew)
/* return the proc-struct which matches this jobid+process id */
mask = OMPI_RTE_CMP_JOBID | OMPI_RTE_CMP_VPID;
OPAL_THREAD_LOCK(&ompi_proc_lock);
for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list);
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, &proc->super.proc_name, name)) {
rproc = proc;
*isnew = false;
@ -538,7 +642,6 @@ ompi_proc_unpack(opal_buffer_t* buf,
int proclistsize, ompi_proc_t ***proclist,
int *newproclistsize, ompi_proc_t ***newproclist)
{
int i;
size_t newprocs_len = 0;
ompi_proc_t **plist=NULL, **newprocs = NULL;
@ -558,7 +661,7 @@ ompi_proc_unpack(opal_buffer_t* buf,
/* cycle through the array of provided procs and unpack
* their info - as packed by ompi_proc_pack
*/
for (i=0; i<proclistsize; i++){
for (int i = 0; i < proclistsize ; ++i){
int32_t count=1;
ompi_process_name_t new_name;
uint32_t new_arch;

Просмотреть файл

@ -304,6 +304,35 @@ OMPI_DECLSPEC int ompi_proc_unpack(opal_buffer_t *buf,
*/
OMPI_DECLSPEC int ompi_proc_refresh(void);
/**
* Get the ompi_proc_t for a given process name
*
* @param[in] proc_name opal process name
*
* @returns cached or new ompi_proc_t for the given process name
*
* This function looks up the given process name in the hash of existing
* ompi_proc_t structures. If no ompi_proc_t structure exists matching the
* given name a new ompi_proc_t is allocated, initialized, and returned.
*
* @note The ompi_proc_t is added to the local list of processes but is not
* added to any communicator. ompi_comm_peer_lookup is responsible for caching
* the ompi_proc_t on a communicator.
*/
OMPI_DECLSPEC opal_proc_t *ompi_proc_for_name (const opal_process_name_t proc_name);
OMPI_DECLSPEC opal_proc_t *ompi_proc_lookup (const opal_process_name_t proc_name);
/**
 * Encode an opal process name as a negative integer "sentinel" value.
 *
 * The negated value can be stored in a pointer-sized slot (presumably in
 * place of an ompi_proc_t pointer, given the sparse-group changes in this
 * commit — verify against callers) and later decoded with
 * ompi_proc_sentinel_to_name().
 *
 * NOTE(review): reads the first sizeof(intptr_t) bytes of `name` through an
 * intptr_t pointer — this assumes sizeof(opal_process_name_t) ==
 * sizeof(intptr_t) and relies on strict-aliasing-unsafe type punning;
 * confirm behavior on 32-bit targets.
 */
static inline intptr_t ompi_proc_name_to_sentinel (opal_process_name_t name) {
    return -*((intptr_t *) &name);
}
/**
 * Inverse of ompi_proc_name_to_sentinel(): recover the process name encoded
 * in a sentinel value by un-negating it and reinterpreting its bytes.
 *
 * NOTE(review): reinterprets the local intptr_t's storage as an
 * opal_process_name_t — assumes the two types have the same size (an
 * out-of-bounds read otherwise) and uses strict-aliasing-unsafe type
 * punning; confirm behavior on 32-bit targets.
 */
static inline opal_process_name_t ompi_proc_sentinel_to_name (intptr_t sentinel) {
    sentinel = -sentinel;
    return *((opal_process_name_t *) &sentinel);
}
END_C_DECLS

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -13,6 +14,8 @@
* Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -88,7 +91,7 @@ static void try_kill_peers(ompi_communicator_t *comm,
} else {
assert(count <= nprocs);
procs[count++] =
*OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i)->super.proc_name);
*OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i, true)->super.proc_name);
}
}
@ -96,7 +99,7 @@ static void try_kill_peers(ompi_communicator_t *comm,
for (i = 0; i < ompi_comm_remote_size(comm); ++i) {
assert(count <= nprocs);
procs[count++] =
*OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i)->super.proc_name);
*OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i, true)->super.proc_name);
}
if (nprocs > 0) {

Просмотреть файл

@ -400,6 +400,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
opal_compare_proc = _process_name_compare;
opal_convert_string_to_process_name = _convert_string_to_process_name;
opal_convert_process_name_to_string = _convert_process_name_to_string;
opal_proc_for_name = ompi_proc_for_name;
/* Register MCA variables */
if (OPAL_SUCCESS != (ret = ompi_register_mca_variables())) {

Просмотреть файл

@ -64,6 +64,7 @@ int ompi_mpi_event_tick_rate = -1;
char *ompi_mpi_show_mca_params_string = NULL;
bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE);
bool ompi_mpi_preconnect_mpi = false;
uint32_t ompi_add_procs_cutoff = 1024;
static bool show_default_mca_params = false;
static bool show_file_mca_params = false;
@ -288,6 +289,16 @@ int ompi_mpi_register_params(void)
ompi_rte_abort(1, NULL);
}
ompi_add_procs_cutoff = 1024;
(void) mca_base_var_register ("ompi", "mpi", NULL, "add_procs_cutoff",
"Maximum world size for pre-allocating resources for all "
"remote processes. Increasing this limit may improve "
"communication performance at the cost of memory usage "
"(default: 1024)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL,
0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
&ompi_add_procs_cutoff);
return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -9,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
@ -123,11 +124,16 @@ OMPI_DECLSPEC extern bool ompi_have_sparse_group_storage;
*/
OMPI_DECLSPEC extern bool ompi_use_sparse_group_storage;
/*
/**
* Cutoff point for retrieving hostnames
*/
OMPI_DECLSPEC extern uint32_t ompi_direct_modex_cutoff;
/**
* Cutoff point for calling add_procs for all processes
*/
OMPI_DECLSPEC extern uint32_t ompi_add_procs_cutoff;
/**
* Register MCA parameters used by the MPI layer.
*

Просмотреть файл

@ -605,12 +605,15 @@ typedef int (*mca_btl_base_module_finalize_fn_t)(
* modex_recv() function. The BTL may utilize this information to
* determine reachability of each peer process.
*
* For each process that is reachable by the BTL, the bit corresponding to the index
* into the proc array (nprocs) should be set in the reachable bitmask. The BTL
* will return an array of pointers to a data structure defined
* by the BTL that is then returned to the BTL on subsequent calls to the BTL data
* transfer functions (e.g btl_send). This may be used by the BTL to cache any addressing
* or connection information (e.g. TCP socket, IB queue pair).
* The caller may pass a "reachable" bitmap pointer. If it is not
* NULL, for each process that is reachable by the BTL, the bit
* corresponding to the index into the proc array (nprocs) should be
* set in the reachable bitmask. The BTL will return an array of
* pointers to a data structure defined by the BTL that is then
* returned to the BTL on subsequent calls to the BTL data transfer
* functions (e.g btl_send). This may be used by the BTL to cache any
* addressing or connection information (e.g. TCP socket, IB queue
* pair).
*/
typedef int (*mca_btl_base_module_add_procs_fn_t)(
struct mca_btl_base_module_t* btl,

Просмотреть файл

@ -871,6 +871,7 @@ int mca_btl_openib_add_procs(
for (i = 0, local_procs = 0 ; i < (int) nprocs; i++) {
struct opal_proc_t* proc = procs[i];
mca_btl_openib_proc_t* ib_proc;
bool found_existing = false;
int remote_matching_port;
opal_output(-1, "add procs: adding proc %d", i);
@ -898,6 +899,24 @@ int mca_btl_openib_add_procs(
continue;
}
OPAL_THREAD_LOCK(&ib_proc->proc_lock);
for (j = 0 ; j < (int) ib_proc->proc_endpoint_count ; ++j) {
endpoint = ib_proc->proc_endpoints[j];
if (endpoint->endpoint_btl == openib_btl) {
found_existing = true;
break;
}
}
OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
if (found_existing) {
if (reachable) {
opal_bitmap_set_bit(reachable, i);
}
peers[i] = endpoint;
continue;
}
/* check if the remote proc has any ports that:
- on the same subnet as the local proc, and
- on that subnet, has a CPC in common with the local proc
@ -1048,6 +1067,37 @@ int mca_btl_openib_add_procs(
return OPAL_SUCCESS;
}
/**
 * Look up (or lazily create) the endpoint connecting this openib BTL module
 * to the given remote process.
 *
 * First scans the per-proc endpoint list for one already owned by this
 * module; when none exists, falls back to mca_btl_openib_add_procs() for a
 * single proc, which allocates and registers a new endpoint.
 *
 * @param[in] btl   openib BTL module
 * @param[in] proc  opal process object for the remote peer
 *
 * @returns the matching endpoint, or NULL if the peer is unreachable by
 *          this BTL
 */
struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_module_t *btl, struct opal_proc_t *proc)
{
    mca_btl_openib_module_t *module = (mca_btl_openib_module_t *) btl;
    mca_btl_openib_proc_t *proc_data;
    mca_btl_base_endpoint_t *ep = NULL;

    proc_data = mca_btl_openib_proc_create (proc);
    if (NULL == proc_data) {
        /* if we don't have connection info for this process, it's
         * okay because some other method might be able to reach it,
         * so just mark it as unreachable by us */
        return NULL;
    }

    /* scan for an endpoint already associated with this BTL module */
    OPAL_THREAD_LOCK(&proc_data->proc_lock);
    for (size_t i = 0 ; i < proc_data->proc_endpoint_count ; ++i) {
        if (proc_data->proc_endpoints[i]->endpoint_btl == module) {
            ep = proc_data->proc_endpoints[i];
            break;
        }
    }
    OPAL_THREAD_UNLOCK(&proc_data->proc_lock);

    if (NULL != ep) {
        return ep;
    }

    BTL_VERBOSE(("creating new endpoint for remote process {.jobid = 0x%x, .vpid = 0x%x}",
                 proc->proc_name.jobid, proc->proc_name.vpid));

    /* no endpoint yet for this module; let add_procs create one */
    (void) mca_btl_openib_add_procs (btl, 1, &proc, &ep, NULL);

    return ep;
}
/*
* delete the proc as reachable from this btl module
*/

Просмотреть файл

@ -874,6 +874,18 @@ int mca_btl_openib_post_srr(mca_btl_openib_module_t* openib_btl, const int qp);
const char* btl_openib_get_transport_name(mca_btl_openib_transport_type_t transport_type);
/**
* Get an endpoint for a process
*
* @param btl (IN) BTL module
* @param proc (IN) opal process object
*
* This function returns an existing endpoint if one exists; otherwise it
* allocates a new endpoint and returns it.
*/
struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_module_t *btl,
struct opal_proc_t *proc);
/**
* Get a transport type of btl.
*/

Просмотреть файл

@ -565,7 +565,8 @@ int btl_openib_register_mca_params(void)
mca_btl_openib_module.super.btl_rdma_pipeline_frag_size = 1024 * 1024;
mca_btl_openib_module.super.btl_min_rdma_pipeline_size = 256 * 1024;
mca_btl_openib_module.super.btl_flags = MCA_BTL_FLAGS_RDMA |
MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA;
MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA |
MCA_BTL_FLAGS_SEND;
#if BTL_OPENIB_FAILOVER_ENABLED
mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_FAILOVER_SUPPORT;
#endif

Просмотреть файл

@ -218,6 +218,7 @@ typedef struct udcm_msg_hdr {
union {
/* UDCM_MESSAGE_CONNECT */
struct msg_connect {
opal_process_name_t rem_name;
int32_t rem_ep_index;
uint8_t rem_port_num;
} req;
@ -1473,36 +1474,26 @@ static int udcm_rc_qp_create_all (mca_btl_base_endpoint_t *lcl_ep)
/* JMS: optimization target -- can we send something in private
data to find the proc directly instead of having to search
through *all* procs? */
static mca_btl_openib_endpoint_t *udcm_find_endpoint (opal_pointer_array_t *endpoints,
static mca_btl_openib_endpoint_t *udcm_find_endpoint (struct mca_btl_openib_module_t *btl,
uint32_t qp_num, uint16_t lid,
udcm_msg_hdr_t *msg_hdr)
{
uint8_t port_num;
int i;
mca_btl_base_endpoint_t *endpoint;
struct opal_proc_t *opal_proc;
port_num = msg_hdr->data.req.rem_port_num;
for (i = 0 ; i < opal_pointer_array_get_size (endpoints) ; ++i) {
mca_btl_openib_endpoint_t *endpoint;
modex_msg_t *msg;
endpoint = (mca_btl_openib_endpoint_t *)
opal_pointer_array_get_item (endpoints, i);
if (NULL == endpoint) {
continue;
}
msg = UDCM_ENDPOINT_REM_MODEX(endpoint);
if (msg->mm_qp_num == qp_num && msg->mm_port_num == port_num &&
msg->mm_lid == lid)
return endpoint;
opal_proc = opal_proc_for_name (msg_hdr->data.req.rem_name);
if (NULL == opal_proc) {
BTL_ERROR(("could not get proc associated with remote peer"));
return NULL;
}
BTL_ERROR(("could not find endpoint with port: %d, lid: %d, msg_type: %d",
port_num, lid, msg_hdr->type));
endpoint = mca_btl_openib_get_ep (&btl->super, opal_proc);
if (NULL == endpoint) {
BTL_ERROR(("could not find endpoint with port: %d, lid: %d, msg_type: %d",
msg_hdr->data.req.rem_port_num, lid, msg_hdr->type));
}
return NULL;
return endpoint;
}
static int udcm_endpoint_init_data (mca_btl_base_endpoint_t *lcl_ep)
@ -1678,6 +1669,7 @@ static int udcm_send_request (mca_btl_base_endpoint_t *lcl_ep,
msg->data->hdr.data.req.rem_ep_index = htonl(lcl_ep->index);
msg->data->hdr.data.req.rem_port_num = m->modex.mm_port_num;
msg->data->hdr.data.req.rem_name = OPAL_PROC_MY_NAME;
for (i = 0 ; i < mca_btl_openib_component.num_qps ; ++i) {
msg->data->qps[i].psn = htonl(lcl_ep->qps[i].qp->lcl_psn);
@ -1981,8 +1973,7 @@ static int udcm_process_messages (struct ibv_cq *event_cq, udcm_module_t *m)
lcl_ep = message->hdr.lcl_ep;
if (NULL == lcl_ep) {
lcl_ep = udcm_find_endpoint (m->btl->device->endpoints, wc[i].src_qp,
wc[i].slid, &message->hdr);
lcl_ep = udcm_find_endpoint (m->btl, wc[i].src_qp, wc[i].slid, &message->hdr);
}
if (NULL == lcl_ep ) {
@ -2824,6 +2815,7 @@ static int udcm_xrc_send_request (mca_btl_base_endpoint_t *lcl_ep, mca_btl_base_
msg->data->hdr.data.req.rem_ep_index = htonl(lcl_ep->index);
msg->data->hdr.data.req.rem_port_num = m->modex.mm_port_num;
msg->data->hdr.data.req.rem_name = OPAL_PROC_MY_NAME;
if (UDCM_MESSAGE_XCONNECT == msg_type) {
BTL_VERBOSE(("Sending XConnect with qp: %d, psn: %d", lcl_ep->qps[0].qp->lcl_qp->qp_num,

Просмотреть файл

@ -221,7 +221,8 @@ mca_btl_portals4_component_open(void)
mca_btl_portals4_module.super.btl_min_rdma_pipeline_size = 0;
mca_btl_portals4_module.super.btl_flags =
MCA_BTL_FLAGS_RDMA |
MCA_BTL_FLAGS_RDMA_MATCHED;
MCA_BTL_FLAGS_RDMA_MATCHED |
MCA_BTL_FLAGS_SEND;
mca_btl_portals4_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);

Просмотреть файл

@ -98,7 +98,7 @@ static int mca_btl_self_component_register(void)
mca_btl_self.btl_rdma_pipeline_send_length = INT_MAX;
mca_btl_self.btl_rdma_pipeline_frag_size = INT_MAX;
mca_btl_self.btl_min_rdma_pipeline_size = 0;
mca_btl_self.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE;
mca_btl_self.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND;
mca_btl_self.btl_bandwidth = 100;
mca_btl_self.btl_latency = 0;
mca_btl_base_param_register(&mca_btl_self_component.super.btl_version,

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
@ -72,6 +72,7 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
struct opal_proc_t* opal_proc = procs[i];
mca_btl_tcp_proc_t* tcp_proc;
mca_btl_base_endpoint_t* tcp_endpoint;
bool existing_found = false;
/* Do not create loopback TCP connections */
if( my_proc == opal_proc ) {
@ -90,28 +91,43 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
OPAL_THREAD_LOCK(&tcp_proc->proc_lock);
/* The btl_proc datastructure is shared by all TCP BTL
* instances that are trying to reach this destination.
* Cache the peer instance on the btl_proc.
*/
tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t);
if(NULL == tcp_endpoint) {
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
return OPAL_ERR_OUT_OF_RESOURCE;
for (int j = 0 ; j < tcp_proc->proc_endpoint_count ; ++j) {
tcp_endpoint = tcp_proc->proc_endpoints[j];
if (tcp_endpoint->endpoint_btl == tcp_btl) {
existing_found = true;
break;
}
}
tcp_endpoint->endpoint_btl = tcp_btl;
rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint);
if(rc != OPAL_SUCCESS) {
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
OBJ_RELEASE(tcp_endpoint);
continue;
if (!existing_found) {
/* The btl_proc datastructure is shared by all TCP BTL
* instances that are trying to reach this destination.
* Cache the peer instance on the btl_proc.
*/
tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t);
if(NULL == tcp_endpoint) {
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
return OPAL_ERR_OUT_OF_RESOURCE;
}
tcp_endpoint->endpoint_btl = tcp_btl;
rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint);
if(rc != OPAL_SUCCESS) {
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
OBJ_RELEASE(tcp_endpoint);
continue;
}
opal_list_append(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint);
}
opal_bitmap_set_bit(reachable, i);
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
if (NULL != reachable) {
opal_bitmap_set_bit(reachable, i);
}
peers[i] = tcp_endpoint;
opal_list_append(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint);
/* we increase the count of MPI users of the event library
once per peer, so that we are used until we aren't

Просмотреть файл

@ -269,7 +269,8 @@ static int mca_btl_tcp_component_register(void)
MCA_BTL_FLAGS_SEND_INPLACE |
MCA_BTL_FLAGS_NEED_CSUM |
MCA_BTL_FLAGS_NEED_ACK |
MCA_BTL_FLAGS_HETEROGENEOUS_RDMA;
MCA_BTL_FLAGS_HETEROGENEOUS_RDMA |
MCA_BTL_FLAGS_SEND;
mca_btl_tcp_module.super.btl_bandwidth = 100;
mca_btl_tcp_module.super.btl_latency = 100;

Просмотреть файл

@ -14,7 +14,9 @@
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -738,6 +740,31 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const opal_process_name_t *name)
opal_proc_table_get_value(&mca_btl_tcp_component.tcp_procs,
*name, (void**)&proc);
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
if (OPAL_UNLIKELY(NULL == proc)) {
mca_btl_base_endpoint_t *endpoint;
opal_proc_t *opal_proc;
int rc;
BTL_VERBOSE(("adding tcp proc for unknown peer {.jobid = 0x%x, .vpid = 0x%x}",
name->jobid, name->vpid));
opal_proc = opal_proc_for_name (*name);
if (NULL == opal_proc) {
return NULL;
}
/* try adding this proc to each btl until */
for (int i = 0 ; i < mca_btl_tcp_component.tcp_num_btls ; ++i) {
endpoint = NULL;
(void) mca_btl_tcp_add_procs (&mca_btl_tcp_component.tcp_btls[i]->super, 1, &opal_proc,
&endpoint, NULL);
if (NULL != endpoint && NULL == proc) {
/* get the proc and continue on (could probably just break here) */
proc = endpoint->endpoint_proc;
}
}
}
return proc;
}

Просмотреть файл

@ -49,7 +49,7 @@
/* ompi and smsg endpoint attributes */
typedef struct mca_btl_ugni_endpoint_attr_t {
uint64_t proc_id;
opal_process_name_t proc_name;
uint32_t index;
gni_smsg_attr_t smsg_attr;
gni_mem_handle_t rmt_irq_mem_hndl;
@ -67,6 +67,7 @@ typedef struct mca_btl_ugni_module_t {
opal_common_ugni_device_t *device;
opal_mutex_t endpoint_lock;
size_t endpoint_count;
opal_pointer_array_t endpoints;
opal_hash_table_t id_to_endpoint;
@ -229,6 +230,8 @@ mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
struct opal_proc_t **procs,
struct mca_btl_base_endpoint_t **peers);
struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_t *module, opal_proc_t *proc);
/**
* Initiate an asynchronous send.
*

Просмотреть файл

@ -28,13 +28,11 @@ static void
mca_btl_ugni_module_set_max_reg (mca_btl_ugni_module_t *ugni_module, int nlocal_procs);
static int mca_btl_ugni_smsg_setup (int nprocs);
int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
size_t nprocs,
struct opal_proc_t **procs,
struct mca_btl_base_endpoint_t **peers,
opal_bitmap_t *reachable) {
int mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl, size_t nprocs,
struct opal_proc_t **procs,
struct mca_btl_base_endpoint_t **peers,
opal_bitmap_t *reachable) {
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
size_t i;
int rc;
void *mmap_start_addr;
@ -59,36 +57,45 @@ int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
}
}
for (i = 0 ; i < nprocs ; ++i) {
for (size_t i = 0 ; i < nprocs ; ++i) {
struct opal_proc_t *opal_proc = procs[i];
uint64_t proc_id = mca_btl_ugni_proc_name_to_id(opal_proc->proc_name);
if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) {
ugni_module->nlocal_procs++;
/* check for an existing endpoint */
OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) (peers + i))) {
if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) {
ugni_module->nlocal_procs++;
/* ugni is allowed on local processes to provide support for network
* atomic operations */
/* ugni is allowed on local processes to provide support for network
* atomic operations */
}
/* Create and Init endpoints */
rc = mca_btl_ugni_init_ep (ugni_module, peers + i, (mca_btl_ugni_module_t *) btl, opal_proc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
BTL_ERROR(("btl/ugni error initializing endpoint"));
return rc;
}
/* go ahead and connect the local endpoint for RDMA/CQ write */
if (opal_proc == opal_proc_local_get ()) {
ugni_module->local_ep = peers[i];
}
/* Add this endpoint to the pointer array. */
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) peers[i]));
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, peers[i]);
++ugni_module->endpoint_count;
}
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
/* Create and Init endpoints */
rc = mca_btl_ugni_init_ep (ugni_module, peers + i, (mca_btl_ugni_module_t *) btl, opal_proc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("btl/ugni error initializing endpoint"));
return rc;
/* Set the reachable bit if necessary */
if (reachable) {
rc = opal_bitmap_set_bit (reachable, i);
}
/* go ahead and connect the local endpoint for RDMA/CQ write */
if (opal_proc == opal_proc_local_get ()) {
ugni_module->local_ep = peers[i];
}
/* Add this endpoint to the pointer array. */
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) peers[i]));
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, peers[i]);
/* Set the reachable bit */
rc = opal_bitmap_set_bit (reachable, i);
++ugni_module->endpoint_count;
}
mca_btl_ugni_module_set_max_reg (ugni_module, ugni_module->nlocal_procs);
@ -224,6 +231,41 @@ int mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
return OPAL_SUCCESS;
}
/**
 * Look up (or lazily create) the uGNI endpoint for the given remote process.
 *
 * The endpoint is cached in ugni_module->id_to_endpoint keyed by the
 * proc id derived from the process name; on a miss a new endpoint is
 * created with mca_btl_ugni_init_ep() and inserted into the hash.
 *
 * @param[in] module  uGNI BTL module
 * @param[in] proc    opal process object for the remote peer
 *
 * @returns the matching endpoint, or NULL if endpoint initialization failed
 *
 * Fixes relative to the original:
 *  - the cache-hit path unlocked endpoint_lock inside the do/while and then
 *    unlocked it AGAIN after the loop (unlocking an unheld lock is undefined
 *    behavior for most mutex implementations); the lock is now released
 *    exactly once, after the loop.
 *  - ep is initialized to NULL and reset to NULL when init_ep fails, so a
 *    failure cannot return an indeterminate pointer.
 */
struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_t *module, opal_proc_t *proc)
{
    mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) module;
    uint64_t proc_id = mca_btl_ugni_proc_name_to_id(proc->proc_name);
    mca_btl_base_endpoint_t *ep = NULL;
    int rc;

    OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);

    do {
        rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep);
        if (OPAL_SUCCESS == rc) {
            /* endpoint already cached for this proc */
            break;
        }

        /* Create and Init endpoints */
        rc = mca_btl_ugni_init_ep (ugni_module, &ep, ugni_module, proc);
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
            BTL_ERROR(("btl/ugni error initializing endpoint"));
            ep = NULL;
            break;
        }

        /* Add this endpoint to the pointer array. */
        BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) ep));
        opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, ep);
    } while (0);

    OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);

    return ep;
}
static int ugni_reg_rdma_mem (void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg)
{

Просмотреть файл

@ -386,8 +386,8 @@ mca_btl_ugni_component_init (int *num_btl_modules,
static inline int
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
{
uint64_t datagram_id, data, proc_id;
uint32_t remote_addr, remote_id;
uint64_t datagram_id, data;
mca_btl_base_endpoint_t *ep;
gni_post_state_t post_state;
gni_ep_handle_t handle;
@ -425,15 +425,24 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
/* if this is a wildcard endpoint lookup the remote peer by the proc id we received */
if (handle == ugni_module->wildcard_ep) {
BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64, ugni_module->wc_remote_attr.proc_id));
rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint,
ugni_module->wc_remote_attr.proc_id,
(void *) &ep);
proc_id = mca_btl_ugni_proc_name_to_id (ugni_module->wc_remote_attr.proc_name);
BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64,
proc_id));
OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep);
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
/* check if the endpoint is known */
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == ep)) {
BTL_ERROR(("received connection attempt from an unknown peer. rc: %d, ep: %p, id: 0x%" PRIx64,
rc, (void *) ep, ugni_module->wc_remote_attr.proc_id));
return OPAL_ERR_NOT_FOUND;
struct opal_proc_t *remote_proc = opal_proc_for_name (ugni_module->wc_remote_attr.proc_name);
BTL_VERBOSE(("Got connection request from an unknown peer {jobid = 0x%x, vid = 0x%x}",
ugni_module->wc_remote_attr.proc_name.jobid, ugni_module->wc_remote_attr.proc_name.vpid));
ep = mca_btl_ugni_get_ep (&ugni_module->super, remote_proc);
if (OPAL_UNLIKELY(NULL == ep)) {
return rc;
}
}
} else {
BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));

Просмотреть файл

@ -91,6 +91,7 @@ mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
OBJ_CONSTRUCT(&ugni_module->pending_smsg_frags_bb, opal_pointer_array_t);
OBJ_CONSTRUCT(&ugni_module->ep_wait_list_lock,opal_mutex_t);
OBJ_CONSTRUCT(&ugni_module->ep_wait_list, opal_list_t);
OBJ_CONSTRUCT(&ugni_module->endpoint_lock, opal_mutex_t);
OBJ_CONSTRUCT(&ugni_module->endpoints, opal_pointer_array_t);
OBJ_CONSTRUCT(&ugni_module->id_to_endpoint, opal_hash_table_t);
OBJ_CONSTRUCT(&ugni_module->smsg_mboxes, opal_free_list_t);
@ -208,6 +209,7 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
OBJ_DESTRUCT(&ugni_module->smsg_mboxes);
OBJ_DESTRUCT(&ugni_module->pending_smsg_frags_bb);
OBJ_DESTRUCT(&ugni_module->id_to_endpoint);
OBJ_DESTRUCT(&ugni_module->endpoint_lock);
OBJ_DESTRUCT(&ugni_module->endpoints);
OBJ_DESTRUCT(&ugni_module->eager_get_pending);

Просмотреть файл

@ -27,7 +27,7 @@ static void mca_btl_ugni_smsg_mbox_construct (mca_btl_ugni_smsg_mbox_t *mbox) {
mbox->attr.smsg_attr.msg_buffer = base_reg->base;
mbox->attr.smsg_attr.buff_size = mca_btl_ugni_component.smsg_mbox_size;
mbox->attr.smsg_attr.mem_hndl = ugni_reg->handle.gni_handle;
mbox->attr.proc_id = mca_btl_ugni_proc_name_to_id (OPAL_PROC_MY_NAME);
mbox->attr.proc_name = OPAL_PROC_MY_NAME;
mbox->attr.rmt_irq_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;
}

Просмотреть файл

@ -427,7 +427,7 @@ static int usnic_add_procs(struct mca_btl_base_module_t* base_module,
/* Find all the endpoints with a complete set of USD destinations
and mark them as reachable */
for (size_t i = 0; i < nprocs; ++i) {
for (size_t i = 0; NULL != reachable && i < nprocs; ++i) {
if (NULL != endpoints[i]) {
bool happy = true;
for (int channel = 0; channel < USNIC_NUM_CHANNELS; ++channel) {

Просмотреть файл

@ -239,8 +239,10 @@ static int mca_btl_vader_component_register (void)
mca_btl_vader.super.btl_rdma_pipeline_send_length = mca_btl_vader.super.btl_eager_limit;
mca_btl_vader.super.btl_rdma_pipeline_frag_size = mca_btl_vader.super.btl_eager_limit;
mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND;
if (MCA_BTL_VADER_NONE != mca_btl_vader_component.single_copy_mechanism) {
mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE;
mca_btl_vader.super.btl_flags |= MCA_BTL_FLAGS_RDMA;
/* Single copy mechanisms should provide better bandwidth */
mca_btl_vader.super.btl_bandwidth = 40000; /* Mbs */
@ -248,7 +250,6 @@ static int mca_btl_vader_component_register (void)
mca_btl_vader.super.btl_get = (mca_btl_base_module_get_fn_t) mca_btl_vader_dummy_rdma;
mca_btl_vader.super.btl_put = (mca_btl_base_module_get_fn_t) mca_btl_vader_dummy_rdma;
} else {
mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE;
mca_btl_vader.super.btl_bandwidth = 10000; /* Mbs */
}

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
@ -6,6 +7,8 @@
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -162,6 +165,11 @@ static int opal_convert_string_to_jobid_should_never_be_called(opal_jobid_t *job
return OPAL_ERR_NOT_SUPPORTED;
}
static struct opal_proc_t *opal_proc_for_name_should_never_be_called (opal_process_name_t name)
{
return NULL;
}
char* (*opal_process_name_print)(const opal_process_name_t) = opal_process_name_print_should_never_be_called;
char* (*opal_vpid_print)(const opal_vpid_t) = opal_vpid_print_should_never_be_called;
char* (*opal_jobid_print)(const opal_jobid_t) = opal_jobid_print_should_never_be_called;
@ -169,6 +177,7 @@ int (*opal_convert_string_to_process_name)(opal_process_name_t *name, const char
int (*opal_convert_process_name_to_string)(char** name_string, const opal_process_name_t *name) = opal_convert_process_name_to_string_should_never_be_called;
char* (*opal_convert_jobid_to_string)(opal_jobid_t jobid) = opal_convert_jobid_to_string_should_never_be_called;
int (*opal_convert_string_to_jobid)(opal_jobid_t *jobid, const char *jobid_string) = opal_convert_string_to_jobid_should_never_be_called;
struct opal_proc_t *(*opal_proc_for_name) (const opal_process_name_t name) = opal_proc_for_name_should_never_be_called;
char* opal_get_proc_hostname(const opal_proc_t *proc)
{

Просмотреть файл

@ -136,6 +136,13 @@ OPAL_DECLSPEC extern char* (*opal_jobid_print)(const opal_jobid_t);
OPAL_DECLSPEC extern char* (*opal_convert_jobid_to_string)(opal_jobid_t jobid);
OPAL_DECLSPEC extern int (*opal_convert_string_to_jobid)(opal_jobid_t *jobid, const char *jobid_string);
/**
* Lookup an opal_proc_t by name
*
* @param name (IN) name to lookup
*/
OPAL_DECLSPEC extern struct opal_proc_t *(*opal_proc_for_name) (const opal_process_name_t name);
#define OPAL_NAME_PRINT(OPAL_PN) opal_process_name_print(OPAL_PN)
#define OPAL_JOBID_PRINT(OPAL_PN) opal_jobid_print(OPAL_PN)
#define OPAL_VPID_PRINT(OPAL_PN) opal_vpid_print(OPAL_PN)

Просмотреть файл

@ -113,6 +113,8 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority)
if (NULL == oshmem_group_all) {
osh_group->ompi_comm = &(ompi_mpi_comm_world.comm);
} else {
int my_rank = MPI_UNDEFINED;
err = ompi_comm_group(&(ompi_mpi_comm_world.comm), &parent_group);
if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) {
return NULL;
@ -132,6 +134,10 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority)
break;
}
}
/* NTH: keep track of my rank in the new group for the workaround below */
if (ranks[i] == ompi_comm_rank (&ompi_mpi_comm_world.comm)) {
my_rank = i;
}
}
err = ompi_group_incl(parent_group, osh_group->proc_count, ranks, &new_group);
@ -139,6 +145,15 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority)
free(ranks);
return NULL;
}
/* NTH: XXX -- WORKAROUND -- The oshmem code overwrites ompi_proc_local_proc with its
* own proc but does not update the proc list in comm world or comm self. This causes
* the code in ompi_group_incl that updates grp_my_rank to fail. This will cause failures
* here and when an application attempts to mix oshmem and mpi so it will really need to
* be fixed in oshmem/proc and not here. For now we need to work around a new jenkins
* failure so set my group ranking so we do not crash when running ompi_comm_create_group. */
new_group->grp_my_rank = my_rank;
err = ompi_comm_create_group(&(ompi_mpi_comm_world.comm), new_group, tag, &newcomm);
if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) {
free(ranks);