Коммит
6a0c7b85bf
@ -139,54 +139,8 @@ static opal_mutex_t ompi_cid_lock;
|
||||
static opal_list_t ompi_registered_comms;
|
||||
|
||||
|
||||
/* This variable is zero (false) if all processes in MPI_COMM_WORLD
|
||||
* did not require MPI_THREAD_MULTIPLE support, and is 1 (true) as
|
||||
* soon as at least one process requested support for THREAD_MULTIPLE */
|
||||
static int ompi_comm_world_thread_level_mult=0;
|
||||
|
||||
|
||||
int ompi_comm_cid_init (void)
|
||||
{
|
||||
#if OMPI_ENABLE_THREAD_MULTIPLE
|
||||
ompi_proc_t **procs, *thisproc;
|
||||
uint8_t thread_level;
|
||||
uint8_t *tlpointer;
|
||||
int ret;
|
||||
size_t i, size, numprocs;
|
||||
|
||||
/** Note that the following call only returns processes
|
||||
* with the same jobid. This is on purpose, since
|
||||
* we switch for the dynamic communicators anyway
|
||||
* to the original (slower) cid allocation algorithm.
|
||||
*/
|
||||
procs = ompi_proc_world ( &numprocs );
|
||||
|
||||
for ( i=0; i<numprocs; i++ ) {
|
||||
thisproc = procs[i];
|
||||
|
||||
OPAL_MODEX_RECV_STRING(ret, "MPI_THREAD_LEVEL",
|
||||
&thisproc->super.proc_name,
|
||||
(uint8_t**)&tlpointer, &size);
|
||||
if (OMPI_SUCCESS == ret) {
|
||||
thread_level = *((uint8_t *) tlpointer);
|
||||
if ( OMPI_THREADLEVEL_IS_MULTIPLE (thread_level) ) {
|
||||
ompi_comm_world_thread_level_mult = 1;
|
||||
break;
|
||||
}
|
||||
} else if (OMPI_ERR_NOT_IMPLEMENTED == ret) {
|
||||
if (ompi_mpi_thread_multiple) {
|
||||
ompi_comm_world_thread_level_mult = 1;
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
free(procs);
|
||||
#else
|
||||
ompi_comm_world_thread_level_mult = 0; // silence compiler warning if not used
|
||||
#endif
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2006-2010 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012-2014 Los Alamos National Security, LLC.
|
||||
* Copyright (c) 2012-2015 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2013 Inria. All rights reserved.
|
||||
* Copyright (c) 2011-2013 Universite Bordeaux 1
|
||||
@ -102,12 +102,26 @@ int ompi_comm_init(void)
|
||||
OBJ_CONSTRUCT(&ompi_mpi_comm_world, ompi_communicator_t);
|
||||
assert(ompi_mpi_comm_world.comm.c_f_to_c_index == 0);
|
||||
group = OBJ_NEW(ompi_group_t);
|
||||
group->grp_proc_pointers = ompi_proc_world(&size);
|
||||
group->grp_proc_count = (int)size;
|
||||
|
||||
size = ompi_process_info.num_procs;
|
||||
group->grp_proc_pointers = (ompi_proc_t **) calloc (size, sizeof (ompi_proc_t *));
|
||||
group->grp_proc_count = size;
|
||||
|
||||
for (size_t i = 0 ; i < size ; ++i) {
|
||||
opal_process_name_t name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid};
|
||||
/* look for existing ompi_proc_t that matches this name */
|
||||
group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_lookup (name);
|
||||
if (NULL == group->grp_proc_pointers[i]) {
|
||||
/* set sentinel value */
|
||||
group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_name_to_sentinel (name);
|
||||
} else {
|
||||
OBJ_RETAIN (group->grp_proc_pointers[i]);
|
||||
}
|
||||
}
|
||||
|
||||
OMPI_GROUP_SET_INTRINSIC (group);
|
||||
OMPI_GROUP_SET_DENSE (group);
|
||||
ompi_set_group_rank(group, ompi_proc_local());
|
||||
ompi_group_increment_proc_count (group);
|
||||
|
||||
ompi_mpi_comm_world.comm.c_contextid = 0;
|
||||
ompi_mpi_comm_world.comm.c_id_start_index = 4;
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2006-2009 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
@ -1293,6 +1293,22 @@ static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs)
|
||||
/**********************************************************************/
|
||||
/**********************************************************************/
|
||||
/**********************************************************************/
|
||||
/*
 * Report whether a group contains a member from a job other than thisjobid.
 *
 * group      group to scan; NULL is treated as an empty group
 * thisjobid  jobid every member is compared against
 *
 * Returns true as soon as one member with a different jobid is found,
 * false otherwise (including for a NULL or empty group).
 */
static bool ompi_dpm_group_is_dyn (ompi_group_t *group, ompi_jobid_t thisjobid)
{
    int size = group ? ompi_group_size (group) : 0;

    /* Rank 0 has to be examined as well: the reference jobid is supplied
     * by the caller and need not originate from this group (e.g. when a
     * remote group is scanned), so skipping it could miss a foreign job. */
    for (int i = 0 ; i < size ; ++i) {
        opal_process_name_t name = ompi_group_get_proc_name (group, i);

        if (thisjobid != ((ompi_process_name_t *) &name)->jobid) {
            /* at least one is different */
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/* All we want to do in this function is determine if the number of
|
||||
* jobids in the local and/or remote group is > 1. This tells us to
|
||||
* set the disconnect flag. We don't actually care what the true
|
||||
@ -1300,56 +1316,30 @@ static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs)
|
||||
*/
|
||||
void ompi_dpm_mark_dyncomm(ompi_communicator_t *comm)
|
||||
{
|
||||
int i;
|
||||
int size, rsize;
|
||||
bool found=false;
|
||||
bool found;
|
||||
ompi_jobid_t thisjobid;
|
||||
ompi_group_t *grp=NULL;
|
||||
ompi_proc_t *proc = NULL;
|
||||
|
||||
/* special case for MPI_COMM_NULL */
|
||||
if (comm == MPI_COMM_NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rsize = ompi_comm_remote_size(comm);
|
||||
thisjobid = ompi_group_get_proc_name (comm->c_local_group, 0).jobid;
|
||||
|
||||
/* loop over all processes in local group and check for
|
||||
* a different jobid
|
||||
*/
|
||||
grp = comm->c_local_group;
|
||||
proc = ompi_group_peer_lookup(grp,0);
|
||||
thisjobid = ((ompi_process_name_t*)&proc->super.proc_name)->jobid;
|
||||
|
||||
for (i=1; i< size; i++) {
|
||||
proc = ompi_group_peer_lookup(grp,i);
|
||||
if (thisjobid != ((ompi_process_name_t*)&proc->super.proc_name)->jobid) {
|
||||
/* at least one is different */
|
||||
found = true;
|
||||
goto complete;
|
||||
}
|
||||
found = ompi_dpm_group_is_dyn (comm->c_local_group, thisjobid);
|
||||
if (!found) {
|
||||
/* if inter-comm, loop over all processes in remote_group
|
||||
* and see if any are different from thisjobid
|
||||
*/
|
||||
found = ompi_dpm_group_is_dyn (comm->c_remote_group, thisjobid);
|
||||
}
|
||||
|
||||
/* if inter-comm, loop over all processes in remote_group
|
||||
* and see if any are different from thisjobid
|
||||
*/
|
||||
grp = comm->c_remote_group;
|
||||
for (i=0; i< rsize; i++) {
|
||||
proc = ompi_group_peer_lookup(grp,i);
|
||||
if (thisjobid != ((ompi_process_name_t*)&proc->super.proc_name)->jobid) {
|
||||
/* at least one is different */
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
complete:
|
||||
/* if a different jobid was found, set the disconnect flag*/
|
||||
if (found) {
|
||||
ompi_comm_num_dyncomm++;
|
||||
OMPI_COMM_SET_DYNAMIC(comm);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -14,7 +14,7 @@
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2012-2013 Inria. All rights reserved.
|
||||
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
@ -49,16 +49,14 @@ int ompi_group_translate_ranks ( ompi_group_t *group1,
|
||||
ompi_group_t *group2,
|
||||
int *ranks2)
|
||||
{
|
||||
int rank, proc, proc2;
|
||||
struct ompi_proc_t *proc1_pointer, *proc2_pointer;
|
||||
|
||||
if ( MPI_GROUP_EMPTY == group1 || MPI_GROUP_EMPTY == group2 ) {
|
||||
for (proc = 0; proc < n_ranks ; proc++) {
|
||||
for (int proc = 0; proc < n_ranks ; ++proc) {
|
||||
ranks2[proc] = MPI_UNDEFINED;
|
||||
}
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
#if OMPI_GROUP_SPARSE
|
||||
/*
|
||||
* If we are translating from a parent to a child that uses the sparse format
|
||||
* or vice versa, we use the translate ranks function corresponding to the
|
||||
@ -80,8 +78,11 @@ int ompi_group_translate_ranks ( ompi_group_t *group1,
|
||||
(group1,n_ranks,ranks1,group2,ranks2);
|
||||
}
|
||||
|
||||
/* unknown sparse group type */
|
||||
assert (0);
|
||||
}
|
||||
else if( group2->grp_parent_group_ptr == group1 ) { /* from parent to child*/
|
||||
|
||||
if( group2->grp_parent_group_ptr == group1 ) { /* from parent to child*/
|
||||
if(OMPI_GROUP_IS_SPORADIC(group2)) {
|
||||
return ompi_group_translate_ranks_sporadic
|
||||
(group1,n_ranks,ranks1,group2,ranks2);
|
||||
@ -95,28 +96,32 @@ int ompi_group_translate_ranks ( ompi_group_t *group1,
|
||||
(group1,n_ranks,ranks1,group2,ranks2);
|
||||
}
|
||||
|
||||
/* unknown sparse group type */
|
||||
assert (0);
|
||||
}
|
||||
else {
|
||||
/* loop over all ranks */
|
||||
for (proc = 0; proc < n_ranks; proc++) {
|
||||
rank=ranks1[proc];
|
||||
if ( MPI_PROC_NULL == rank) {
|
||||
ranks2[proc] = MPI_PROC_NULL;
|
||||
}
|
||||
else {
|
||||
proc1_pointer = ompi_group_peer_lookup(group1 ,rank);
|
||||
/* initialize to no "match" */
|
||||
ranks2[proc] = MPI_UNDEFINED;
|
||||
for (proc2 = 0; proc2 < group2->grp_proc_count; proc2++) {
|
||||
proc2_pointer= ompi_group_peer_lookup(group2, proc2);
|
||||
if ( proc1_pointer == proc2_pointer) {
|
||||
ranks2[proc] = proc2;
|
||||
break;
|
||||
}
|
||||
} /* end proc2 loop */
|
||||
} /* end proc loop */
|
||||
#endif
|
||||
|
||||
/* loop over all ranks */
|
||||
for (int proc = 0; proc < n_ranks; ++proc) {
|
||||
struct ompi_proc_t *proc1_pointer, *proc2_pointer;
|
||||
int rank = ranks1[proc];
|
||||
|
||||
if ( MPI_PROC_NULL == rank) {
|
||||
ranks2[proc] = MPI_PROC_NULL;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
proc1_pointer = ompi_group_get_proc_ptr_raw (group1, rank);
|
||||
/* initialize to no "match" */
|
||||
ranks2[proc] = MPI_UNDEFINED;
|
||||
for (int proc2 = 0; proc2 < group2->grp_proc_count; ++proc2) {
|
||||
proc2_pointer = ompi_group_get_proc_ptr_raw (group2, proc2);
|
||||
if ( proc1_pointer == proc2_pointer) {
|
||||
ranks2[proc] = proc2;
|
||||
break;
|
||||
}
|
||||
} /* end proc2 loop */
|
||||
} /* end proc loop */
|
||||
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
@ -168,25 +173,6 @@ int ompi_group_dump (ompi_group_t* group)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is the function that iterates through the sparse groups to the dense group
|
||||
* to reach the process pointer
|
||||
*/
|
||||
ompi_proc_t* ompi_group_get_proc_ptr (ompi_group_t* group , int rank)
{
    /* Climb the parent chain until a dense group is reached, re-expressing
     * the rank in the parent's numbering at every step. */
    while (!OMPI_GROUP_IS_DENSE(group)) {
        int rank_in_child = rank;
        int rank_in_parent;

        ompi_group_translate_ranks (group, 1, &rank_in_child,
                                    group->grp_parent_group_ptr, &rank_in_parent);
        rank = rank_in_parent;
        group = group->grp_parent_group_ptr;
    }

    /* dense groups store the proc pointers directly */
    return group->grp_proc_pointers[rank];
}
|
||||
|
||||
int ompi_group_minloc ( int list[] , int length )
|
||||
{
|
||||
int i,index,min;
|
||||
@ -568,3 +554,23 @@ int ompi_group_compare(ompi_group_t *group1,
|
||||
|
||||
return return_value;
|
||||
}
|
||||
|
||||
/*
 * Report whether a group has at least one member that is not on the
 * local node.
 *
 * Returns true on the first member whose proc flags fail
 * OPAL_PROC_ON_LOCAL_NODE, false if every member passes.  In the
 * non-sparse build an entry that is still a sentinel (negative when viewed
 * as intptr_t) has no ompi_proc_t to inspect and is reported as remote.
 */
bool ompi_group_have_remote_peers (ompi_group_t *group)
{
    /* int index: grp_proc_count is treated as a signed int by the other
     * loops over it, and ompi_group_peer_lookup() takes an int rank */
    for (int i = 0 ; i < group->grp_proc_count ; ++i) {
        ompi_proc_t *proc = NULL;
#if OMPI_GROUP_SPARSE
        proc = ompi_group_peer_lookup (group, i);
#else
        if ((intptr_t) group->grp_proc_pointers[i] < 0) {
            return true;
        }
        proc = group->grp_proc_pointers[i];
#endif
        if (!OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
            return true;
        }
    }

    return false;
}
|
||||
|
@ -14,7 +14,7 @@
|
||||
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -252,8 +252,6 @@ int ompi_group_free (ompi_group_t **group);
|
||||
/**
|
||||
* Functions to handle process pointers for sparse group formats
|
||||
*/
|
||||
OMPI_DECLSPEC ompi_proc_t* ompi_group_get_proc_ptr (ompi_group_t* group , int rank);
|
||||
|
||||
int ompi_group_translate_ranks_sporadic ( ompi_group_t *group1,
|
||||
int n_ranks, const int *ranks1,
|
||||
ompi_group_t *group2,
|
||||
@ -324,25 +322,93 @@ int ompi_group_calc_bmap ( int n, int orig_size , const int *ranks );
|
||||
*/
|
||||
int ompi_group_minloc (int list[], int length);
|
||||
|
||||
/**
|
||||
* @brief Helper function for retrieving the proc of a group member in a dense group
|
||||
*
|
||||
* This function exists to handle the translation of sentinel group members to real
|
||||
* ompi_proc_t's. If a sentinel value is found and allocate is true then this function
|
||||
* looks for an existing ompi_proc_t using ompi_proc_for_name which will allocate a
|
||||
* ompi_proc_t if one does not exist. If allocate is false then sentinel values translate
|
||||
* to NULL.
|
||||
*/
|
||||
static inline struct ompi_proc_t *ompi_group_dense_lookup (ompi_group_t *group, const int peer_id, const bool allocate)
{
#if OPAL_ENABLE_DEBUG
    if (peer_id >= group->grp_proc_count) {
        opal_output(0, "ompi_group_dense_lookup: invalid peer index (%d)", peer_id);
        return (struct ompi_proc_t *) NULL;
    }
#endif

    /* common case: the slot already holds a real proc pointer */
    if (!OPAL_UNLIKELY((intptr_t) group->grp_proc_pointers[peer_id] < 0)) {
        return group->grp_proc_pointers[peer_id];
    }

    /* the slot holds a sentinel; callers that did not ask for allocation
     * get NULL back */
    if (!allocate) {
        return NULL;
    }

    /* turn the sentinel back into a name, fetch the proc for that name,
     * store it in the group and take a reference on it */
    const intptr_t sentinel = (intptr_t) group->grp_proc_pointers[peer_id];
    ompi_proc_t *resolved =
        (ompi_proc_t *) ompi_proc_for_name (ompi_proc_sentinel_to_name (sentinel));

    group->grp_proc_pointers[peer_id] = resolved;
    OBJ_RETAIN(resolved);

    return resolved;
}
|
||||
|
||||
/*
|
||||
* This is the function that iterates through the sparse groups to the dense group
|
||||
* to reach the process pointer
|
||||
*/
|
||||
static inline ompi_proc_t *ompi_group_get_proc_ptr (ompi_group_t *group, int rank, const bool allocate)
{
    /* group:    group to search (sparse groups are followed to their parent)
     * rank:     rank of the wanted member within group
     * allocate: forwarded unchanged to ompi_group_dense_lookup()
     * returns whatever ompi_group_dense_lookup() yields for the dense
     * group that is finally reached */
#if OMPI_GROUP_SPARSE
    do {
        if (OMPI_GROUP_IS_DENSE(group)) {
            /* use the (translated) rank; there is no peer_id in this scope */
            return ompi_group_dense_lookup (group, rank, allocate);
        }
        /* translate_ranks writes its result into rank, so hand it a copy
         * of the current value as input */
        int ranks1 = rank;
        ompi_group_translate_ranks (group, 1, &ranks1, group->grp_parent_group_ptr, &rank);
        group = group->grp_parent_group_ptr;
    } while (1);
#else
    return ompi_group_dense_lookup (group, rank, allocate);
#endif
}
|
||||
|
||||
/**
|
||||
* @brief Get the raw proc pointer from the group
|
||||
*
|
||||
* This function will either return a ompi_proc_t if one exists (either stored in the group
|
||||
* or cached in the proc hash table) or a sentinel value representing the proc. This
|
||||
* differs from ompi_group_get_proc_ptr() which returns the ompi_proc_t or NULL.
|
||||
*/
|
||||
ompi_proc_t *ompi_group_get_proc_ptr_raw (ompi_group_t *group, int rank);
|
||||
|
||||
static inline opal_process_name_t ompi_group_get_proc_name (ompi_group_t *group, int rank)
{
    ompi_proc_t *entry = ompi_group_get_proc_ptr_raw (group, rank);

    /* a value that is negative as intptr_t is a sentinel and is decoded
     * into a name; anything else is a real proc carrying its own name */
    return ((intptr_t) entry < 0) ? ompi_proc_sentinel_to_name ((intptr_t) entry)
                                  : entry->super.proc_name;
}
|
||||
|
||||
/**
|
||||
* Inline function to check if sparse groups are enabled and return the direct access
|
||||
* to the proc pointer, otherwise the lookup function
|
||||
*/
|
||||
static inline struct ompi_proc_t* ompi_group_peer_lookup(ompi_group_t *group, int peer_id)
|
||||
{
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
if (peer_id >= group->grp_proc_count) {
|
||||
opal_output(0, "ompi_group_lookup_peer: invalid peer index (%d)", peer_id);
|
||||
return (struct ompi_proc_t *) NULL;
|
||||
}
|
||||
#endif
|
||||
#if OMPI_GROUP_SPARSE
|
||||
return ompi_group_get_proc_ptr (group, peer_id);
|
||||
#else
|
||||
return group->grp_proc_pointers[peer_id];
|
||||
#endif
|
||||
return ompi_group_get_proc_ptr (group, peer_id, true);
|
||||
}
|
||||
|
||||
static inline struct ompi_proc_t *ompi_group_peer_lookup_existing (ompi_group_t *group, int peer_id)
{
    /* same lookup as ompi_group_get_proc_ptr(), with allocation disabled */
    const bool allocate_if_missing = false;

    return ompi_group_get_proc_ptr (group, peer_id, allocate_if_missing);
}
|
||||
|
||||
bool ompi_group_have_remote_peers (ompi_group_t *group);
|
||||
|
||||
/**
|
||||
* Function to print the group info
|
||||
*/
|
||||
|
@ -210,14 +210,13 @@ ompi_group_t *ompi_group_allocate_bmap(int orig_group_size , int group_size)
|
||||
*/
|
||||
void ompi_group_increment_proc_count(ompi_group_t *group)
|
||||
{
|
||||
int proc;
|
||||
ompi_proc_t * proc_pointer;
|
||||
for (proc = 0; proc < group->grp_proc_count; proc++) {
|
||||
proc_pointer = ompi_group_peer_lookup(group,proc);
|
||||
OBJ_RETAIN(proc_pointer);
|
||||
for (int proc = 0 ; proc < group->grp_proc_count ; ++proc) {
|
||||
proc_pointer = ompi_group_peer_lookup_existing (group, proc);
|
||||
if (proc_pointer) {
|
||||
OBJ_RETAIN(proc_pointer);
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -226,14 +225,13 @@ void ompi_group_increment_proc_count(ompi_group_t *group)
|
||||
|
||||
void ompi_group_decrement_proc_count(ompi_group_t *group)
|
||||
{
|
||||
int proc;
|
||||
ompi_proc_t * proc_pointer;
|
||||
for (proc = 0; proc < group->grp_proc_count; proc++) {
|
||||
proc_pointer = ompi_group_peer_lookup(group,proc);
|
||||
OBJ_RELEASE(proc_pointer);
|
||||
for (int proc = 0 ; proc < group->grp_proc_count ; ++proc) {
|
||||
proc_pointer = ompi_group_peer_lookup_existing (group, proc);
|
||||
if (proc_pointer) {
|
||||
OBJ_RELEASE(proc_pointer);
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -255,9 +253,6 @@ static void ompi_group_construct(ompi_group_t *new_group)
|
||||
|
||||
/* default the sparse values for groups */
|
||||
new_group->grp_parent_group_ptr = NULL;
|
||||
|
||||
/* return */
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -300,9 +295,6 @@ static void ompi_group_destruct(ompi_group_t *group)
|
||||
opal_pointer_array_set_item(&ompi_group_f_to_c_table,
|
||||
group->grp_f_to_c_index, NULL);
|
||||
}
|
||||
|
||||
/* return */
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2007 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -29,6 +29,66 @@
|
||||
|
||||
#include <math.h>
|
||||
|
||||
static int ompi_group_dense_overlap (ompi_group_t *group1, ompi_group_t *group2, opal_bitmap_t *bitmap)
{
    /* Counts the members of group1 that also appear in group2 and sets, for
     * each such member, the bit at its group2 index in bitmap.  Returns the
     * count, or the opal_bitmap_set_bit() status if setting a bit fails. */
    int matches = 0;

    for (int idx1 = 0 ; idx1 < group1->grp_proc_count ; ++idx1) {
        ompi_proc_t *candidate = ompi_group_get_proc_ptr_raw (group1, idx1);

        /* look for the first group2 entry holding the same raw pointer */
        for (int idx2 = 0 ; idx2 < group2->grp_proc_count ; ++idx2) {
            if (candidate != ompi_group_get_proc_ptr_raw (group2, idx2)) {
                continue;
            }

            int status = opal_bitmap_set_bit (bitmap, idx2);
            if (OPAL_SUCCESS != status) {
                return status;
            }

            ++matches;
            break;
        } /* end group2 scan */
    } /* end group1 scan */

    return matches;
}
|
||||
|
||||
static struct ompi_proc_t *ompi_group_dense_lookup_raw (ompi_group_t *group, const int peer_id)
{
    ompi_proc_t **slot = &group->grp_proc_pointers[peer_id];

    if (OPAL_UNLIKELY((intptr_t) *slot < 0)) {
        /* sentinel: check whether a proc for this name can be looked up */
        ompi_proc_t *cached =
            (ompi_proc_t *) ompi_proc_lookup (ompi_proc_sentinel_to_name ((intptr_t) *slot));

        if (NULL != cached) {
            /* swap the sentinel for the real proc and take a reference */
            *slot = cached;
            OBJ_RETAIN(cached);
        }
    }

    /* either a real proc pointer or the untouched sentinel */
    return *slot;
}
|
||||
|
||||
ompi_proc_t *ompi_group_get_proc_ptr_raw (ompi_group_t *group, int rank)
{
    /* group: group to search (sparse groups are followed to their parent)
     * rank:  rank of the wanted member within group
     * returns the result of ompi_group_dense_lookup_raw() on the dense
     * group that is finally reached */
#if OMPI_GROUP_SPARSE
    do {
        if (OMPI_GROUP_IS_DENSE(group)) {
            /* use the (translated) rank; there is no peer_id in this scope */
            return ompi_group_dense_lookup_raw (group, rank);
        }
        /* translate_ranks writes its result into rank, so hand it a copy
         * of the current value as input */
        int ranks1 = rank;
        ompi_group_translate_ranks (group, 1, &ranks1, group->grp_parent_group_ptr, &rank);
        group = group->grp_parent_group_ptr;
    } while (1);
#else
    return ompi_group_dense_lookup_raw (group, rank);
#endif
}
|
||||
|
||||
int ompi_group_calc_plist ( int n , const int *ranks )
{
    /* one pointer-sized slot per member; the rank list is not consulted */
    const size_t bytes_per_member = sizeof(char *);

    return bytes_per_member * n;
}
|
||||
@ -37,9 +97,8 @@ int ompi_group_incl_plist(ompi_group_t* group, int n, const int *ranks,
|
||||
ompi_group_t **new_group)
|
||||
{
|
||||
/* local variables */
|
||||
int proc,my_group_rank;
|
||||
int my_group_rank;
|
||||
ompi_group_t *group_pointer, *new_group_pointer;
|
||||
ompi_proc_t *my_proc_pointer;
|
||||
|
||||
group_pointer = (ompi_group_t *)group;
|
||||
|
||||
@ -56,9 +115,9 @@ int ompi_group_incl_plist(ompi_group_t* group, int n, const int *ranks,
|
||||
}
|
||||
|
||||
/* put group elements in the list */
|
||||
for (proc = 0; proc < n; proc++) {
|
||||
for (int proc = 0; proc < n; proc++) {
|
||||
new_group_pointer->grp_proc_pointers[proc] =
|
||||
ompi_group_peer_lookup(group_pointer,ranks[proc]);
|
||||
ompi_group_get_proc_ptr_raw (group_pointer, ranks[proc]);
|
||||
} /* end proc loop */
|
||||
|
||||
/* increment proc reference counters */
|
||||
@ -67,10 +126,8 @@ int ompi_group_incl_plist(ompi_group_t* group, int n, const int *ranks,
|
||||
/* find my rank */
|
||||
my_group_rank=group_pointer->grp_my_rank;
|
||||
if (MPI_UNDEFINED != my_group_rank) {
|
||||
my_proc_pointer=ompi_group_peer_lookup (group_pointer,my_group_rank);
|
||||
ompi_set_group_rank(new_group_pointer,my_proc_pointer);
|
||||
}
|
||||
else {
|
||||
ompi_set_group_rank(new_group_pointer, ompi_proc_local_proc);
|
||||
} else {
|
||||
new_group_pointer->grp_my_rank = MPI_UNDEFINED;
|
||||
}
|
||||
|
||||
@ -87,114 +144,77 @@ int ompi_group_union (ompi_group_t* group1, ompi_group_t* group2,
|
||||
ompi_group_t **new_group)
|
||||
{
|
||||
/* local variables */
|
||||
int new_group_size, proc1, proc2, found_in_group;
|
||||
int my_group_rank, cnt;
|
||||
ompi_group_t *group1_pointer, *group2_pointer, *new_group_pointer;
|
||||
ompi_proc_t *proc1_pointer, *proc2_pointer, *my_proc_pointer = NULL;
|
||||
|
||||
group1_pointer = (ompi_group_t *) group1;
|
||||
group2_pointer = (ompi_group_t *) group2;
|
||||
int new_group_size, cnt, rc, overlap_count;
|
||||
ompi_group_t *new_group_pointer;
|
||||
ompi_proc_t *proc2_pointer;
|
||||
opal_bitmap_t bitmap;
|
||||
|
||||
/*
|
||||
* form union
|
||||
*/
|
||||
|
||||
/* get new group size */
|
||||
new_group_size = group1_pointer->grp_proc_count;
|
||||
OBJ_CONSTRUCT(&bitmap, opal_bitmap_t);
|
||||
rc = opal_bitmap_init (&bitmap, 32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* check group2 elements to see if they need to be included in the list */
|
||||
for (proc2 = 0; proc2 < group2_pointer->grp_proc_count; proc2++) {
|
||||
proc2_pointer = ompi_group_peer_lookup(group2_pointer,proc2);
|
||||
|
||||
/* check to see if this proc2 is alread in the group */
|
||||
found_in_group = 0;
|
||||
for (proc1 = 0; proc1 < group1_pointer->grp_proc_count; proc1++) {
|
||||
proc1_pointer = ompi_group_peer_lookup(group1_pointer,proc1);
|
||||
|
||||
if (proc1_pointer == proc2_pointer) {
|
||||
/* proc2 is in group1 - don't double count */
|
||||
found_in_group = 1;
|
||||
break;
|
||||
}
|
||||
} /* end proc1 loop */
|
||||
|
||||
if (found_in_group) {
|
||||
continue;
|
||||
}
|
||||
|
||||
new_group_size++;
|
||||
} /* end proc loop */
|
||||
overlap_count = ompi_group_dense_overlap (group1, group2, &bitmap);
|
||||
if (0 > overlap_count) {
|
||||
OBJ_DESTRUCT(&bitmap);
|
||||
return overlap_count;
|
||||
}
|
||||
|
||||
new_group_size = group1->grp_proc_count + group2->grp_proc_count - overlap_count;
|
||||
if ( 0 == new_group_size ) {
|
||||
*new_group = MPI_GROUP_EMPTY;
|
||||
OBJ_RETAIN(MPI_GROUP_EMPTY);
|
||||
OBJ_DESTRUCT(&bitmap);
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* get new group struct */
|
||||
new_group_pointer = ompi_group_allocate(new_group_size);
|
||||
if (NULL == new_group_pointer) {
|
||||
OBJ_DESTRUCT(&bitmap);
|
||||
return MPI_ERR_GROUP;
|
||||
}
|
||||
|
||||
/* fill in the new group list */
|
||||
|
||||
/* put group1 elements in the list */
|
||||
for (proc1 = 0; proc1 < group1_pointer->grp_proc_count; proc1++) {
|
||||
for (int proc1 = 0; proc1 < group1->grp_proc_count; ++proc1) {
|
||||
new_group_pointer->grp_proc_pointers[proc1] =
|
||||
ompi_group_peer_lookup(group1_pointer,proc1);
|
||||
ompi_group_get_proc_ptr_raw (group1, proc1);
|
||||
}
|
||||
cnt = group1_pointer->grp_proc_count;
|
||||
cnt = group1->grp_proc_count;
|
||||
|
||||
/* check group2 elements to see if they need to be included in the list */
|
||||
for (proc2 = 0; proc2 < group2_pointer->grp_proc_count; proc2++) {
|
||||
proc2_pointer = ompi_group_peer_lookup(group2_pointer,proc2);
|
||||
|
||||
/* check to see if this proc2 is alread in the group */
|
||||
found_in_group = 0;
|
||||
for (proc1 = 0; proc1 < group1_pointer->grp_proc_count; proc1++) {
|
||||
proc1_pointer = ompi_group_peer_lookup(group1_pointer,proc1);
|
||||
|
||||
if (proc1_pointer == proc2_pointer) {
|
||||
/* proc2 is in group1 - don't double count */
|
||||
found_in_group = 1;
|
||||
break;
|
||||
}
|
||||
} /* end proc1 loop */
|
||||
|
||||
if (found_in_group) {
|
||||
for (int proc2 = 0; proc2 < group2->grp_proc_count; ++proc2) {
|
||||
if (opal_bitmap_is_set_bit (&bitmap, proc2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
new_group_pointer->grp_proc_pointers[cnt] =
|
||||
ompi_group_peer_lookup(group2_pointer,proc2);
|
||||
cnt++;
|
||||
proc2_pointer = ompi_group_get_proc_ptr_raw (group2, proc2);
|
||||
new_group_pointer->grp_proc_pointers[cnt++] = proc2_pointer;
|
||||
} /* end proc loop */
|
||||
|
||||
OBJ_DESTRUCT(&bitmap);
|
||||
|
||||
/* increment proc reference counters */
|
||||
ompi_group_increment_proc_count(new_group_pointer);
|
||||
|
||||
/* find my rank */
|
||||
my_group_rank = group1_pointer->grp_my_rank;
|
||||
if (MPI_UNDEFINED == my_group_rank) {
|
||||
my_group_rank = group2_pointer->grp_my_rank;
|
||||
if ( MPI_UNDEFINED != my_group_rank) {
|
||||
my_proc_pointer = ompi_group_peer_lookup(group2_pointer,my_group_rank);
|
||||
}
|
||||
if (MPI_UNDEFINED != group1->grp_my_rank || MPI_UNDEFINED != group2->grp_my_rank) {
|
||||
ompi_set_group_rank(new_group_pointer, ompi_proc_local_proc);
|
||||
} else {
|
||||
my_proc_pointer = ompi_group_peer_lookup(group1_pointer,my_group_rank);
|
||||
}
|
||||
|
||||
if ( MPI_UNDEFINED == my_group_rank ) {
|
||||
new_group_pointer->grp_my_rank = MPI_UNDEFINED;
|
||||
}
|
||||
else {
|
||||
ompi_set_group_rank(new_group_pointer, my_proc_pointer);
|
||||
}
|
||||
|
||||
*new_group = (MPI_Group) new_group_pointer;
|
||||
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -206,96 +226,65 @@ int ompi_group_difference(ompi_group_t* group1, ompi_group_t* group2,
|
||||
ompi_group_t **new_group) {
|
||||
|
||||
/* local variables */
|
||||
int new_group_size, proc1, proc2, found_in_group2, cnt;
|
||||
int my_group_rank;
|
||||
ompi_group_t *group1_pointer, *group2_pointer, *new_group_pointer;
|
||||
ompi_proc_t *proc1_pointer, *proc2_pointer, *my_proc_pointer = NULL;
|
||||
|
||||
|
||||
group1_pointer=(ompi_group_t *)group1;
|
||||
group2_pointer=(ompi_group_t *)group2;
|
||||
int new_group_size, overlap_count, rc;
|
||||
ompi_group_t *new_group_pointer;
|
||||
ompi_proc_t *proc1_pointer;
|
||||
opal_bitmap_t bitmap;
|
||||
|
||||
/*
|
||||
* form difference
|
||||
*/
|
||||
|
||||
/* get new group size */
|
||||
new_group_size=0;
|
||||
OBJ_CONSTRUCT(&bitmap, opal_bitmap_t);
|
||||
rc = opal_bitmap_init (&bitmap, 32);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* loop over group1 members */
|
||||
for( proc1=0; proc1 < group1_pointer->grp_proc_count; proc1++ ) {
|
||||
proc1_pointer = ompi_group_peer_lookup(group1_pointer,proc1);
|
||||
/* check to see if this proc is in group2 */
|
||||
found_in_group2=0;
|
||||
for( proc2=0 ; proc2 < group2_pointer->grp_proc_count ; proc2++ ) {
|
||||
proc2_pointer = ompi_group_peer_lookup(group2_pointer,proc2);
|
||||
if( proc1_pointer == proc2_pointer ) {
|
||||
found_in_group2=true;
|
||||
break;
|
||||
}
|
||||
} /* end proc1 loop */
|
||||
if(found_in_group2) {
|
||||
continue;
|
||||
}
|
||||
new_group_size++;
|
||||
} /* end proc loop */
|
||||
/* check group2 elements to see if they need to be included in the list */
|
||||
overlap_count = ompi_group_dense_overlap (group2, group1, &bitmap);
|
||||
if (0 > overlap_count) {
|
||||
OBJ_DESTRUCT(&bitmap);
|
||||
return overlap_count;
|
||||
}
|
||||
|
||||
new_group_size = group1->grp_proc_count - overlap_count;
|
||||
if ( 0 == new_group_size ) {
|
||||
*new_group = MPI_GROUP_EMPTY;
|
||||
OBJ_RETAIN(MPI_GROUP_EMPTY);
|
||||
OBJ_DESTRUCT(&bitmap);
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* allocate a new ompi_group_t structure */
|
||||
new_group_pointer=ompi_group_allocate(new_group_size);
|
||||
new_group_pointer = ompi_group_allocate(new_group_size);
|
||||
if( NULL == new_group_pointer ) {
|
||||
OBJ_DESTRUCT(&bitmap);
|
||||
return MPI_ERR_GROUP;
|
||||
}
|
||||
|
||||
/* fill in group list */
|
||||
cnt=0;
|
||||
/* loop over group1 members */
|
||||
for( proc1=0; proc1 < group1_pointer->grp_proc_count; proc1++ ) {
|
||||
proc1_pointer = ompi_group_peer_lookup(group1_pointer,proc1);
|
||||
/* check to see if this proc is in group2 */
|
||||
found_in_group2=0;
|
||||
for( proc2=0 ; proc2 < group2_pointer->grp_proc_count ; proc2++ ) {
|
||||
proc2_pointer = ompi_group_peer_lookup(group2_pointer,proc2);
|
||||
if( proc1_pointer == proc2_pointer ) {
|
||||
found_in_group2=true;
|
||||
break;
|
||||
}
|
||||
} /* end proc1 loop */
|
||||
if(found_in_group2) {
|
||||
for (int proc1 = 0, cnt = 0 ; proc1 < group1->grp_proc_count ; ++proc1) {
|
||||
if (opal_bitmap_is_set_bit (&bitmap, proc1)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
new_group_pointer->grp_proc_pointers[cnt] =
|
||||
ompi_group_peer_lookup(group1_pointer,proc1);
|
||||
|
||||
cnt++;
|
||||
proc1_pointer = ompi_group_get_proc_ptr_raw (group1, proc1);
|
||||
new_group_pointer->grp_proc_pointers[cnt++] = proc1_pointer;
|
||||
} /* end proc loop */
|
||||
|
||||
OBJ_DESTRUCT(&bitmap);
|
||||
|
||||
/* increment proc reference counters */
|
||||
ompi_group_increment_proc_count(new_group_pointer);
|
||||
|
||||
/* find my rank */
|
||||
my_group_rank=group1_pointer->grp_my_rank;
|
||||
if ( MPI_UNDEFINED != my_group_rank ) {
|
||||
my_proc_pointer = ompi_group_peer_lookup(group1_pointer,my_group_rank);
|
||||
}
|
||||
else {
|
||||
my_group_rank=group2_pointer->grp_my_rank;
|
||||
if ( MPI_UNDEFINED != my_group_rank ) {
|
||||
my_proc_pointer = ompi_group_peer_lookup(group2_pointer,my_group_rank);
|
||||
}
|
||||
}
|
||||
|
||||
if ( MPI_UNDEFINED == my_group_rank ) {
|
||||
if (MPI_UNDEFINED == group1->grp_my_rank || MPI_UNDEFINED != group2->grp_my_rank) {
|
||||
new_group_pointer->grp_my_rank = MPI_UNDEFINED;
|
||||
}
|
||||
else {
|
||||
ompi_set_group_rank(new_group_pointer,my_proc_pointer);
|
||||
} else {
|
||||
ompi_set_group_rank(new_group_pointer, ompi_proc_local_proc);
|
||||
}
|
||||
|
||||
*new_group = (MPI_Group)new_group_pointer;
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -10,6 +11,8 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2007 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -38,12 +41,10 @@ void ompi_set_group_rank(ompi_group_t *group, struct ompi_proc_t *proc_pointer)
|
||||
for (proc = 0; proc < group->grp_proc_count; proc++) {
|
||||
/* check and see if this proc pointer matches proc_pointer
|
||||
*/
|
||||
if (ompi_group_peer_lookup(group,proc) == proc_pointer) {
|
||||
if (ompi_group_peer_lookup_existing (group, proc) == proc_pointer) {
|
||||
group->grp_my_rank = proc;
|
||||
}
|
||||
break;
|
||||
}
|
||||
} /* end proc loop */
|
||||
}
|
||||
|
||||
/* return */
|
||||
return;
|
||||
}
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -10,6 +11,8 @@
|
||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -25,6 +28,7 @@
|
||||
#include "ompi/mca/mca.h"
|
||||
#include "opal/mca/base/mca_base_framework.h"
|
||||
#include "ompi/mca/bml/bml.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
|
||||
|
||||
/*
|
||||
@ -60,6 +64,14 @@ OMPI_DECLSPEC extern mca_bml_base_component_t mca_bml_component;
|
||||
OMPI_DECLSPEC extern mca_bml_base_module_t mca_bml;
|
||||
OMPI_DECLSPEC extern mca_base_framework_t ompi_bml_base_framework;
|
||||
|
||||
static inline struct mca_bml_base_endpoint_t *mca_bml_base_get_endpoint (struct ompi_proc_t *proc) {
|
||||
if (OPAL_UNLIKELY(NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML])) {
|
||||
mca_bml.bml_add_proc (proc);
|
||||
}
|
||||
|
||||
return (struct mca_bml_base_endpoint_t *) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
|
||||
}
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
#endif /* MCA_BML_BASE_H */
|
||||
|
@ -160,14 +160,11 @@ static inline bool mca_bml_base_btl_array_remove( mca_bml_base_btl_array_t* arra
|
||||
*/
|
||||
static inline mca_bml_base_btl_t* mca_bml_base_btl_array_get_index(mca_bml_base_btl_array_t* array, size_t item_index)
{
    /* The range check is unconditional (not limited to debug builds):
     * callers may probe "last element" with (size - 1) on an empty array,
     * which wraps to a huge index and must yield NULL rather than an
     * out-of-bounds pointer. */
    if (item_index < array->arr_size) {
        return &array->bml_btls[item_index];
    }

    /* index is outside the populated part of the array */
    return NULL;
}
|
||||
|
||||
/**
|
||||
@ -441,7 +438,7 @@ typedef int (*mca_bml_base_module_finalize_fn_t)( void );
|
||||
* @return OMPI_SUCCESS or error status on failure.
|
||||
*
|
||||
* The mca_bml_base_module_add_procs_fn_t() is called by the PML to
|
||||
* determine the set of BMLs that should be used to reach each process.
|
||||
* determine the set of BTLs that should be used to reach each process.
|
||||
* Any addressing information exported by the peer via the mca_base_modex_send()
|
||||
* function should be available during this call via the corresponding
|
||||
* mca_base_modex_recv() function. The BML may utilize this information to
|
||||
@ -465,6 +462,25 @@ typedef int (*mca_bml_base_module_add_procs_fn_t)(
|
||||
struct opal_bitmap_t* reachable
|
||||
);
|
||||
|
||||
/**
 * PML->BML notification of a single new process.
 *
 * @param proc (IN) Process
 * @return OMPI_SUCCESS or error status on failure.
 *
 * The mca_bml_base_module_add_proc_fn_t() is called by the PML to
 * determine the set of BTLs that should be used to reach the given process.
 * Any addressing information exported by the peer via the mca_base_modex_send()
 * function should be available during this call via the corresponding
 * mca_base_modex_recv() function. The BML may utilize this information to
 * determine reachability of the peer process.
 *
 * \note This function will return OMPI_ERR_UNREACH if the process can not
 * be reached by a currently active BTL. This is not a fatal error, and the
 * calling layer is free to continue using the BML interface.
 */
|
||||
typedef int (*mca_bml_base_module_add_proc_fn_t) (struct ompi_proc_t *proc);
|
||||
|
||||
/**
|
||||
* Notification of change to the process list.
|
||||
*
|
||||
@ -559,6 +575,7 @@ struct mca_bml_base_module_t {
|
||||
mca_bml_base_component_t* bml_component; /**< pointer back to the BML component structure */
|
||||
|
||||
/* BML function table */
|
||||
mca_bml_base_module_add_proc_fn_t bml_add_proc;
|
||||
mca_bml_base_module_add_procs_fn_t bml_add_procs;
|
||||
mca_bml_base_module_del_procs_fn_t bml_del_procs;
|
||||
mca_bml_base_module_add_btl_fn_t bml_add_btl;
|
||||
|
@ -10,7 +10,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved
|
||||
@ -144,6 +144,293 @@ static void mca_bml_r2_calculate_bandwidth_latency (mca_bml_base_btl_array_t *bt
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Create an empty BML endpoint for a proc and cache it on that proc.
 *
 * The eager, send and rdma BTL arrays are each reserved for
 * mca_bml_r2.num_btl_modules entries. The new endpoint is stored in the
 * proc's BML endpoint slot before it is returned.
 *
 * @param proc (IN) Process the endpoint belongs to.
 * @return The new endpoint, or NULL if the object could not be allocated
 *         (a message is emitted through opal_output in that case).
 */
static mca_bml_base_endpoint_t *mca_bml_r2_allocate_endpoint (ompi_proc_t *proc)
{
    /* allocate bml specific proc data */
    mca_bml_base_endpoint_t *ep = OBJ_NEW(mca_bml_base_endpoint_t);

    if (NULL == ep) {
        opal_output(0, "mca_bml_r2_add_procs: unable to allocate resources");
        return NULL;
    }

    /* make room for one entry per known BTL module in every array */
    mca_bml_base_btl_array_reserve(&ep->btl_eager, mca_bml_r2.num_btl_modules);
    mca_bml_base_btl_array_reserve(&ep->btl_send, mca_bml_r2.num_btl_modules);
    mca_bml_base_btl_array_reserve(&ep->btl_rdma, mca_bml_r2.num_btl_modules);

    /* starting values; -1 is the "no limit seen yet" marker that the metrics
     * pass lowers to the smallest BTL max send size */
    ep->btl_flags_or = 0;
    ep->btl_max_send_size = -1;
    ep->btl_proc = proc;

    /* cache the endpoint on the proc */
    proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = ep;

    return ep;
}
|
||||
|
||||
/**
 * Register a BTL component's progress function once.
 *
 * Does nothing if the component has no progress function or if that
 * function is already recorded in mca_bml_r2.btl_progress. Otherwise the
 * function is appended to that table and handed to opal_progress_register().
 *
 * @param btl (IN) BTL module whose component progress function is considered.
 */
static void mca_bml_r2_register_progress (mca_btl_base_module_t *btl)
{
    /* nothing to register for components without a progress function */
    if (NULL == btl->btl_component->btl_progress) {
        return;
    }

    /* already known? then there is nothing left to do */
    for (size_t slot = 0 ; slot < mca_bml_r2.num_btl_progress ; ++slot) {
        if (btl->btl_component->btl_progress == mca_bml_r2.btl_progress[slot]) {
            return;
        }
    }

    /* first sighting: remember it and hook it into the progress engine */
    mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress] = btl->btl_component->btl_progress;
    mca_bml_r2.num_btl_progress++;
    opal_progress_register (btl->btl_component->btl_progress);
}
|
||||
|
||||
static int mca_bml_r2_endpoint_add_btl (struct ompi_proc_t *proc, mca_bml_base_endpoint_t *bml_endpoint,
|
||||
mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *btl_endpoint)
|
||||
{
|
||||
mca_bml_base_btl_t* bml_btl = NULL;
|
||||
int btl_flags = btl->btl_flags;
|
||||
bool btl_in_use = false;
|
||||
size_t size;
|
||||
|
||||
/* NTH: these flags should have been sanitized by the btl. Once that is verified these
|
||||
* checks can be safely removed. */
|
||||
if ((btl_flags & MCA_BTL_FLAGS_PUT) && (NULL == btl->btl_put)) {
|
||||
opal_output(0, "mca_bml_r2_add_procs: The PUT flag is specified for"
|
||||
" the %s BTL without any PUT function attached. Discard the flag !",
|
||||
btl->btl_component->btl_version.mca_component_name);
|
||||
btl_flags ^= MCA_BTL_FLAGS_PUT;
|
||||
}
|
||||
if ((btl_flags & MCA_BTL_FLAGS_GET) && (NULL == btl->btl_get)) {
|
||||
opal_output(0, "mca_bml_r2_add_procs: The GET flag is specified for"
|
||||
" the %s BTL without any GET function attached. Discard the flag !",
|
||||
btl->btl_component->btl_version.mca_component_name);
|
||||
btl_flags ^= MCA_BTL_FLAGS_GET;
|
||||
}
|
||||
|
||||
if ((btl_flags & (MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_SEND)) == 0) {
|
||||
/* If no protocol specified, we have 2 choices: we ignore the BTL
|
||||
* as we don't know which protocl to use, or we suppose that all
|
||||
* BTLs support the send protocol. This is really a btl error as
|
||||
* these flags should have been sanitized by the btl. */
|
||||
btl_flags |= MCA_BTL_FLAGS_SEND;
|
||||
}
|
||||
|
||||
if (btl_flags & MCA_BTL_FLAGS_SEND) {
|
||||
/* dont allow an additional BTL with a lower exclusivity ranking */
|
||||
bml_btl = mca_bml_base_btl_array_get_index (&bml_endpoint->btl_send, size - 1);
|
||||
size = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_send);
|
||||
|
||||
if (!bml_btl || bml_btl->btl->btl_exclusivity < btl->btl_exclusivity) {
|
||||
/* this btl has higher exclusivity than an existing btl or none exists */
|
||||
|
||||
opal_output_verbose(1, opal_btl_base_framework.framework_output,
|
||||
"mca: bml: Using %s btl for send to %s on node %s",
|
||||
btl->btl_component->btl_version.mca_component_name,
|
||||
OMPI_NAME_PRINT(&proc->super.proc_name),
|
||||
proc->super.proc_hostname);
|
||||
|
||||
/* cache the endpoint on the proc */
|
||||
if (NULL == bml_btl || (bml_btl->btl->btl_exclusivity <= btl->btl_exclusivity)) {
|
||||
bml_btl = mca_bml_base_btl_array_insert (&bml_endpoint->btl_send);
|
||||
bml_btl->btl = btl;
|
||||
bml_btl->btl_endpoint = btl_endpoint;
|
||||
bml_btl->btl_weight = 0;
|
||||
bml_btl->btl_flags = btl_flags;
|
||||
|
||||
/**
|
||||
* calculate the bitwise OR of the btl flags
|
||||
*/
|
||||
bml_endpoint->btl_flags_or |= bml_btl->btl_flags;
|
||||
} else {
|
||||
opal_output_verbose(20, opal_btl_base_framework.framework_output,
|
||||
"mca: bml: Not using %s btl for send to %s on node %s "
|
||||
"because %s btl has higher exclusivity (%d > %d)",
|
||||
btl->btl_component->btl_version.mca_component_name,
|
||||
OMPI_NAME_PRINT(&proc->super.proc_name), proc->super.proc_hostname,
|
||||
bml_btl->btl->btl_component->btl_version.mca_component_name,
|
||||
bml_btl->btl->btl_exclusivity,
|
||||
btl->btl_exclusivity);
|
||||
}
|
||||
|
||||
btl_in_use = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* always add rdma endpoints */
|
||||
if ((btl_flags & MCA_BTL_FLAGS_RDMA) &&
|
||||
!((proc->super.proc_arch != ompi_proc_local_proc->super.proc_arch) &&
|
||||
(0 == (btl->btl_flags & MCA_BTL_FLAGS_HETEROGENEOUS_RDMA)))) {
|
||||
mca_bml_base_btl_t *bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma);
|
||||
|
||||
bml_btl_rdma->btl = btl;
|
||||
bml_btl_rdma->btl_endpoint = btl_endpoint;
|
||||
bml_btl_rdma->btl_weight = 0;
|
||||
bml_btl_rdma->btl_flags = btl_flags;
|
||||
|
||||
if (bml_endpoint->btl_pipeline_send_length < btl->btl_rdma_pipeline_send_length) {
|
||||
bml_endpoint->btl_pipeline_send_length = btl->btl_rdma_pipeline_send_length;
|
||||
}
|
||||
|
||||
if (bml_endpoint->btl_send_limit < btl->btl_min_rdma_pipeline_size) {
|
||||
bml_endpoint->btl_send_limit = btl->btl_min_rdma_pipeline_size;
|
||||
}
|
||||
|
||||
btl_in_use = true;
|
||||
}
|
||||
|
||||
return btl_in_use ? OMPI_SUCCESS : OMPI_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
/**
 * Derive per-BTL weights and the eager list for a populated BML endpoint.
 *
 * For the send array: sorts the entries with btl_bandwidth_compare, asks
 * mca_bml_r2_calculate_bandwidth_latency() for the total bandwidth and the
 * reference latency, assigns every entry a weight, copies entries whose
 * latency equals the reference latency into the eager array, and lowers the
 * endpoint's max send size to the smallest value among the send BTLs.
 * For the rdma array: sorts it the same way and assigns weights.
 *
 * @param bml_endpoint (IN/OUT) Endpoint whose send/rdma arrays are filled in.
 */
static void mca_bml_r2_compute_endpoint_metrics (mca_bml_base_endpoint_t *bml_endpoint)
{
    mca_bml_base_btl_array_t *send_array = &bml_endpoint->btl_send;
    mca_bml_base_btl_array_t *rdma_array = &bml_endpoint->btl_rdma;
    /* (1) determine the total bandwidth available across all btls
     *     note that we need to do this here, as we may already have btls configured
     * (2) determine the highest priority ranking for latency
     * (3) compute the maximum amount of bytes that can be send without any
     *     weighting. Once the left over is smaller than this number we will
     *     start using the weight to compute the correct amount.
     */
    const size_t send_count = mca_bml_base_btl_array_get_size (send_array);
    const size_t rdma_count = mca_bml_base_btl_array_get_size (rdma_array);
    /* shared by both passes below and deliberately not reset in between */
    double bandwidth_sum = 0;
    uint32_t ref_latency;

    /* sort BTLs in descending order according to bandwidth value */
    qsort (send_array->bml_btls, send_count,
           sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);

    bml_endpoint->btl_rdma_index = 0;

    mca_bml_r2_calculate_bandwidth_latency (send_array, &bandwidth_sum, &ref_latency);

    /* (1) set the weight of each btl as a percentage of overall bandwidth
     * (2) copy all btl instances at the highest priority ranking into the
     *     list of btls used for first fragments
     */
    for (size_t i = 0 ; i < send_count ; ++i) {
        mca_bml_base_btl_t *entry = mca_bml_base_btl_array_get_index (send_array, i);
        mca_btl_base_module_t *module = entry->btl;

        /* bandwidth share when known, otherwise an even split */
        entry->btl_weight = (module->btl_bandwidth > 0)
            ? (float)(module->btl_bandwidth / bandwidth_sum)
            : (float)(1.0 / send_count);

        /* entries matching the reference latency also serve first fragments */
        if (module->btl_latency == ref_latency) {
            mca_bml_base_btl_t *eager_entry =
                mca_bml_base_btl_array_insert (&bml_endpoint->btl_eager);
            *eager_entry = *entry;
        }

        /* set endpoint max send size as min of available btls */
        if (bml_endpoint->btl_max_send_size > module->btl_max_send_size) {
            bml_endpoint->btl_max_send_size = module->btl_max_send_size;
        }
    }

    /* sort BTLs in descending order according to bandwidth value */
    qsort (rdma_array->bml_btls, rdma_count,
           sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);

    mca_bml_r2_calculate_bandwidth_latency (rdma_array, &bandwidth_sum, &ref_latency);

    /* set rdma btl weights */
    for (size_t i = 0 ; i < rdma_count ; ++i) {
        mca_bml_base_btl_t *entry = mca_bml_base_btl_array_get_index (rdma_array, i);

        /* bandwidth share when known, otherwise an even split */
        entry->btl_weight = (entry->btl->btl_bandwidth > 0.0)
            ? (float)(entry->btl->btl_bandwidth / bandwidth_sum)
            : (float)(1.0 / rdma_count);
    }
}
|
||||
|
||||
static int mca_bml_r2_add_proc (struct ompi_proc_t *proc)
|
||||
{
|
||||
mca_bml_base_endpoint_t *bml_endpoint;
|
||||
/* at least one btl is in use */
|
||||
bool btl_in_use;
|
||||
int rc;
|
||||
|
||||
if (OPAL_UNLIKELY(NULL == proc)) {
|
||||
return OMPI_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* check if this endpoint is already set up */
|
||||
if (NULL != proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
|
||||
OBJ_RETAIN(proc);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* add btls if not already done */
|
||||
if (OMPI_SUCCESS != (rc = mca_bml_r2_add_btls())) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
bml_endpoint = mca_bml_r2_allocate_endpoint (proc);
|
||||
if (OPAL_UNLIKELY(NULL == bml_endpoint)) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
for (int p_index = 0 ; p_index < mca_bml_r2.num_btl_modules ; ++p_index) {
|
||||
mca_btl_base_module_t *btl = mca_bml_r2.btl_modules[p_index];
|
||||
struct mca_btl_base_endpoint_t *btl_endpoint = NULL;
|
||||
|
||||
/* if the r2 can reach the destination proc it sets the
|
||||
* corresponding bit (proc index) in the reachable bitmap
|
||||
* and can return addressing information for each proc
|
||||
* that is passed back to the r2 on data transfer calls
|
||||
*/
|
||||
rc = btl->btl_add_procs (btl, 1, (opal_proc_t **) &proc, &btl_endpoint, NULL);
|
||||
if (OMPI_SUCCESS != rc || NULL == btl_endpoint) {
|
||||
/* This BTL has troubles adding the nodes. Let's continue maybe some other BTL
|
||||
* can take care of this task. */
|
||||
continue;
|
||||
}
|
||||
|
||||
rc = mca_bml_r2_endpoint_add_btl (proc, bml_endpoint, btl, btl_endpoint);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
btl->btl_del_procs (btl, 1, (opal_proc_t **) &proc, &btl_endpoint);
|
||||
} else {
|
||||
mca_bml_r2_register_progress (btl);
|
||||
btl_in_use = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!btl_in_use) {
|
||||
/* no btl is available for this proc */
|
||||
if (mca_bml_r2.show_unreach_errors) {
|
||||
opal_show_help ("help-mca-bml-r2.txt", "unreachable proc", true,
|
||||
OMPI_NAME_PRINT(&(ompi_proc_local_proc->super.proc_name)),
|
||||
(NULL != ompi_proc_local_proc->super.proc_hostname ?
|
||||
ompi_proc_local_proc->super.proc_hostname : "unknown!"),
|
||||
OMPI_NAME_PRINT(&(proc->super.proc_name)),
|
||||
(NULL != proc->super.proc_hostname ?
|
||||
proc->super.proc_hostname : "unknown!"),
|
||||
btl_names);
|
||||
}
|
||||
|
||||
return OMPI_ERR_UNREACH;
|
||||
}
|
||||
|
||||
/* compute metrics for registered btls */
|
||||
mca_bml_r2_compute_endpoint_metrics (bml_endpoint);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* For each proc setup a datastructure that indicates the BTLs
|
||||
* that can be used to reach the destination.
|
||||
@ -154,7 +441,7 @@ static int mca_bml_r2_add_procs( size_t nprocs,
|
||||
struct ompi_proc_t** procs,
|
||||
struct opal_bitmap_t* reachable )
|
||||
{
|
||||
size_t p, p_index, n_new_procs = 0;
|
||||
size_t n_new_procs = 0;
|
||||
struct mca_btl_base_endpoint_t ** btl_endpoints = NULL;
|
||||
struct ompi_proc_t** new_procs = NULL;
|
||||
int rc, ret = OMPI_SUCCESS;
|
||||
@ -170,7 +457,7 @@ static int mca_bml_r2_add_procs( size_t nprocs,
|
||||
/* Select only the procs that don't yet have the BML proc struct. This prevent
|
||||
* us from calling btl->add_procs several times on the same destination proc.
|
||||
*/
|
||||
for(p_index = 0; p_index < nprocs; p_index++) {
|
||||
for (size_t p_index = 0 ; p_index < nprocs ; ++p_index) {
|
||||
struct ompi_proc_t* proc = procs[p_index];
|
||||
|
||||
if(NULL != proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
|
||||
@ -203,10 +490,9 @@ static int mca_bml_r2_add_procs( size_t nprocs,
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
for(p_index = 0; p_index < mca_bml_r2.num_btl_modules; p_index++) {
|
||||
mca_btl_base_module_t* btl = mca_bml_r2.btl_modules[p_index];
|
||||
for (size_t p_index = 0 ; p_index < mca_bml_r2.num_btl_modules ; ++p_index) {
|
||||
mca_btl_base_module_t *btl = mca_bml_r2.btl_modules[p_index];
|
||||
int btl_inuse = 0;
|
||||
int btl_flags;
|
||||
|
||||
/* if the r2 can reach the destination proc it sets the
|
||||
* corresponding bit (proc index) in the reachable bitmap
|
||||
@ -217,240 +503,69 @@ static int mca_bml_r2_add_procs( size_t nprocs,
|
||||
memset(btl_endpoints, 0, nprocs *sizeof(struct mca_btl_base_endpoint_t*));
|
||||
|
||||
rc = btl->btl_add_procs(btl, n_new_procs, (opal_proc_t**)new_procs, btl_endpoints, reachable);
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
/* This BTL has troubles adding the nodes. Let's continue maybe some other BTL
|
||||
* can take care of this task.
|
||||
*/
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
/* This BTL encountered an error while adding procs. Continue in case some other
|
||||
* BTL(s) can be used. */
|
||||
continue;
|
||||
}
|
||||
|
||||
/* for each proc that is reachable */
|
||||
for( p = 0; p < n_new_procs; p++ ) {
|
||||
if(opal_bitmap_is_set_bit(reachable, p)) {
|
||||
ompi_proc_t *proc = new_procs[p];
|
||||
mca_bml_base_endpoint_t * bml_endpoint =
|
||||
(mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
|
||||
mca_bml_base_btl_t* bml_btl = NULL;
|
||||
size_t size;
|
||||
|
||||
if(NULL == bml_endpoint) {
|
||||
/* allocate bml specific proc data */
|
||||
bml_endpoint = OBJ_NEW(mca_bml_base_endpoint_t);
|
||||
if (NULL == bml_endpoint) {
|
||||
opal_output(0, "mca_bml_r2_add_procs: unable to allocate resources");
|
||||
free(btl_endpoints);
|
||||
free(new_procs);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* preallocate space in array for max number of r2s */
|
||||
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_eager, mca_bml_r2.num_btl_modules);
|
||||
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_send, mca_bml_r2.num_btl_modules);
|
||||
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_rdma, mca_bml_r2.num_btl_modules);
|
||||
bml_endpoint->btl_max_send_size = -1;
|
||||
bml_endpoint->btl_proc = proc;
|
||||
proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = bml_endpoint;
|
||||
|
||||
bml_endpoint->btl_flags_or = 0;
|
||||
}
|
||||
|
||||
btl_flags = btl->btl_flags;
|
||||
if( (btl_flags & MCA_BTL_FLAGS_PUT) && (NULL == btl->btl_put) ) {
|
||||
opal_output(0, "mca_bml_r2_add_procs: The PUT flag is specified for"
|
||||
" the %s BTL without any PUT function attached. Discard the flag !",
|
||||
btl->btl_component->btl_version.mca_component_name);
|
||||
btl_flags ^= MCA_BTL_FLAGS_PUT;
|
||||
}
|
||||
if( (btl_flags & MCA_BTL_FLAGS_GET) && (NULL == btl->btl_get) ) {
|
||||
opal_output(0, "mca_bml_r2_add_procs: The GET flag is specified for"
|
||||
" the %s BTL without any GET function attached. Discard the flag !",
|
||||
btl->btl_component->btl_version.mca_component_name);
|
||||
btl_flags ^= MCA_BTL_FLAGS_GET;
|
||||
}
|
||||
|
||||
if( (btl_flags & (MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_SEND)) == 0 ) {
|
||||
/**
|
||||
* If no protocol specified, we have 2 choices: we ignore the BTL
|
||||
* as we don't know which protocl to use, or we suppose that all
|
||||
* BTLs support the send protocol.
|
||||
*/
|
||||
btl_flags |= MCA_BTL_FLAGS_SEND;
|
||||
}
|
||||
|
||||
/* dont allow an additional BTL with a lower exclusivity ranking */
|
||||
size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
|
||||
if(size > 0) {
|
||||
bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, size-1);
|
||||
/* skip this btl if the exclusivity is less than the previous only if the btl does not provide full rdma (for one-sided) */
|
||||
if(bml_btl->btl->btl_exclusivity > btl->btl_exclusivity && ((btl_flags & MCA_BTL_FLAGS_RDMA) != MCA_BTL_FLAGS_RDMA)) {
|
||||
btl->btl_del_procs(btl, 1, (opal_proc_t**)&proc, &btl_endpoints[p]);
|
||||
opal_output_verbose(MCA_BASE_VERBOSE_INFO, ompi_bml_base_framework.framework_output,
|
||||
"mca: bml: Not using %s btl to %s on node %s "
|
||||
"because %s btl has higher exclusivity (%d > %d)",
|
||||
btl->btl_component->btl_version.mca_component_name,
|
||||
OMPI_NAME_PRINT(&proc->super.proc_name), proc->super.proc_hostname,
|
||||
bml_btl->btl->btl_component->btl_version.mca_component_name,
|
||||
bml_btl->btl->btl_exclusivity,
|
||||
btl->btl_exclusivity);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
opal_output_verbose(MCA_BASE_VERBOSE_INFO, ompi_bml_base_framework.framework_output,
|
||||
"mca: bml: Using %s btl to %s on node %s",
|
||||
btl->btl_component->btl_version.mca_component_name,
|
||||
OMPI_NAME_PRINT(&proc->super.proc_name),
|
||||
proc->super.proc_hostname);
|
||||
|
||||
/* cache the endpoint on the proc */
|
||||
if (NULL == bml_btl || (bml_btl->btl->btl_exclusivity <= btl->btl_exclusivity)) {
|
||||
bml_btl = mca_bml_base_btl_array_insert(&bml_endpoint->btl_send);
|
||||
bml_btl->btl = btl;
|
||||
bml_btl->btl_endpoint = btl_endpoints[p];
|
||||
bml_btl->btl_weight = 0;
|
||||
bml_btl->btl_flags = btl_flags;
|
||||
|
||||
/**
|
||||
* calculate the bitwise OR of the btl flags
|
||||
*/
|
||||
bml_endpoint->btl_flags_or |= bml_btl->btl_flags;
|
||||
}
|
||||
|
||||
/* always add rdma endpoints */
|
||||
if ((btl_flags & MCA_BTL_FLAGS_RDMA) &&
|
||||
!((proc->super.proc_arch != ompi_proc_local_proc->super.proc_arch) &&
|
||||
(0 == (btl->btl_flags & MCA_BTL_FLAGS_HETEROGENEOUS_RDMA)))) {
|
||||
mca_bml_base_btl_t *bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma);
|
||||
|
||||
bml_btl_rdma->btl = btl;
|
||||
bml_btl_rdma->btl_endpoint = btl_endpoints[p];
|
||||
bml_btl_rdma->btl_weight = 0;
|
||||
bml_btl_rdma->btl_flags = btl_flags;
|
||||
|
||||
if (bml_endpoint->btl_pipeline_send_length < btl->btl_rdma_pipeline_send_length) {
|
||||
bml_endpoint->btl_pipeline_send_length = btl->btl_rdma_pipeline_send_length;
|
||||
}
|
||||
|
||||
if (bml_endpoint->btl_send_limit < btl->btl_min_rdma_pipeline_size) {
|
||||
bml_endpoint->btl_send_limit = btl->btl_min_rdma_pipeline_size;
|
||||
}
|
||||
}
|
||||
|
||||
/* This BTL is in use, allow the progress registration */
|
||||
btl_inuse++;
|
||||
for (size_t p = 0 ; p < n_new_procs ; ++p) {
|
||||
if (!opal_bitmap_is_set_bit(reachable, p)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ompi_proc_t *proc = new_procs[p];
|
||||
mca_bml_base_endpoint_t *bml_endpoint =
|
||||
(mca_bml_base_endpoint_t *) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
|
||||
mca_bml_base_btl_t *bml_btl = NULL;
|
||||
size_t size;
|
||||
|
||||
if (NULL == bml_endpoint) {
|
||||
bml_endpoint = mca_bml_r2_allocate_endpoint (proc);
|
||||
if (NULL == bml_endpoint) {
|
||||
free(btl_endpoints);
|
||||
free(new_procs);
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
|
||||
rc = mca_bml_r2_endpoint_add_btl (proc, bml_endpoint, btl, btl_endpoints[p]);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
btl->btl_del_procs(btl, 1, (opal_proc_t**)&proc, &btl_endpoints[p]);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* This BTL is in use, allow the progress registration */
|
||||
btl_inuse++;
|
||||
}
|
||||
|
||||
if(btl_inuse > 0 && NULL != btl->btl_component->btl_progress) {
|
||||
size_t p;
|
||||
bool found = false;
|
||||
for( p = 0; p < mca_bml_r2.num_btl_progress; p++ ) {
|
||||
if(mca_bml_r2.btl_progress[p] == btl->btl_component->btl_progress) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(found == false) {
|
||||
mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress] =
|
||||
btl->btl_component->btl_progress;
|
||||
mca_bml_r2.num_btl_progress++;
|
||||
opal_progress_register( btl->btl_component->btl_progress );
|
||||
}
|
||||
if (btl_inuse) {
|
||||
mca_bml_r2_register_progress (btl);
|
||||
}
|
||||
}
|
||||
|
||||
free(btl_endpoints);
|
||||
|
||||
/* iterate back through procs and compute metrics for registered r2s */
|
||||
for(p=0; p<n_new_procs; p++) {
|
||||
ompi_proc_t *proc = new_procs[p];
|
||||
mca_bml_base_endpoint_t* bml_endpoint =
|
||||
(mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
|
||||
double total_bandwidth = 0;
|
||||
uint32_t latency;
|
||||
size_t n_send, n_rdma;
|
||||
for (size_t p = 0; p < n_new_procs ; ++p) {
|
||||
mca_bml_base_endpoint_t *bml_endpoint =
|
||||
(mca_bml_base_endpoint_t *) new_procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
|
||||
|
||||
/* skip over procs w/ no btl's registered */
|
||||
if(NULL == bml_endpoint) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* (1) determine the total bandwidth available across all btls
|
||||
* note that we need to do this here, as we may already have btls configured
|
||||
* (2) determine the highest priority ranking for latency
|
||||
* (3) compute the maximum amount of bytes that can be send without any
|
||||
* weighting. Once the left over is smaller than this number we will
|
||||
* start using the weight to compute the correct amount.
|
||||
*/
|
||||
n_send = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
|
||||
n_rdma = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
|
||||
|
||||
/* sort BTLs in descending order according to bandwidth value */
|
||||
qsort(bml_endpoint->btl_send.bml_btls, n_send,
|
||||
sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);
|
||||
|
||||
bml_endpoint->btl_rdma_index = 0;
|
||||
|
||||
mca_bml_r2_calculate_bandwidth_latency (&bml_endpoint->btl_send, &total_bandwidth, &latency);
|
||||
|
||||
/* (1) set the weight of each btl as a percentage of overall bandwidth
|
||||
* (2) copy all btl instances at the highest priority ranking into the
|
||||
* list of btls used for first fragments
|
||||
*/
|
||||
for (size_t n_index = 0 ; n_index < n_send ; ++n_index) {
|
||||
mca_bml_base_btl_t* bml_btl =
|
||||
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
|
||||
mca_btl_base_module_t *btl = bml_btl->btl;
|
||||
|
||||
/* compute weighting factor for this r2 */
|
||||
if(btl->btl_bandwidth > 0) {
|
||||
bml_btl->btl_weight = (float)(btl->btl_bandwidth / total_bandwidth);
|
||||
} else {
|
||||
bml_btl->btl_weight = (float)(1.0 / n_send);
|
||||
}
|
||||
|
||||
/* check to see if this r2 is already in the array of r2s
|
||||
* used for first fragments - if not add it.
|
||||
*/
|
||||
if(btl->btl_latency == latency) {
|
||||
mca_bml_base_btl_t* bml_btl_new =
|
||||
mca_bml_base_btl_array_insert(&bml_endpoint->btl_eager);
|
||||
*bml_btl_new = *bml_btl;
|
||||
}
|
||||
|
||||
/* set endpoint max send size as min of available btls */
|
||||
if(bml_endpoint->btl_max_send_size > btl->btl_max_send_size)
|
||||
bml_endpoint->btl_max_send_size = btl->btl_max_send_size;
|
||||
}
|
||||
|
||||
/* sort BTLs in descending order according to bandwidth value */
|
||||
qsort(bml_endpoint->btl_rdma.bml_btls, n_rdma,
|
||||
sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);
|
||||
|
||||
mca_bml_r2_calculate_bandwidth_latency (&bml_endpoint->btl_rdma, &total_bandwidth, &latency);
|
||||
|
||||
/* set rdma btl weights */
|
||||
for (size_t n_index = 0 ; n_index < n_rdma ; ++n_index) {
|
||||
mca_bml_base_btl_t *bml_btl =
|
||||
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma, n_index);
|
||||
|
||||
/* compute weighting factor for this r2 */
|
||||
if (bml_btl->btl->btl_bandwidth > 0.0) {
|
||||
bml_btl->btl_weight = (float)(bml_btl->btl->btl_bandwidth / total_bandwidth);
|
||||
} else {
|
||||
bml_btl->btl_weight = (float)(1.0 / n_rdma);
|
||||
}
|
||||
if (NULL != bml_endpoint) {
|
||||
mca_bml_r2_compute_endpoint_metrics (bml_endpoint);
|
||||
}
|
||||
}
|
||||
|
||||
/* see if we have a connection to everyone else */
|
||||
for(p = 0; p < n_new_procs; p++) {
|
||||
for(size_t p = 0; p < n_new_procs ; ++p) {
|
||||
ompi_proc_t *proc = new_procs[p];
|
||||
|
||||
if (NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
|
||||
ret = OMPI_ERR_UNREACH;
|
||||
if (mca_bml_r2.show_unreach_errors) {
|
||||
opal_show_help("help-mca-bml-r2.txt",
|
||||
"unreachable proc",
|
||||
true,
|
||||
opal_show_help("help-mca-bml-r2.txt", "unreachable proc", true,
|
||||
OMPI_NAME_PRINT(&(ompi_proc_local_proc->super.proc_name)),
|
||||
(NULL != ompi_proc_local_proc->super.proc_hostname ?
|
||||
ompi_proc_local_proc->super.proc_hostname : "unknown!"),
|
||||
@ -459,6 +574,7 @@ static int mca_bml_r2_add_procs( size_t nprocs,
|
||||
proc->super.proc_hostname : "unknown!"),
|
||||
btl_names);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -476,7 +592,6 @@ static int mca_bml_r2_add_procs( size_t nprocs,
|
||||
static int mca_bml_r2_del_procs(size_t nprocs,
|
||||
struct ompi_proc_t** procs)
|
||||
{
|
||||
size_t p;
|
||||
int rc;
|
||||
struct ompi_proc_t** del_procs = (struct ompi_proc_t**)
|
||||
malloc(nprocs * sizeof(struct ompi_proc_t*));
|
||||
@ -486,26 +601,27 @@ static int mca_bml_r2_del_procs(size_t nprocs,
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
for(p = 0; p < nprocs; p++) {
|
||||
for (size_t p = 0 ; p < nprocs ; ++p) {
|
||||
ompi_proc_t *proc = procs[p];
|
||||
/* We much check that there are 2 references to the proc (not 1). The
|
||||
* first reference belongs to ompi/proc the second belongs to the bml
|
||||
* since we retained it. We will release that reference at the end of
|
||||
* the loop below. */
|
||||
if(((opal_object_t*)proc)->obj_reference_count == 2) {
|
||||
if (((opal_object_t*)proc)->obj_reference_count == 2 &&
|
||||
NULL != proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
|
||||
del_procs[n_del_procs++] = proc;
|
||||
}
|
||||
}
|
||||
|
||||
for(p = 0; p < n_del_procs; p++) {
|
||||
for (size_t p = 0 ; p < n_del_procs ; ++p) {
|
||||
ompi_proc_t *proc = del_procs[p];
|
||||
mca_bml_base_endpoint_t* bml_endpoint =
|
||||
(mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
|
||||
size_t f_index, f_size;
|
||||
size_t f_size;
|
||||
|
||||
/* notify each btl that the proc is going away */
|
||||
f_size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
|
||||
for(f_index = 0; f_index < f_size; f_index++) {
|
||||
for (size_t f_index = 0 ; f_index < f_size ; ++f_index) {
|
||||
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, f_index);
|
||||
mca_btl_base_module_t* btl = bml_btl->btl;
|
||||
|
||||
@ -521,10 +637,12 @@ static int mca_bml_r2_del_procs(size_t nprocs,
|
||||
*/
|
||||
}
|
||||
|
||||
proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
|
||||
|
||||
OBJ_RELEASE(proc);
|
||||
|
||||
/* do any required cleanup */
|
||||
OBJ_RELEASE(bml_endpoint);
|
||||
proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
|
||||
}
|
||||
free(del_procs);
|
||||
|
||||
@ -835,6 +953,7 @@ int mca_bml_r2_component_fini(void)
|
||||
mca_bml_r2_module_t mca_bml_r2 = {
|
||||
.super = {
|
||||
.bml_component = &mca_bml_r2_component,
|
||||
.bml_add_proc = mca_bml_r2_add_proc,
|
||||
.bml_add_procs = mca_bml_r2_add_procs,
|
||||
.bml_del_procs = mca_bml_r2_del_procs,
|
||||
.bml_add_btl = mca_bml_r2_add_btl,
|
||||
@ -843,8 +962,7 @@ mca_bml_r2_module_t mca_bml_r2 = {
|
||||
.bml_register = mca_bml_r2_register,
|
||||
.bml_register_error = mca_bml_r2_register_error,
|
||||
.bml_finalize = mca_bml_r2_finalize,
|
||||
.bml_ft_event = mca_bml_r2_ft_event
|
||||
}
|
||||
|
||||
.bml_ft_event = mca_bml_r2_ft_event,
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -35,25 +35,6 @@ int mca_coll_fca_init_query(bool enable_progress_threads,
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int have_remote_peers(ompi_group_t *group, size_t size, int *local_peers)
|
||||
{
|
||||
ompi_proc_t *proc;
|
||||
size_t i;
|
||||
int ret;
|
||||
|
||||
*local_peers = 0;
|
||||
ret = 0;
|
||||
for (i = 0; i < size; ++i) {
|
||||
proc = ompi_group_peer_lookup(group, i);
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
|
||||
++*local_peers;
|
||||
} else {
|
||||
ret = 1;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline ompi_proc_t* __local_rank_lookup(ompi_communicator_t *comm, int rank)
|
||||
{
|
||||
return ompi_group_peer_lookup(comm->c_local_group, rank);
|
||||
@ -618,7 +599,7 @@ mca_coll_fca_comm_query(struct ompi_communicator_t *comm, int *priority)
|
||||
if (size < mca_coll_fca_component.fca_np)
|
||||
goto exit;
|
||||
|
||||
if (!have_remote_peers(comm->c_local_group, size, &local_peers) || OMPI_COMM_IS_INTER(comm))
|
||||
if (!ompi_group_have_remote_peers(comm->c_local_group) || OMPI_COMM_IS_INTER(comm))
|
||||
goto exit;
|
||||
|
||||
fca_module = OBJ_NEW(mca_coll_fca_module_t);
|
||||
|
@ -74,7 +74,6 @@ uint32_t mca_coll_sm_one = 1;
|
||||
*/
|
||||
static int sm_module_enable(mca_coll_base_module_t *module,
|
||||
struct ompi_communicator_t *comm);
|
||||
static bool have_local_peers(ompi_group_t *group, size_t size);
|
||||
static int bootstrap_comm(ompi_communicator_t *comm,
|
||||
mca_coll_sm_module_t *module);
|
||||
static int mca_coll_sm_module_disable(mca_coll_base_module_t *module,
|
||||
@ -172,8 +171,7 @@ mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority)
|
||||
/* If we're intercomm, or if there's only one process in the
|
||||
communicator, or if not all the processes in the communicator
|
||||
are not on this node, then we don't want to run */
|
||||
if (OMPI_COMM_IS_INTER(comm) || 1 == ompi_comm_size(comm) ||
|
||||
!have_local_peers(comm->c_local_group, ompi_comm_size(comm))) {
|
||||
if (OMPI_COMM_IS_INTER(comm) || 1 == ompi_comm_size(comm) || ompi_group_have_remote_peers (comm->c_local_group)) {
|
||||
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
|
||||
"coll:sm:comm_query (%d/%s): intercomm, comm is too small, or not all peers local; disqualifying myself", comm->c_contextid, comm->c_name);
|
||||
return NULL;
|
||||
@ -490,23 +488,6 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module,
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static bool have_local_peers(ompi_group_t *group, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
ompi_proc_t *proc;
|
||||
|
||||
for (i = 0; i < size; ++i) {
|
||||
proc = ompi_group_peer_lookup(group,i);
|
||||
if (!OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static int bootstrap_comm(ompi_communicator_t *comm,
|
||||
mca_coll_sm_module_t *module)
|
||||
{
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -10,6 +11,8 @@
|
||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -23,6 +26,7 @@
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/mca/mtl/mtl.h"
|
||||
#include "ompi/mca/mtl/base/base.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include <psm.h>
|
||||
#include <psm_mq.h>
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -10,6 +11,8 @@
|
||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -54,5 +57,14 @@ struct mca_mtl_psm_endpoint_t {
|
||||
typedef struct mca_mtl_psm_endpoint_t mca_mtl_psm_endpoint_t;
|
||||
OBJ_CLASS_DECLARATION(mca_mtl_psm_endpoint);
|
||||
|
||||
static inline mca_mtl_psm_endpoint_t *ompi_mtl_psm_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc)
|
||||
{
|
||||
if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL])) {
|
||||
ompi_mtl_psm_add_procs (mtl, 1, &ompi_proc);
|
||||
}
|
||||
|
||||
return ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
|
||||
}
|
||||
|
||||
END_C_DECLS
|
||||
#endif
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -10,6 +11,8 @@
|
||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -42,7 +45,7 @@ ompi_mtl_psm_send(struct mca_mtl_base_module_t* mtl,
|
||||
int ret;
|
||||
size_t length;
|
||||
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
|
||||
mca_mtl_psm_endpoint_t* psm_endpoint = (mca_mtl_psm_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
|
||||
mca_mtl_psm_endpoint_t* psm_endpoint = ompi_mtl_psm_get_endpoint (mtl, ompi_proc);
|
||||
|
||||
assert(mtl == &ompi_mtl_psm.super);
|
||||
|
||||
@ -94,7 +97,7 @@ ompi_mtl_psm_isend(struct mca_mtl_base_module_t* mtl,
|
||||
mca_mtl_psm_request_t * mtl_psm_request = (mca_mtl_psm_request_t*) mtl_request;
|
||||
size_t length;
|
||||
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
|
||||
mca_mtl_psm_endpoint_t* psm_endpoint = (mca_mtl_psm_endpoint_t*)ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
|
||||
mca_mtl_psm_endpoint_t* psm_endpoint = ompi_mtl_psm_get_endpoint (mtl, ompi_proc);
|
||||
|
||||
assert(mtl == &ompi_mtl_psm.super);
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -11,6 +12,8 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
|
||||
* Copyright (c) 2015 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -24,6 +27,7 @@
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/mca/mtl/mtl.h"
|
||||
#include "ompi/mca/mtl/base/base.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include <psm2.h>
|
||||
#include <psm2_mq.h>
|
||||
|
@ -55,5 +55,14 @@ struct mca_mtl_psm2_endpoint_t {
|
||||
typedef struct mca_mtl_psm2_endpoint_t mca_mtl_psm2_endpoint_t;
|
||||
OBJ_CLASS_DECLARATION(mca_mtl_psm2_endpoint);
|
||||
|
||||
static inline mca_mtl_psm_endpoint_t *ompi_mtl_psm2_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc)
|
||||
{
|
||||
if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL])) {
|
||||
ompi_mtl_psm2_add_procs (mtl, 1, &ompi_proc);
|
||||
}
|
||||
|
||||
return ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
|
||||
}
|
||||
|
||||
END_C_DECLS
|
||||
#endif
|
||||
|
@ -43,7 +43,7 @@ ompi_mtl_psm2_send(struct mca_mtl_base_module_t* mtl,
|
||||
int ret;
|
||||
size_t length;
|
||||
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
|
||||
mca_mtl_psm2_endpoint_t* psm_endpoint = (mca_mtl_psm2_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
|
||||
mca_mtl_psm2_endpoint_t* psm_endpoint = ompi_mtl_psm2_get_endpoint (mtl, ompi_proc);
|
||||
|
||||
assert(mtl == &ompi_mtl_psm2.super);
|
||||
|
||||
@ -95,7 +95,7 @@ ompi_mtl_psm2_isend(struct mca_mtl_base_module_t* mtl,
|
||||
mca_mtl_psm2_request_t * mtl_psm2_request = (mca_mtl_psm2_request_t*) mtl_request;
|
||||
size_t length;
|
||||
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
|
||||
mca_mtl_psm2_endpoint_t* psm_endpoint = (mca_mtl_psm2_endpoint_t*)ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
|
||||
mca_mtl_psm2_endpoint_t* psm_endpoint = ompi_mtl_psm2_get_endpoint (mtl, ompi_proc);
|
||||
|
||||
assert(mtl == &ompi_mtl_psm2.super);
|
||||
|
||||
|
@ -299,7 +299,7 @@ ompi_osc_portals4_get_peer(ompi_osc_portals4_module_t *module, int rank)
|
||||
static inline ptl_process_t
|
||||
ompi_osc_portals4_get_peer_group(struct ompi_group_t *group, int rank)
|
||||
{
|
||||
ompi_proc_t *proc = ompi_group_get_proc_ptr(group, rank);
|
||||
ompi_proc_t *proc = ompi_group_get_proc_ptr(group, rank, true);
|
||||
return *((ptl_process_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
|
||||
}
|
||||
|
||||
|
@ -134,10 +134,8 @@ check_win_ok(ompi_communicator_t *comm, int flavor)
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < ompi_comm_size(comm) ; ++i) {
|
||||
if (!OPAL_PROC_ON_LOCAL_NODE(ompi_comm_peer_lookup(comm, i)->super.proc_flags)) {
|
||||
return OMPI_ERR_RMA_SHARED;
|
||||
}
|
||||
if (ompi_group_have_remote_peers (comm->c_local_group)) {
|
||||
return OMPI_ERR_RMA_SHARED;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
|
@ -191,11 +191,9 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
|
||||
{
|
||||
/* allocate pml specific comm data */
|
||||
mca_pml_ob1_comm_t* pml_comm = OBJ_NEW(mca_pml_ob1_comm_t);
|
||||
opal_list_item_t *item, *next_item;
|
||||
mca_pml_ob1_recv_frag_t* frag;
|
||||
mca_pml_ob1_recv_frag_t *frag, *next_frag;
|
||||
mca_pml_ob1_comm_proc_t* pml_proc;
|
||||
mca_pml_ob1_match_hdr_t* hdr;
|
||||
int i;
|
||||
|
||||
if (NULL == pml_comm) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
@ -210,16 +208,8 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
|
||||
mca_pml_ob1_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count);
|
||||
comm->c_pml_comm = pml_comm;
|
||||
|
||||
for( i = 0; i < comm->c_remote_group->grp_proc_count; i++ ) {
|
||||
pml_comm->procs[i].ompi_proc = ompi_group_peer_lookup(comm->c_remote_group,i);
|
||||
OBJ_RETAIN(pml_comm->procs[i].ompi_proc);
|
||||
}
|
||||
/* Grab all related messages from the non_existing_communicator pending queue */
|
||||
for( item = opal_list_get_first(&mca_pml_ob1.non_existing_communicator_pending);
|
||||
item != opal_list_get_end(&mca_pml_ob1.non_existing_communicator_pending);
|
||||
item = next_item ) {
|
||||
frag = (mca_pml_ob1_recv_frag_t*)item;
|
||||
next_item = opal_list_get_next(item);
|
||||
OPAL_LIST_FOREACH_SAFE(frag, next_frag, &mca_pml_ob1.non_existing_communicator_pending, mca_pml_ob1_recv_frag_t) {
|
||||
hdr = &frag->hdr.hdr_match;
|
||||
|
||||
/* Is this fragment for the current communicator ? */
|
||||
@ -229,8 +219,8 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
|
||||
/* As we now know we work on a fragment for this communicator
|
||||
* we should remove it from the
|
||||
* non_existing_communicator_pending list. */
|
||||
opal_list_remove_item( &mca_pml_ob1.non_existing_communicator_pending,
|
||||
item );
|
||||
opal_list_remove_item (&mca_pml_ob1.non_existing_communicator_pending,
|
||||
(opal_list_item_t *) frag);
|
||||
|
||||
add_fragment_to_unexpected:
|
||||
|
||||
@ -249,7 +239,7 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
|
||||
* We just have to push the fragment into the unexpected list of the corresponding
|
||||
* proc, or into the out-of-order (cant_match) list.
|
||||
*/
|
||||
pml_proc = &(pml_comm->procs[hdr->hdr_src]);
|
||||
pml_proc = mca_pml_ob1_peer_lookup(comm, hdr->hdr_src);
|
||||
|
||||
if( ((uint16_t)hdr->hdr_seq) == ((uint16_t)pml_proc->expected_sequence) ) {
|
||||
/* We're now expecting the next sequence number. */
|
||||
@ -283,12 +273,6 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
|
||||
|
||||
int mca_pml_ob1_del_comm(ompi_communicator_t* comm)
|
||||
{
|
||||
mca_pml_ob1_comm_t* pml_comm = comm->c_pml_comm;
|
||||
int i;
|
||||
|
||||
for( i = 0; i < comm->c_remote_group->grp_proc_count; i++ ) {
|
||||
OBJ_RELEASE(pml_comm->procs[i].ompi_proc);
|
||||
}
|
||||
OBJ_RELEASE(comm->c_pml_comm);
|
||||
comm->c_pml_comm = NULL;
|
||||
return OMPI_SUCCESS;
|
||||
@ -303,9 +287,9 @@ int mca_pml_ob1_del_comm(ompi_communicator_t* comm)
|
||||
|
||||
int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs)
|
||||
{
|
||||
mca_btl_base_selected_module_t *sm;
|
||||
opal_bitmap_t reachable;
|
||||
int rc;
|
||||
opal_list_item_t *item;
|
||||
|
||||
if(nprocs == 0)
|
||||
return OMPI_SUCCESS;
|
||||
@ -347,11 +331,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs)
|
||||
BTLs requires iterating over the procs, as the BML does not
|
||||
expose all currently in use btls. */
|
||||
|
||||
for (item = opal_list_get_first(&mca_btl_base_modules_initialized) ;
|
||||
item != opal_list_get_end(&mca_btl_base_modules_initialized) ;
|
||||
item = opal_list_get_next(item)) {
|
||||
mca_btl_base_selected_module_t *sm =
|
||||
(mca_btl_base_selected_module_t*) item;
|
||||
OPAL_LIST_FOREACH(sm, &mca_btl_base_modules_initialized, mca_btl_base_selected_module_t) {
|
||||
if (sm->btl_module->btl_eager_limit < sizeof(mca_pml_ob1_hdr_t)) {
|
||||
opal_show_help("help-mpi-pml-ob1.txt", "eager_limit_too_small",
|
||||
true,
|
||||
@ -589,13 +569,19 @@ int mca_pml_ob1_dump(struct ompi_communicator_t* comm, int verbose)
|
||||
|
||||
/* iterate through all procs on communicator */
|
||||
for( i = 0; i < (int)pml_comm->num_procs; i++ ) {
|
||||
mca_pml_ob1_comm_proc_t* proc = &pml_comm->procs[i];
|
||||
mca_pml_ob1_comm_proc_t* proc = pml_comm->procs[i];
|
||||
|
||||
if (NULL == proc) {
|
||||
continue;
|
||||
}
|
||||
|
||||
mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
|
||||
size_t n;
|
||||
|
||||
opal_output(0, "[Rank %d] expected_seq %d ompi_proc %p send_seq %d\n",
|
||||
i, proc->expected_sequence, (void*) proc->ompi_proc,
|
||||
proc->send_sequence);
|
||||
|
||||
/* dump all receive queues */
|
||||
if( opal_list_get_size(&proc->specific_receives) ) {
|
||||
opal_output(0, "expected specific receives\n");
|
||||
|
@ -40,14 +40,15 @@ static void mca_pml_ob1_comm_proc_destruct(mca_pml_ob1_comm_proc_t* proc)
|
||||
OBJ_DESTRUCT(&proc->frags_cant_match);
|
||||
OBJ_DESTRUCT(&proc->specific_receives);
|
||||
OBJ_DESTRUCT(&proc->unexpected_frags);
|
||||
if (proc->ompi_proc) {
|
||||
OBJ_RELEASE(proc->ompi_proc);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static OBJ_CLASS_INSTANCE(
|
||||
mca_pml_ob1_comm_proc_t,
|
||||
opal_object_t,
|
||||
mca_pml_ob1_comm_proc_construct,
|
||||
mca_pml_ob1_comm_proc_destruct);
|
||||
OBJ_CLASS_INSTANCE(mca_pml_ob1_comm_proc_t, opal_object_t,
|
||||
mca_pml_ob1_comm_proc_construct,
|
||||
mca_pml_ob1_comm_proc_destruct);
|
||||
|
||||
|
||||
static void mca_pml_ob1_comm_construct(mca_pml_ob1_comm_t* comm)
|
||||
@ -63,11 +64,16 @@ static void mca_pml_ob1_comm_construct(mca_pml_ob1_comm_t* comm)
|
||||
|
||||
static void mca_pml_ob1_comm_destruct(mca_pml_ob1_comm_t* comm)
|
||||
{
|
||||
size_t i;
|
||||
for(i=0; i<comm->num_procs; i++)
|
||||
OBJ_DESTRUCT((&comm->procs[i]));
|
||||
if(NULL != comm->procs)
|
||||
if (NULL != comm->procs) {
|
||||
for (size_t i = 0; i < comm->num_procs; ++i) {
|
||||
if (comm->procs[i]) {
|
||||
OBJ_RELEASE(comm->procs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
free(comm->procs);
|
||||
}
|
||||
|
||||
OBJ_DESTRUCT(&comm->wild_receives);
|
||||
OBJ_DESTRUCT(&comm->matching_lock);
|
||||
}
|
||||
@ -80,18 +86,13 @@ OBJ_CLASS_INSTANCE(
|
||||
mca_pml_ob1_comm_destruct);
|
||||
|
||||
|
||||
int mca_pml_ob1_comm_init_size(mca_pml_ob1_comm_t* comm, size_t size)
|
||||
int mca_pml_ob1_comm_init_size (mca_pml_ob1_comm_t* comm, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
/* send message sequence-number support - sender side */
|
||||
comm->procs = (mca_pml_ob1_comm_proc_t*)malloc(sizeof(mca_pml_ob1_comm_proc_t)*size);
|
||||
comm->procs = (mca_pml_ob1_comm_proc_t **) calloc(size, sizeof (mca_pml_ob1_comm_proc_t *));
|
||||
if(NULL == comm->procs) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
for(i=0; i<size; i++) {
|
||||
OBJ_CONSTRUCT(comm->procs+i, mca_pml_ob1_comm_proc_t);
|
||||
}
|
||||
comm->num_procs = size;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "opal/threads/mutex.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
BEGIN_C_DECLS
|
||||
|
||||
|
||||
@ -42,6 +43,7 @@ struct mca_pml_ob1_comm_proc_t {
|
||||
};
|
||||
typedef struct mca_pml_ob1_comm_proc_t mca_pml_ob1_comm_proc_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_pml_ob1_comm_proc_t);
|
||||
|
||||
/**
|
||||
* Cached on ompi_communicator_t to hold queues/state
|
||||
@ -56,7 +58,7 @@ struct mca_pml_comm_t {
|
||||
#endif
|
||||
opal_mutex_t matching_lock; /**< matching lock */
|
||||
opal_list_t wild_receives; /**< queue of unmatched wild (source process not specified) receives */
|
||||
mca_pml_ob1_comm_proc_t* procs;
|
||||
mca_pml_ob1_comm_proc_t **procs;
|
||||
size_t num_procs;
|
||||
size_t last_probed;
|
||||
};
|
||||
@ -64,6 +66,18 @@ typedef struct mca_pml_comm_t mca_pml_ob1_comm_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_pml_ob1_comm_t);
|
||||
|
||||
static inline mca_pml_ob1_comm_proc_t *mca_pml_ob1_peer_lookup (struct ompi_communicator_t *comm, int rank)
|
||||
{
|
||||
mca_pml_ob1_comm_t *pml_comm = (mca_pml_ob1_comm_t *)comm->c_pml_comm;
|
||||
|
||||
if (OPAL_UNLIKELY(NULL == pml_comm->procs[rank])) {
|
||||
pml_comm->procs[rank] = OBJ_NEW(mca_pml_ob1_comm_proc_t);
|
||||
pml_comm->procs[rank]->ompi_proc = ompi_comm_peer_lookup (comm, rank);
|
||||
OBJ_RETAIN(pml_comm->procs[rank]->ompi_proc);
|
||||
}
|
||||
|
||||
return pml_comm->procs[rank];
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize an instance of mca_pml_ob1_comm_t based on the communicator size.
|
||||
|
@ -144,9 +144,12 @@ static int mca_pml_ob1_get_unex_msgq_size (const struct mca_base_pvar_t *pvar, v
|
||||
int i;
|
||||
|
||||
for (i = 0 ; i < comm_size ; ++i) {
|
||||
pml_proc = pml_comm->procs + i;
|
||||
|
||||
values[i] = opal_list_get_size (&pml_proc->unexpected_frags);
|
||||
pml_proc = pml_comm->procs[i];
|
||||
if (pml_proc) {
|
||||
values[i] = opal_list_get_size (&pml_proc->unexpected_frags);
|
||||
} else {
|
||||
values[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
@ -162,9 +165,13 @@ static int mca_pml_ob1_get_posted_recvq_size (const struct mca_base_pvar_t *pvar
|
||||
int i;
|
||||
|
||||
for (i = 0 ; i < comm_size ; ++i) {
|
||||
pml_proc = pml_comm->procs + i;
|
||||
pml_proc = pml_comm->procs[i];
|
||||
|
||||
values[i] = opal_list_get_size (&pml_proc->specific_receives);
|
||||
if (pml_proc) {
|
||||
values[i] = opal_list_get_size (&pml_proc->specific_receives);
|
||||
} else {
|
||||
values[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
|
@ -148,7 +148,6 @@ mca_pml_ob1_imrecv( void *buf,
|
||||
int src, tag;
|
||||
ompi_communicator_t *comm;
|
||||
mca_pml_ob1_comm_proc_t* proc;
|
||||
mca_pml_ob1_comm_t* ob1_comm;
|
||||
uint64_t seq;
|
||||
|
||||
/* get the request from the message and the frag from the request
|
||||
@ -158,7 +157,6 @@ mca_pml_ob1_imrecv( void *buf,
|
||||
src = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE;
|
||||
tag = recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG;
|
||||
comm = (*message)->comm;
|
||||
ob1_comm = recvreq->req_recv.req_base.req_comm->c_pml_comm;
|
||||
seq = recvreq->req_recv.req_base.req_sequence;
|
||||
|
||||
/* make the request a recv request again */
|
||||
@ -196,7 +194,7 @@ mca_pml_ob1_imrecv( void *buf,
|
||||
/* Note - sequence number already assigned */
|
||||
recvreq->req_recv.req_base.req_sequence = seq;
|
||||
|
||||
proc = &ob1_comm->procs[recvreq->req_recv.req_base.req_peer];
|
||||
proc = mca_pml_ob1_peer_lookup (comm, recvreq->req_recv.req_base.req_peer);
|
||||
recvreq->req_recv.req_base.req_proc = proc->ompi_proc;
|
||||
prepare_recv_req_converter(recvreq);
|
||||
|
||||
@ -243,7 +241,6 @@ mca_pml_ob1_mrecv( void *buf,
|
||||
int src, tag, rc;
|
||||
ompi_communicator_t *comm;
|
||||
mca_pml_ob1_comm_proc_t* proc;
|
||||
mca_pml_ob1_comm_t* ob1_comm;
|
||||
uint64_t seq;
|
||||
|
||||
/* get the request from the message and the frag from the request
|
||||
@ -254,7 +251,6 @@ mca_pml_ob1_mrecv( void *buf,
|
||||
src = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE;
|
||||
tag = recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG;
|
||||
seq = recvreq->req_recv.req_base.req_sequence;
|
||||
ob1_comm = recvreq->req_recv.req_base.req_comm->c_pml_comm;
|
||||
|
||||
/* make the request a recv request again */
|
||||
/* The old request kept pointers to comm and the char datatype.
|
||||
@ -290,7 +286,7 @@ mca_pml_ob1_mrecv( void *buf,
|
||||
/* Note - sequence number already assigned */
|
||||
recvreq->req_recv.req_base.req_sequence = seq;
|
||||
|
||||
proc = &ob1_comm->procs[recvreq->req_recv.req_base.req_peer];
|
||||
proc = mca_pml_ob1_peer_lookup (comm, recvreq->req_recv.req_base.req_peer);
|
||||
recvreq->req_recv.req_base.req_proc = proc->ompi_proc;
|
||||
prepare_recv_req_converter(recvreq);
|
||||
|
||||
|
@ -126,15 +126,14 @@ int mca_pml_ob1_isend(const void *buf,
|
||||
ompi_communicator_t * comm,
|
||||
ompi_request_t ** request)
|
||||
{
|
||||
mca_pml_ob1_comm_t* ob1_comm = comm->c_pml_comm;
|
||||
mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup (comm, dst);
|
||||
mca_pml_ob1_send_request_t *sendreq = NULL;
|
||||
ompi_proc_t *dst_proc = ompi_comm_peer_lookup (comm, dst);
|
||||
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*)
|
||||
dst_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
|
||||
ompi_proc_t *dst_proc = ob1_proc->ompi_proc;
|
||||
mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (dst_proc);
|
||||
int16_t seqn;
|
||||
int rc;
|
||||
|
||||
seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_comm->procs[dst].send_sequence, 1);
|
||||
seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1);
|
||||
|
||||
if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) {
|
||||
rc = mca_pml_ob1_send_inline (buf, count, datatype, dst, tag, seqn, dst_proc,
|
||||
@ -176,10 +175,9 @@ int mca_pml_ob1_send(const void *buf,
|
||||
mca_pml_base_send_mode_t sendmode,
|
||||
ompi_communicator_t * comm)
|
||||
{
|
||||
mca_pml_ob1_comm_t* ob1_comm = comm->c_pml_comm;
|
||||
ompi_proc_t *dst_proc = ompi_comm_peer_lookup (comm, dst);
|
||||
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*)
|
||||
dst_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
|
||||
mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup (comm, dst);
|
||||
ompi_proc_t *dst_proc = ob1_proc->ompi_proc;
|
||||
mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (dst_proc);
|
||||
mca_pml_ob1_send_request_t *sendreq = NULL;
|
||||
int16_t seqn;
|
||||
int rc;
|
||||
@ -202,7 +200,7 @@ int mca_pml_ob1_send(const void *buf,
|
||||
return OMPI_ERR_UNREACH;
|
||||
}
|
||||
|
||||
seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_comm->procs[dst].send_sequence, 1);
|
||||
seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1);
|
||||
|
||||
/**
|
||||
* The immediate send will not have a request, so they are
|
||||
|
@ -143,7 +143,7 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl,
|
||||
comm = (mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm;
|
||||
|
||||
/* source sequence number */
|
||||
proc = &comm->procs[hdr->hdr_src];
|
||||
proc = mca_pml_ob1_peer_lookup (comm_ptr, hdr->hdr_src);
|
||||
|
||||
/* We generate the MSG_ARRIVED event as soon as the PML is aware
|
||||
* of a matching fragment arrival. Independing if it is received
|
||||
@ -650,7 +650,7 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl,
|
||||
|
||||
/* source sequence number */
|
||||
frag_msg_seq = hdr->hdr_seq;
|
||||
proc = &comm->procs[hdr->hdr_src];
|
||||
proc = mca_pml_ob1_peer_lookup (comm_ptr, hdr->hdr_src);
|
||||
|
||||
/**
|
||||
* We generate the MSG_ARRIVED event as soon as the PML is aware of a matching
|
||||
|
@ -100,7 +100,8 @@ static int mca_pml_ob1_recv_request_free(struct ompi_request_t** request)
|
||||
static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request, int complete)
|
||||
{
|
||||
mca_pml_ob1_recv_request_t* request = (mca_pml_ob1_recv_request_t*)ompi_request;
|
||||
mca_pml_ob1_comm_t* comm = request->req_recv.req_base.req_comm->c_pml_comm;
|
||||
ompi_communicator_t *comm = request->req_recv.req_base.req_comm;
|
||||
mca_pml_ob1_comm_t *ob1_comm = comm->c_pml_comm;
|
||||
|
||||
if( true == request->req_match_received ) { /* way to late to cancel this one */
|
||||
assert( OMPI_ANY_TAG != ompi_request->req_status.MPI_TAG ); /* not matched isn't it */
|
||||
@ -108,11 +109,11 @@ static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request,
|
||||
}
|
||||
|
||||
/* The rest should be protected behind the match logic lock */
|
||||
OPAL_THREAD_LOCK(&comm->matching_lock);
|
||||
OPAL_THREAD_LOCK(&ob1_comm->matching_lock);
|
||||
if( request->req_recv.req_base.req_peer == OMPI_ANY_SOURCE ) {
|
||||
opal_list_remove_item( &comm->wild_receives, (opal_list_item_t*)request );
|
||||
opal_list_remove_item( &ob1_comm->wild_receives, (opal_list_item_t*)request );
|
||||
} else {
|
||||
mca_pml_ob1_comm_proc_t* proc = comm->procs + request->req_recv.req_base.req_peer;
|
||||
mca_pml_ob1_comm_proc_t* proc = mca_pml_ob1_peer_lookup (comm, request->req_recv.req_base.req_peer);
|
||||
opal_list_remove_item(&proc->specific_receives, (opal_list_item_t*)request);
|
||||
}
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q,
|
||||
@ -122,7 +123,7 @@ static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request,
|
||||
* to true. Otherwise, the request will never be freed.
|
||||
*/
|
||||
request->req_recv.req_base.req_pml_complete = true;
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
OPAL_THREAD_UNLOCK(&ob1_comm->matching_lock);
|
||||
|
||||
OPAL_THREAD_LOCK(&ompi_request_lock);
|
||||
ompi_request->req_status._cancelled = true;
|
||||
@ -260,7 +261,7 @@ static int mca_pml_ob1_recv_request_ack(
|
||||
ompi_proc_t* proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc;
|
||||
mca_bml_base_endpoint_t* bml_endpoint = NULL;
|
||||
|
||||
bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
|
||||
bml_endpoint = mca_bml_base_get_endpoint (proc);
|
||||
|
||||
/* by default copy everything */
|
||||
recvreq->req_send_offset = bytes_received;
|
||||
@ -654,7 +655,7 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
|
||||
}
|
||||
|
||||
/* lookup bml datastructures */
|
||||
bml_endpoint = (mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
|
||||
bml_endpoint = mca_bml_base_get_endpoint (recvreq->req_recv.req_base.req_proc);
|
||||
rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
@ -1079,8 +1080,11 @@ static mca_pml_ob1_recv_frag_t*
|
||||
recv_req_match_specific_proc( const mca_pml_ob1_recv_request_t *req,
|
||||
mca_pml_ob1_comm_proc_t *proc )
|
||||
{
|
||||
if (NULL == proc) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
opal_list_t* unexpected_frags = &proc->unexpected_frags;
|
||||
opal_list_item_t *i;
|
||||
mca_pml_ob1_recv_frag_t* frag;
|
||||
int tag = req->req_recv.req_base.req_tag;
|
||||
|
||||
@ -1088,20 +1092,12 @@ recv_req_match_specific_proc( const mca_pml_ob1_recv_request_t *req,
|
||||
return NULL;
|
||||
|
||||
if( OMPI_ANY_TAG == tag ) {
|
||||
for (i = opal_list_get_first(unexpected_frags);
|
||||
i != opal_list_get_end(unexpected_frags);
|
||||
i = opal_list_get_next(i)) {
|
||||
frag = (mca_pml_ob1_recv_frag_t*)i;
|
||||
|
||||
OPAL_LIST_FOREACH(frag, unexpected_frags, mca_pml_ob1_recv_frag_t) {
|
||||
if( frag->hdr.hdr_match.hdr_tag >= 0 )
|
||||
return frag;
|
||||
}
|
||||
} else {
|
||||
for (i = opal_list_get_first(unexpected_frags);
|
||||
i != opal_list_get_end(unexpected_frags);
|
||||
i = opal_list_get_next(i)) {
|
||||
frag = (mca_pml_ob1_recv_frag_t*)i;
|
||||
|
||||
OPAL_LIST_FOREACH(frag, unexpected_frags, mca_pml_ob1_recv_frag_t) {
|
||||
if( frag->hdr.hdr_match.hdr_tag == tag )
|
||||
return frag;
|
||||
}
|
||||
@ -1118,7 +1114,7 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req,
|
||||
mca_pml_ob1_comm_proc_t **p)
|
||||
{
|
||||
mca_pml_ob1_comm_t* comm = req->req_recv.req_base.req_comm->c_pml_comm;
|
||||
mca_pml_ob1_comm_proc_t* proc = comm->procs;
|
||||
mca_pml_ob1_comm_proc_t **procp = comm->procs;
|
||||
size_t i;
|
||||
|
||||
/*
|
||||
@ -1133,10 +1129,10 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req,
|
||||
mca_pml_ob1_recv_frag_t* frag;
|
||||
|
||||
/* loop over messages from the current proc */
|
||||
if((frag = recv_req_match_specific_proc(req, &proc[i]))) {
|
||||
*p = &proc[i];
|
||||
if((frag = recv_req_match_specific_proc(req, procp[i]))) {
|
||||
*p = procp[i];
|
||||
comm->last_probed = i;
|
||||
req->req_recv.req_base.req_proc = proc[i].ompi_proc;
|
||||
req->req_recv.req_base.req_proc = procp[i]->ompi_proc;
|
||||
prepare_recv_req_converter(req);
|
||||
return frag; /* match found */
|
||||
}
|
||||
@ -1145,10 +1141,10 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req,
|
||||
mca_pml_ob1_recv_frag_t* frag;
|
||||
|
||||
/* loop over messages from the current proc */
|
||||
if((frag = recv_req_match_specific_proc(req, &proc[i]))) {
|
||||
*p = &proc[i];
|
||||
if((frag = recv_req_match_specific_proc(req, procp[i]))) {
|
||||
*p = procp[i];
|
||||
comm->last_probed = i;
|
||||
req->req_recv.req_base.req_proc = proc[i].ompi_proc;
|
||||
req->req_recv.req_base.req_proc = procp[i]->ompi_proc;
|
||||
prepare_recv_req_converter(req);
|
||||
return frag; /* match found */
|
||||
}
|
||||
@ -1161,7 +1157,8 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req,
|
||||
|
||||
void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
|
||||
{
|
||||
mca_pml_ob1_comm_t* comm = req->req_recv.req_base.req_comm->c_pml_comm;
|
||||
ompi_communicator_t *comm = req->req_recv.req_base.req_comm;
|
||||
mca_pml_ob1_comm_t *ob1_comm = comm->c_pml_comm;
|
||||
mca_pml_ob1_comm_proc_t* proc;
|
||||
mca_pml_ob1_recv_frag_t* frag;
|
||||
opal_list_t *queue;
|
||||
@ -1179,7 +1176,7 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
|
||||
|
||||
MCA_PML_BASE_RECV_START(&req->req_recv.req_base);
|
||||
|
||||
OPAL_THREAD_LOCK(&comm->matching_lock);
|
||||
OPAL_THREAD_LOCK(&ob1_comm->matching_lock);
|
||||
/**
|
||||
* The lapse of time between the ACTIVATE event and the SEARCH_UNEX one includes
|
||||
* the cost of the request lock.
|
||||
@ -1188,12 +1185,12 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
|
||||
&(req->req_recv.req_base), PERUSE_RECV);
|
||||
|
||||
/* assign sequence number */
|
||||
req->req_recv.req_base.req_sequence = comm->recv_sequence++;
|
||||
req->req_recv.req_base.req_sequence = ob1_comm->recv_sequence++;
|
||||
|
||||
/* attempt to match posted recv */
|
||||
if(req->req_recv.req_base.req_peer == OMPI_ANY_SOURCE) {
|
||||
frag = recv_req_match_wild(req, &proc);
|
||||
queue = &comm->wild_receives;
|
||||
queue = &ob1_comm->wild_receives;
|
||||
#if !OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
/* As we are in a homogeneous environment we know that all remote
|
||||
* architectures are exactly the same as the local one. Therefore,
|
||||
@ -1206,7 +1203,7 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
|
||||
}
|
||||
#endif /* !OPAL_ENABLE_HETEROGENEOUS_SUPPORT */
|
||||
} else {
|
||||
proc = &comm->procs[req->req_recv.req_base.req_peer];
|
||||
proc = mca_pml_ob1_peer_lookup (comm, req->req_recv.req_base.req_peer);
|
||||
req->req_recv.req_base.req_proc = proc->ompi_proc;
|
||||
frag = recv_req_match_specific_proc(req, proc);
|
||||
queue = &proc->specific_receives;
|
||||
@ -1221,7 +1218,7 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
|
||||
it when the message comes in. */
|
||||
append_recv_req_to_queue(queue, req);
|
||||
req->req_match_received = false;
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
OPAL_THREAD_UNLOCK(&ob1_comm->matching_lock);
|
||||
} else {
|
||||
if(OPAL_LIKELY(!IS_PROB_REQ(req))) {
|
||||
PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_MATCH_UNEX,
|
||||
@ -1239,7 +1236,7 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
|
||||
|
||||
opal_list_remove_item(&proc->unexpected_frags,
|
||||
(opal_list_item_t*)frag);
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
OPAL_THREAD_UNLOCK(&ob1_comm->matching_lock);
|
||||
|
||||
switch(hdr->hdr_common.hdr_type) {
|
||||
case MCA_PML_OB1_HDR_TYPE_MATCH:
|
||||
@ -1269,14 +1266,14 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
|
||||
restarted with this request during mrecv */
|
||||
opal_list_remove_item(&proc->unexpected_frags,
|
||||
(opal_list_item_t*)frag);
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
OPAL_THREAD_UNLOCK(&ob1_comm->matching_lock);
|
||||
|
||||
req->req_recv.req_base.req_addr = frag;
|
||||
mca_pml_ob1_recv_request_matched_probe(req, frag->btl,
|
||||
frag->segments, frag->num_segments);
|
||||
|
||||
} else {
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
OPAL_THREAD_UNLOCK(&ob1_comm->matching_lock);
|
||||
mca_pml_ob1_recv_request_matched_probe(req, frag->btl,
|
||||
frag->segments, frag->num_segments);
|
||||
}
|
||||
|
@ -433,8 +433,7 @@ static inline int mca_pml_ob1_recv_request_ack_send(ompi_proc_t* proc,
|
||||
{
|
||||
size_t i;
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
mca_bml_base_endpoint_t* endpoint =
|
||||
(mca_bml_base_endpoint_t*)proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
|
||||
mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (proc);
|
||||
|
||||
for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) {
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
|
||||
|
@ -480,16 +480,16 @@ mca_pml_ob1_send_request_start_seq (mca_pml_ob1_send_request_t* sendreq, mca_bml
|
||||
static inline int
|
||||
mca_pml_ob1_send_request_start( mca_pml_ob1_send_request_t* sendreq )
|
||||
{
|
||||
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*)
|
||||
sendreq->req_send.req_base.req_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
|
||||
mca_pml_ob1_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm;
|
||||
mca_bml_base_endpoint_t *endpoint = mca_bml_base_get_endpoint (sendreq->req_send.req_base.req_proc);
|
||||
ompi_communicator_t *comm = sendreq->req_send.req_base.req_comm;
|
||||
mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup (comm, sendreq->req_send.req_base.req_peer);
|
||||
int32_t seqn;
|
||||
|
||||
if (OPAL_UNLIKELY(NULL == endpoint)) {
|
||||
return OMPI_ERR_UNREACH;
|
||||
}
|
||||
|
||||
seqn = OPAL_THREAD_ADD32(&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence, 1);
|
||||
seqn = OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1);
|
||||
|
||||
return mca_pml_ob1_send_request_start_seq (sendreq, endpoint, seqn);
|
||||
}
|
||||
|
389
ompi/proc/proc.c
389
ompi/proc/proc.c
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -10,7 +11,7 @@
|
||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
@ -43,6 +44,8 @@
|
||||
|
||||
static opal_list_t ompi_proc_list;
|
||||
static opal_mutex_t ompi_proc_lock;
|
||||
static opal_hash_table_t ompi_proc_hash;
|
||||
|
||||
ompi_proc_t* ompi_proc_local_proc = NULL;
|
||||
|
||||
static void ompi_proc_construct(ompi_proc_t* proc);
|
||||
@ -83,49 +86,223 @@ void ompi_proc_destruct(ompi_proc_t* proc)
|
||||
}
|
||||
OPAL_THREAD_LOCK(&ompi_proc_lock);
|
||||
opal_list_remove_item(&ompi_proc_list, (opal_list_item_t*)proc);
|
||||
opal_hash_table_remove_value_ptr (&ompi_proc_hash, &proc->super.proc_name, sizeof (proc->super.proc_name));
|
||||
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate a new ompi_proc_T for the given jobid/vpid
|
||||
*
|
||||
* @param[in] jobid Job identifier
|
||||
* @param[in] vpid Process identifier
|
||||
* @param[out] procp New ompi_proc_t structure
|
||||
*
|
||||
* This function allocates a new ompi_proc_t and inserts it into
|
||||
* the process list and hash table.
|
||||
*/
|
||||
static int ompi_proc_allocate (ompi_jobid_t jobid, ompi_vpid_t vpid, ompi_proc_t **procp) {
|
||||
ompi_proc_t *proc = OBJ_NEW(ompi_proc_t);
|
||||
|
||||
opal_list_append(&ompi_proc_list, (opal_list_item_t*)proc);
|
||||
|
||||
OMPI_CAST_RTE_NAME(&proc->super.proc_name)->jobid = jobid;
|
||||
OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid = vpid;
|
||||
|
||||
opal_hash_table_set_value_ptr (&ompi_proc_hash, &proc->super.proc_name, sizeof (proc->super.proc_name),
|
||||
proc);
|
||||
|
||||
*procp = proc;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finish setting up an ompi_proc_t
|
||||
*
|
||||
* @param[in] proc ompi process structure
|
||||
*
|
||||
* This function contains the core code of ompi_proc_complete_init() and
|
||||
* ompi_proc_refresh(). The tasks performed by this function include
|
||||
* retrieving the hostname (if below the modex cutoff), determining the
|
||||
* remote architecture, and calculating the locality of the process.
|
||||
*/
|
||||
static int ompi_proc_complete_init_single (ompi_proc_t *proc)
|
||||
{
|
||||
uint16_t u16, *u16ptr;
|
||||
int ret;
|
||||
|
||||
u16ptr = &u16;
|
||||
|
||||
if (OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid == OMPI_PROC_MY_NAME->vpid) {
|
||||
/* nothing else to do */
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* get the locality information - all RTEs are required
|
||||
* to provide this information at startup */
|
||||
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc->super.proc_name, &u16ptr, OPAL_UINT16);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
proc->super.proc_flags = OPAL_PROC_NON_LOCAL;
|
||||
} else {
|
||||
proc->super.proc_flags = u16;
|
||||
}
|
||||
|
||||
/* we can retrieve the hostname at no cost because it
|
||||
* was provided at startup */
|
||||
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_HOSTNAME, &proc->super.proc_name,
|
||||
(char**)&(proc->super.proc_hostname), OPAL_STRING);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
return ret;
|
||||
}
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
/* get the remote architecture - this might force a modex except
|
||||
* for those environments where the RM provides it */
|
||||
{
|
||||
uint32_t *ui32ptr;
|
||||
ui32ptr = &(proc->super.proc_arch);
|
||||
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_ARCH, &proc->super.proc_name,
|
||||
(void**)&ui32ptr, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS == ret) {
|
||||
/* if arch is different than mine, create a new convertor for this proc */
|
||||
if (proc->super.proc_arch != opal_local_arch) {
|
||||
OBJ_RELEASE(proc->super.proc_convertor);
|
||||
proc->super.proc_convertor = opal_convertor_create(proc->super.proc_arch, 0);
|
||||
}
|
||||
} else if (OMPI_ERR_NOT_IMPLEMENTED == ret) {
|
||||
proc->super.proc_arch = opal_local_arch;
|
||||
} else {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* must be same arch as my own */
|
||||
proc->super.proc_arch = opal_local_arch;
|
||||
#endif
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
opal_proc_t *ompi_proc_lookup (const opal_process_name_t proc_name)
|
||||
{
|
||||
ompi_proc_t *proc = NULL;
|
||||
int ret;
|
||||
|
||||
/* try to lookup the value in the hash table */
|
||||
ret = opal_hash_table_get_value_ptr (&ompi_proc_hash, &proc_name, sizeof (proc_name), (void **) &proc);
|
||||
|
||||
if (OPAL_SUCCESS == ret) {
|
||||
return &proc->super;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
opal_proc_t *ompi_proc_for_name (const opal_process_name_t proc_name)
|
||||
{
|
||||
ompi_proc_t *proc = NULL;
|
||||
int ret;
|
||||
|
||||
/* try to lookup the value in the hash table */
|
||||
ret = opal_hash_table_get_value_ptr (&ompi_proc_hash, &proc_name, sizeof (proc_name), (void **) &proc);
|
||||
if (OPAL_SUCCESS == ret) {
|
||||
return &proc->super;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&ompi_proc_lock);
|
||||
do {
|
||||
/* double-check that another competing thread has not added this proc */
|
||||
ret = opal_hash_table_get_value_ptr (&ompi_proc_hash, &proc_name, sizeof (proc_name), (void **) &proc);
|
||||
if (OPAL_SUCCESS == ret) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* allocate a new ompi_proc_t object for the process and insert it into the process table */
|
||||
ret = ompi_proc_allocate (proc_name.jobid, proc_name.vpid, &proc);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
/* allocation fail */
|
||||
break;
|
||||
}
|
||||
|
||||
/* finish filling in the important proc data fields */
|
||||
ret = ompi_proc_complete_init_single (proc);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
proc = NULL;
|
||||
break;
|
||||
}
|
||||
} while (0);
|
||||
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
|
||||
|
||||
return (opal_proc_t *) proc;
|
||||
}
|
||||
|
||||
int ompi_proc_init(void)
|
||||
{
|
||||
ompi_vpid_t i;
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
int opal_proc_hash_init_size = (ompi_process_info.num_procs < ompi_add_procs_cutoff) ? ompi_process_info.num_procs :
|
||||
1024;
|
||||
ompi_proc_t *proc;
|
||||
int ret;
|
||||
#endif
|
||||
|
||||
OBJ_CONSTRUCT(&ompi_proc_list, opal_list_t);
|
||||
OBJ_CONSTRUCT(&ompi_proc_lock, opal_mutex_t);
|
||||
OBJ_CONSTRUCT(&ompi_proc_hash, opal_hash_table_t);
|
||||
|
||||
/* create proc structures and find self */
|
||||
for( i = 0; i < ompi_process_info.num_procs; i++ ) {
|
||||
ompi_proc_t *proc = OBJ_NEW(ompi_proc_t);
|
||||
opal_list_append(&ompi_proc_list, (opal_list_item_t*)proc);
|
||||
ret = opal_hash_table_init (&ompi_proc_hash, opal_proc_hash_init_size);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
OMPI_CAST_RTE_NAME(&proc->super.proc_name)->jobid = OMPI_PROC_MY_NAME->jobid;
|
||||
OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid = i;
|
||||
/* create a proc for the local process */
|
||||
ret = ompi_proc_allocate (OMPI_PROC_MY_NAME->jobid, OMPI_PROC_MY_NAME->vpid, &proc);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
if (i == OMPI_PROC_MY_NAME->vpid) {
|
||||
ompi_proc_local_proc = proc;
|
||||
proc->super.proc_flags = OPAL_PROC_ALL_LOCAL;
|
||||
proc->super.proc_hostname = strdup(ompi_process_info.nodename);
|
||||
proc->super.proc_arch = opal_local_arch;
|
||||
/* Register the local proc with OPAL */
|
||||
opal_proc_local_set(&proc->super);
|
||||
/* set local process data */
|
||||
ompi_proc_local_proc = proc;
|
||||
proc->super.proc_flags = OPAL_PROC_ALL_LOCAL;
|
||||
proc->super.proc_hostname = strdup(ompi_process_info.nodename);
|
||||
proc->super.proc_arch = opal_local_arch;
|
||||
/* Register the local proc with OPAL */
|
||||
opal_proc_local_set(&proc->super);
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
/* add our arch to the modex */
|
||||
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL,
|
||||
OPAL_PMIX_ARCH, &opal_local_arch, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
/* add our arch to the modex */
|
||||
OPAL_MODEX_SEND_VALUE(ret, PMIX_GLOBAL,
|
||||
OPAL_PMIX_ARCH, &opal_local_arch, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (ompi_process_info.num_procs < ompi_add_procs_cutoff) {
|
||||
/* create proc structures and find self */
|
||||
for (ompi_vpid_t i = 0 ; i < ompi_process_info.num_procs ; ++i ) {
|
||||
if (i == OMPI_PROC_MY_NAME->vpid) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = ompi_proc_allocate (OMPI_PROC_MY_NAME->jobid, i, &proc);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * List-sort comparison callback ordering two ompi_proc_t items by vpid.
 * Returns 1 when the first item has the larger vpid and -1 otherwise.
 */
static int ompi_proc_compare_vid (opal_list_item_t **a, opal_list_item_t **b)
{
    ompi_proc_t *lhs = (ompi_proc_t *) *a;
    ompi_proc_t *rhs = (ompi_proc_t *) *b;

    /* they should never be equal, so equality is folded into "smaller" */
    return (lhs->super.proc_name.vpid > rhs->super.proc_name.vpid) ? 1 : -1;
}
|
||||
|
||||
/**
|
||||
* The process creation is split into two steps. The second step
|
||||
@ -140,58 +317,47 @@ int ompi_proc_complete_init(void)
|
||||
{
|
||||
ompi_proc_t *proc;
|
||||
int ret, errcode = OMPI_SUCCESS;
|
||||
uint16_t u16, *u16ptr;
|
||||
|
||||
OPAL_THREAD_LOCK(&ompi_proc_lock);
|
||||
u16ptr = &u16;
|
||||
|
||||
OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
|
||||
if (OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid != OMPI_PROC_MY_NAME->vpid) {
|
||||
/* get the locality information - all RTEs are required
|
||||
* to provide this information at startup */
|
||||
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc->super.proc_name, &u16ptr, OPAL_UINT16);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
proc->super.proc_flags = OPAL_PROC_NON_LOCAL;
|
||||
} else {
|
||||
proc->super.proc_flags = u16;
|
||||
}
|
||||
|
||||
/* we can retrieve the hostname at no cost because it
|
||||
* was provided at startup */
|
||||
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_HOSTNAME, &proc->super.proc_name,
|
||||
(char**)&(proc->super.proc_hostname), OPAL_STRING);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
/* we can live without it */
|
||||
proc->super.proc_hostname = NULL;
|
||||
}
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
/* get the remote architecture - this might force a modex except
|
||||
* for those environments where the RM provides it */
|
||||
{
|
||||
uint32_t *ui32ptr;
|
||||
ui32ptr = &(proc->super.proc_arch);
|
||||
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_ARCH, &proc->super.proc_name,
|
||||
(void**)&ui32ptr, OPAL_UINT32);
|
||||
if (OPAL_SUCCESS == ret) {
|
||||
/* if arch is different than mine, create a new convertor for this proc */
|
||||
if (proc->super.proc_arch != opal_local_arch) {
|
||||
OBJ_RELEASE(proc->super.proc_convertor);
|
||||
proc->super.proc_convertor = opal_convertor_create(proc->super.proc_arch, 0);
|
||||
}
|
||||
} else if (OMPI_ERR_NOT_IMPLEMENTED == ret) {
|
||||
proc->super.proc_arch = opal_local_arch;
|
||||
} else {
|
||||
errcode = ret;
|
||||
break;
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* must be same arch as my own */
|
||||
proc->super.proc_arch = opal_local_arch;
|
||||
#endif
|
||||
ret = ompi_proc_complete_init_single (proc);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
errcode = ret;
|
||||
break;
|
||||
}
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
|
||||
|
||||
if (ompi_process_info.num_procs >= ompi_add_procs_cutoff) {
|
||||
uint16_t u16, *u16ptr;
|
||||
|
||||
u16ptr = &u16;
|
||||
|
||||
/* find and add all local processes */
|
||||
for (ompi_vpid_t i = 0 ; i < ompi_process_info.num_procs ; ++i ) {
|
||||
opal_process_name_t proc_name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid};
|
||||
uint16_t locality = OPAL_PROC_NON_LOCAL;
|
||||
|
||||
if (OMPI_PROC_MY_NAME->vpid == i) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* the runtime is required to fill in locality for all local processes by this
|
||||
* point. only local processes will have locality set */
|
||||
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc_name, &u16ptr, OPAL_UINT16);
|
||||
if (OPAL_SUCCESS == ret) {
|
||||
locality = u16;
|
||||
}
|
||||
|
||||
if (OPAL_PROC_NON_LOCAL != locality) {
|
||||
(void) ompi_proc_for_name (proc_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
opal_list_sort (&ompi_proc_list, ompi_proc_compare_vid);
|
||||
|
||||
return errcode;
|
||||
}
|
||||
|
||||
@ -227,6 +393,7 @@ int ompi_proc_finalize (void)
|
||||
/* now destruct the list and thread lock */
|
||||
OBJ_DESTRUCT(&ompi_proc_list);
|
||||
OBJ_DESTRUCT(&ompi_proc_lock);
|
||||
OBJ_DESTRUCT(&ompi_proc_hash);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
@ -248,9 +415,7 @@ ompi_proc_t** ompi_proc_world(size_t *size)
|
||||
|
||||
/* First count how many match this jobid */
|
||||
OPAL_THREAD_LOCK(&ompi_proc_lock);
|
||||
for (proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list);
|
||||
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
|
||||
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
|
||||
OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
|
||||
if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, OMPI_CAST_RTE_NAME(&proc->super.proc_name), &my_name)) {
|
||||
++count;
|
||||
}
|
||||
@ -265,9 +430,7 @@ ompi_proc_t** ompi_proc_world(size_t *size)
|
||||
|
||||
/* now save only the procs that match this jobid */
|
||||
count = 0;
|
||||
for (proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list);
|
||||
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
|
||||
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
|
||||
OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
|
||||
if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, &proc->super.proc_name, &my_name)) {
|
||||
/* DO NOT RETAIN THIS OBJECT - the reference count on this
|
||||
* object will be adjusted by external callers. The intent
|
||||
@ -305,9 +468,7 @@ ompi_proc_t** ompi_proc_all(size_t* size)
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&ompi_proc_lock);
|
||||
for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list);
|
||||
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
|
||||
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
|
||||
OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
|
||||
/* We know this isn't consistent with the behavior in ompi_proc_world,
|
||||
* but we are leaving the RETAIN for now because the code using this function
|
||||
* assumes that the results need to be released when done. It will
|
||||
@ -349,9 +510,7 @@ ompi_proc_t * ompi_proc_find ( const ompi_process_name_t * name )
|
||||
/* return the proc-struct which matches this jobid+process id */
|
||||
mask = OMPI_RTE_CMP_JOBID | OMPI_RTE_CMP_VPID;
|
||||
OPAL_THREAD_LOCK(&ompi_proc_lock);
|
||||
for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list);
|
||||
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
|
||||
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
|
||||
OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
|
||||
if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, &proc->super.proc_name, name)) {
|
||||
rproc = proc;
|
||||
break;
|
||||
@ -366,21 +525,14 @@ ompi_proc_t * ompi_proc_find ( const ompi_process_name_t * name )
|
||||
int ompi_proc_refresh(void)
|
||||
{
|
||||
ompi_proc_t *proc = NULL;
|
||||
opal_list_item_t *item = NULL;
|
||||
ompi_vpid_t i = 0;
|
||||
int ret=OMPI_SUCCESS;
|
||||
uint16_t u16, *u16ptr;
|
||||
|
||||
OPAL_THREAD_LOCK(&ompi_proc_lock);
|
||||
|
||||
for( item = opal_list_get_first(&ompi_proc_list), i = 0;
|
||||
item != opal_list_get_end(&ompi_proc_list);
|
||||
item = opal_list_get_next(item), ++i ) {
|
||||
proc = (ompi_proc_t*)item;
|
||||
|
||||
OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
|
||||
/* Does not change: proc->super.proc_name.vpid */
|
||||
OMPI_CAST_RTE_NAME(&proc->super.proc_name)->jobid = OMPI_PROC_MY_NAME->jobid;
|
||||
u16ptr = &u16;
|
||||
|
||||
/* Make sure to clear the local flag before we set it below */
|
||||
proc->super.proc_flags = 0;
|
||||
@ -392,56 +544,10 @@ int ompi_proc_refresh(void)
|
||||
proc->super.proc_arch = opal_local_arch;
|
||||
opal_proc_local_set(&proc->super);
|
||||
} else {
|
||||
/* get the locality information - all RTEs are required
|
||||
* to provide this information at startup */
|
||||
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc->super.proc_name, &u16ptr, OPAL_UINT16);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
proc->super.proc_flags = OPAL_PROC_NON_LOCAL;
|
||||
} else {
|
||||
proc->super.proc_flags = u16;
|
||||
ret = ompi_proc_complete_init_single (proc);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (ompi_process_info.num_procs < ompi_direct_modex_cutoff) {
|
||||
/* IF the number of procs falls below the specified cutoff,
|
||||
* then we assume the job is small enough that retrieving
|
||||
* the hostname (which will typically cause retrieval of
|
||||
* ALL modex info for this proc) will have no appreciable
|
||||
* impact on launch scaling
|
||||
*/
|
||||
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_HOSTNAME, &proc->super.proc_name,
|
||||
(char**)&(proc->super.proc_hostname), OPAL_STRING);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* just set the hostname to NULL for now - we'll fill it in
|
||||
* as modex_recv's are called for procs we will talk to, thus
|
||||
* avoiding retrieval of ALL modex info for this proc until
|
||||
* required. Transports that delay calling modex_recv until
|
||||
* first message will therefore scale better than those that
|
||||
* call modex_recv on all procs during init.
|
||||
*/
|
||||
proc->super.proc_hostname = NULL;
|
||||
}
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
{
|
||||
/* get the remote architecture */
|
||||
uint32_t* uiptr = &(proc->super.proc_arch);
|
||||
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_ARCH, &proc->super.proc_name,
|
||||
(void**)&uiptr, OPAL_UINT32);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
break;
|
||||
}
|
||||
/* if arch is different than mine, create a new convertor for this proc */
|
||||
if (proc->super.proc_arch != opal_local_arch) {
|
||||
OBJ_RELEASE(proc->super.proc_convertor);
|
||||
proc->super.proc_convertor = opal_convertor_create(proc->super.proc_arch, 0);
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* must be same arch as my own */
|
||||
proc->super.proc_arch = opal_local_arch;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -454,7 +560,7 @@ int
|
||||
ompi_proc_pack(ompi_proc_t **proclist, int proclistsize,
|
||||
opal_buffer_t* buf)
|
||||
{
|
||||
int i, rc;
|
||||
int rc;
|
||||
|
||||
OPAL_THREAD_LOCK(&ompi_proc_lock);
|
||||
|
||||
@ -470,7 +576,7 @@ ompi_proc_pack(ompi_proc_t **proclist, int proclistsize,
|
||||
* reduced. For now, just go ahead and pack the info so it
|
||||
* can be sent.
|
||||
*/
|
||||
for (i=0; i<proclistsize; i++) {
|
||||
for (int i = 0 ; i < proclistsize ; ++i) {
|
||||
rc = opal_dss.pack(buf, &(proclist[i]->super.proc_name), 1, OMPI_NAME);
|
||||
if(rc != OPAL_SUCCESS) {
|
||||
OMPI_ERROR_LOG(rc);
|
||||
@ -503,9 +609,7 @@ ompi_proc_find_and_add(const ompi_process_name_t * name, bool* isnew)
|
||||
/* return the proc-struct which matches this jobid+process id */
|
||||
mask = OMPI_RTE_CMP_JOBID | OMPI_RTE_CMP_VPID;
|
||||
OPAL_THREAD_LOCK(&ompi_proc_lock);
|
||||
for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list);
|
||||
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
|
||||
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
|
||||
OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
|
||||
if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, &proc->super.proc_name, name)) {
|
||||
rproc = proc;
|
||||
*isnew = false;
|
||||
@ -538,7 +642,6 @@ ompi_proc_unpack(opal_buffer_t* buf,
|
||||
int proclistsize, ompi_proc_t ***proclist,
|
||||
int *newproclistsize, ompi_proc_t ***newproclist)
|
||||
{
|
||||
int i;
|
||||
size_t newprocs_len = 0;
|
||||
ompi_proc_t **plist=NULL, **newprocs = NULL;
|
||||
|
||||
@ -558,7 +661,7 @@ ompi_proc_unpack(opal_buffer_t* buf,
|
||||
/* cycle through the array of provided procs and unpack
|
||||
* their info - as packed by ompi_proc_pack
|
||||
*/
|
||||
for (i=0; i<proclistsize; i++){
|
||||
for (int i = 0; i < proclistsize ; ++i){
|
||||
int32_t count=1;
|
||||
ompi_process_name_t new_name;
|
||||
uint32_t new_arch;
|
||||
|
@ -304,6 +304,35 @@ OMPI_DECLSPEC int ompi_proc_unpack(opal_buffer_t *buf,
|
||||
*/
|
||||
OMPI_DECLSPEC int ompi_proc_refresh(void);
|
||||
|
||||
/**
|
||||
* Get the ompi_proc_t for a given process name
|
||||
*
|
||||
* @param[in] proc_name opal process name
|
||||
*
|
||||
* @returns cached or new ompi_proc_t for the given process name
|
||||
*
|
||||
* This function looks up the given process name in the hash of existing
|
||||
* ompi_proc_t structures. If no ompi_proc_t structure exists matching the
|
||||
* given name a new ompi_proc_t is allocated, initialized, and returned.
|
||||
*
|
||||
* @note The ompi_proc_t is added to the local list of processes but is not
|
||||
* added to any communicator. ompi_comm_peer_lookup is responsible for caching
|
||||
* the ompi_proc_t on a communicator.
|
||||
*/
|
||||
OMPI_DECLSPEC opal_proc_t *ompi_proc_for_name (const opal_process_name_t proc_name);
|
||||
|
||||
|
||||
OMPI_DECLSPEC opal_proc_t *ompi_proc_lookup (const opal_process_name_t proc_name);
|
||||
|
||||
|
||||
/**
 * Encode a process name as a negative intptr_t sentinel.
 *
 * The bytes of the name are reinterpreted as an intptr_t and negated.
 * The reinterpretation goes through a union rather than a pointer cast
 * so that it does not violate the strict aliasing rules.
 */
static inline intptr_t ompi_proc_name_to_sentinel (opal_process_name_t name) {
    union {
        opal_process_name_t name;
        intptr_t value;
    } pun = { .name = name };

    return -pun.value;
}
|
||||
|
||||
/**
 * Decode a sentinel produced by ompi_proc_name_to_sentinel() back into
 * a process name.
 *
 * The sentinel is negated and its bytes are reinterpreted as an
 * opal_process_name_t. The reinterpretation goes through a union rather
 * than a pointer cast so that it does not violate the strict aliasing
 * rules.
 */
static inline opal_process_name_t ompi_proc_sentinel_to_name (intptr_t sentinel) {
    union {
        opal_process_name_t name;
        intptr_t value;
    } pun = { .name = { 0 } };

    pun.value = -sentinel;
    return pun.name;
}
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -13,6 +14,8 @@
|
||||
* Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -88,7 +91,7 @@ static void try_kill_peers(ompi_communicator_t *comm,
|
||||
} else {
|
||||
assert(count <= nprocs);
|
||||
procs[count++] =
|
||||
*OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i)->super.proc_name);
|
||||
*OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i, true)->super.proc_name);
|
||||
}
|
||||
}
|
||||
|
||||
@ -96,7 +99,7 @@ static void try_kill_peers(ompi_communicator_t *comm,
|
||||
for (i = 0; i < ompi_comm_remote_size(comm); ++i) {
|
||||
assert(count <= nprocs);
|
||||
procs[count++] =
|
||||
*OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i)->super.proc_name);
|
||||
*OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i, true)->super.proc_name);
|
||||
}
|
||||
|
||||
if (nprocs > 0) {
|
||||
|
@ -400,6 +400,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
opal_compare_proc = _process_name_compare;
|
||||
opal_convert_string_to_process_name = _convert_string_to_process_name;
|
||||
opal_convert_process_name_to_string = _convert_process_name_to_string;
|
||||
opal_proc_for_name = ompi_proc_for_name;
|
||||
|
||||
/* Register MCA variables */
|
||||
if (OPAL_SUCCESS != (ret = ompi_register_mca_variables())) {
|
||||
|
@ -64,6 +64,7 @@ int ompi_mpi_event_tick_rate = -1;
|
||||
char *ompi_mpi_show_mca_params_string = NULL;
|
||||
bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE);
|
||||
bool ompi_mpi_preconnect_mpi = false;
|
||||
uint32_t ompi_add_procs_cutoff = 1024;
|
||||
|
||||
static bool show_default_mca_params = false;
|
||||
static bool show_file_mca_params = false;
|
||||
@ -288,6 +289,16 @@ int ompi_mpi_register_params(void)
|
||||
ompi_rte_abort(1, NULL);
|
||||
}
|
||||
|
||||
ompi_add_procs_cutoff = 1024;
|
||||
(void) mca_base_var_register ("ompi", "mpi", NULL, "add_procs_cutoff",
|
||||
"Maximum world size for pre-allocating resources for all "
|
||||
"remote processes. Increasing this limit may improve "
|
||||
"communication performance at the cost of memory usage "
|
||||
"(default: 1024)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL,
|
||||
0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&ompi_add_procs_cutoff);
|
||||
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -9,7 +10,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
|
||||
@ -123,11 +124,16 @@ OMPI_DECLSPEC extern bool ompi_have_sparse_group_storage;
|
||||
*/
|
||||
OMPI_DECLSPEC extern bool ompi_use_sparse_group_storage;
|
||||
|
||||
/*
|
||||
/**
|
||||
* Cutoff point for retrieving hostnames
|
||||
*/
|
||||
OMPI_DECLSPEC extern uint32_t ompi_direct_modex_cutoff;
|
||||
|
||||
/**
|
||||
* Cutoff point for calling add_procs for all processes
|
||||
*/
|
||||
OMPI_DECLSPEC extern uint32_t ompi_add_procs_cutoff;
|
||||
|
||||
/**
|
||||
* Register MCA parameters used by the MPI layer.
|
||||
*
|
||||
|
@ -605,12 +605,15 @@ typedef int (*mca_btl_base_module_finalize_fn_t)(
|
||||
* modex_recv() function. The BTL may utilize this information to
|
||||
* determine reachability of each peer process.
|
||||
*
|
||||
* For each process that is reachable by the BTL, the bit corresponding to the index
|
||||
* into the proc array (nprocs) should be set in the reachable bitmask. The BTL
|
||||
* will return an array of pointers to a data structure defined
|
||||
* by the BTL that is then returned to the BTL on subsequent calls to the BTL data
|
||||
* transfer functions (e.g btl_send). This may be used by the BTL to cache any addressing
|
||||
* or connection information (e.g. TCP socket, IB queue pair).
|
||||
* The caller may pass a "reachable" bitmap pointer. If it is not
|
||||
* NULL, for each process that is reachable by the BTL, the bit
|
||||
* corresponding to the index into the proc array (nprocs) should be
|
||||
* set in the reachable bitmask. The BTL will return an array of
|
||||
* pointers to a data structure defined by the BTL that is then
|
||||
* returned to the BTL on subsequent calls to the BTL data transfer
|
||||
* functions (e.g btl_send). This may be used by the BTL to cache any
|
||||
* addressing or connection information (e.g. TCP socket, IB queue
|
||||
* pair).
|
||||
*/
|
||||
typedef int (*mca_btl_base_module_add_procs_fn_t)(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
|
@ -871,6 +871,7 @@ int mca_btl_openib_add_procs(
|
||||
for (i = 0, local_procs = 0 ; i < (int) nprocs; i++) {
|
||||
struct opal_proc_t* proc = procs[i];
|
||||
mca_btl_openib_proc_t* ib_proc;
|
||||
bool found_existing = false;
|
||||
int remote_matching_port;
|
||||
|
||||
opal_output(-1, "add procs: adding proc %d", i);
|
||||
@ -898,6 +899,24 @@ int mca_btl_openib_add_procs(
|
||||
continue;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&ib_proc->proc_lock);
|
||||
for (j = 0 ; j < (int) ib_proc->proc_endpoint_count ; ++j) {
|
||||
endpoint = ib_proc->proc_endpoints[j];
|
||||
if (endpoint->endpoint_btl == openib_btl) {
|
||||
found_existing = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
|
||||
|
||||
if (found_existing) {
|
||||
if (reachable) {
|
||||
opal_bitmap_set_bit(reachable, i);
|
||||
}
|
||||
peers[i] = endpoint;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* check if the remote proc has any ports that:
|
||||
- on the same subnet as the local proc, and
|
||||
- on that subnet, has a CPC in common with the local proc
|
||||
@ -1048,6 +1067,37 @@ int mca_btl_openib_add_procs(
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_module_t *btl, struct opal_proc_t *proc)
|
||||
{
|
||||
mca_btl_openib_module_t *openib_btl = (mca_btl_openib_module_t *) btl;
|
||||
mca_btl_base_endpoint_t *endpoint;
|
||||
mca_btl_openib_proc_t *ib_proc;
|
||||
|
||||
if (NULL == (ib_proc = mca_btl_openib_proc_create(proc))) {
|
||||
/* if we don't have connection info for this process, it's
|
||||
* okay because some other method might be able to reach it,
|
||||
* so just mark it as unreachable by us */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&ib_proc->proc_lock);
|
||||
for (size_t j = 0 ; j < ib_proc->proc_endpoint_count ; ++j) {
|
||||
endpoint = ib_proc->proc_endpoints[j];
|
||||
if (endpoint->endpoint_btl == openib_btl) {
|
||||
OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
|
||||
return endpoint;
|
||||
}
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
|
||||
|
||||
BTL_VERBOSE(("creating new endpoint for remote process {.jobid = 0x%x, .vpid = 0x%x}",
|
||||
proc->proc_name.jobid, proc->proc_name.vpid));
|
||||
|
||||
endpoint = NULL;
|
||||
(void) mca_btl_openib_add_procs (btl, 1, &proc, &endpoint, NULL);
|
||||
return endpoint;
|
||||
}
|
||||
|
||||
/*
|
||||
* delete the proc as reachable from this btl module
|
||||
*/
|
||||
|
@ -874,6 +874,18 @@ int mca_btl_openib_post_srr(mca_btl_openib_module_t* openib_btl, const int qp);
|
||||
|
||||
const char* btl_openib_get_transport_name(mca_btl_openib_transport_type_t transport_type);
|
||||
|
||||
/**
|
||||
* Get an endpoint for a process
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param proc (IN) opal process object
|
||||
*
|
||||
* This function will return an existing endpoint if one exists otherwise it will allocate
|
||||
* a new endpoint and return it.
|
||||
*/
|
||||
struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_module_t *btl,
|
||||
struct opal_proc_t *proc);
|
||||
|
||||
/**
|
||||
* Get a transport type of btl.
|
||||
*/
|
||||
|
@ -565,7 +565,8 @@ int btl_openib_register_mca_params(void)
|
||||
mca_btl_openib_module.super.btl_rdma_pipeline_frag_size = 1024 * 1024;
|
||||
mca_btl_openib_module.super.btl_min_rdma_pipeline_size = 256 * 1024;
|
||||
mca_btl_openib_module.super.btl_flags = MCA_BTL_FLAGS_RDMA |
|
||||
MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA;
|
||||
MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA |
|
||||
MCA_BTL_FLAGS_SEND;
|
||||
#if BTL_OPENIB_FAILOVER_ENABLED
|
||||
mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_FAILOVER_SUPPORT;
|
||||
#endif
|
||||
|
@ -218,6 +218,7 @@ typedef struct udcm_msg_hdr {
|
||||
union {
|
||||
/* UDCM_MESSAGE_CONNECT */
|
||||
struct msg_connect {
|
||||
opal_process_name_t rem_name;
|
||||
int32_t rem_ep_index;
|
||||
uint8_t rem_port_num;
|
||||
} req;
|
||||
@ -1473,36 +1474,26 @@ static int udcm_rc_qp_create_all (mca_btl_base_endpoint_t *lcl_ep)
|
||||
/* JMS: optimization target -- can we send something in private
|
||||
data to find the proc directly instead of having to search
|
||||
through *all* procs? */
|
||||
static mca_btl_openib_endpoint_t *udcm_find_endpoint (opal_pointer_array_t *endpoints,
|
||||
static mca_btl_openib_endpoint_t *udcm_find_endpoint (struct mca_btl_openib_module_t *btl,
|
||||
uint32_t qp_num, uint16_t lid,
|
||||
udcm_msg_hdr_t *msg_hdr)
|
||||
{
|
||||
uint8_t port_num;
|
||||
int i;
|
||||
mca_btl_base_endpoint_t *endpoint;
|
||||
struct opal_proc_t *opal_proc;
|
||||
|
||||
port_num = msg_hdr->data.req.rem_port_num;
|
||||
|
||||
for (i = 0 ; i < opal_pointer_array_get_size (endpoints) ; ++i) {
|
||||
mca_btl_openib_endpoint_t *endpoint;
|
||||
modex_msg_t *msg;
|
||||
|
||||
endpoint = (mca_btl_openib_endpoint_t *)
|
||||
opal_pointer_array_get_item (endpoints, i);
|
||||
if (NULL == endpoint) {
|
||||
continue;
|
||||
}
|
||||
|
||||
msg = UDCM_ENDPOINT_REM_MODEX(endpoint);
|
||||
|
||||
if (msg->mm_qp_num == qp_num && msg->mm_port_num == port_num &&
|
||||
msg->mm_lid == lid)
|
||||
return endpoint;
|
||||
opal_proc = opal_proc_for_name (msg_hdr->data.req.rem_name);
|
||||
if (NULL == opal_proc) {
|
||||
BTL_ERROR(("could not get proc associated with remote peer"));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
BTL_ERROR(("could not find endpoint with port: %d, lid: %d, msg_type: %d",
|
||||
port_num, lid, msg_hdr->type));
|
||||
endpoint = mca_btl_openib_get_ep (&btl->super, opal_proc);
|
||||
if (NULL == endpoint) {
|
||||
BTL_ERROR(("could not find endpoint with port: %d, lid: %d, msg_type: %d",
|
||||
msg_hdr->data.req.rem_port_num, lid, msg_hdr->type));
|
||||
}
|
||||
|
||||
return NULL;
|
||||
return endpoint;
|
||||
}
|
||||
|
||||
static int udcm_endpoint_init_data (mca_btl_base_endpoint_t *lcl_ep)
|
||||
@ -1678,6 +1669,7 @@ static int udcm_send_request (mca_btl_base_endpoint_t *lcl_ep,
|
||||
|
||||
msg->data->hdr.data.req.rem_ep_index = htonl(lcl_ep->index);
|
||||
msg->data->hdr.data.req.rem_port_num = m->modex.mm_port_num;
|
||||
msg->data->hdr.data.req.rem_name = OPAL_PROC_MY_NAME;
|
||||
|
||||
for (i = 0 ; i < mca_btl_openib_component.num_qps ; ++i) {
|
||||
msg->data->qps[i].psn = htonl(lcl_ep->qps[i].qp->lcl_psn);
|
||||
@ -1981,8 +1973,7 @@ static int udcm_process_messages (struct ibv_cq *event_cq, udcm_module_t *m)
|
||||
lcl_ep = message->hdr.lcl_ep;
|
||||
|
||||
if (NULL == lcl_ep) {
|
||||
lcl_ep = udcm_find_endpoint (m->btl->device->endpoints, wc[i].src_qp,
|
||||
wc[i].slid, &message->hdr);
|
||||
lcl_ep = udcm_find_endpoint (m->btl, wc[i].src_qp, wc[i].slid, &message->hdr);
|
||||
}
|
||||
|
||||
if (NULL == lcl_ep ) {
|
||||
@ -2824,6 +2815,7 @@ static int udcm_xrc_send_request (mca_btl_base_endpoint_t *lcl_ep, mca_btl_base_
|
||||
|
||||
msg->data->hdr.data.req.rem_ep_index = htonl(lcl_ep->index);
|
||||
msg->data->hdr.data.req.rem_port_num = m->modex.mm_port_num;
|
||||
msg->data->hdr.data.req.rem_name = OPAL_PROC_MY_NAME;
|
||||
|
||||
if (UDCM_MESSAGE_XCONNECT == msg_type) {
|
||||
BTL_VERBOSE(("Sending XConnect with qp: %d, psn: %d", lcl_ep->qps[0].qp->lcl_qp->qp_num,
|
||||
|
@ -221,7 +221,8 @@ mca_btl_portals4_component_open(void)
|
||||
mca_btl_portals4_module.super.btl_min_rdma_pipeline_size = 0;
|
||||
mca_btl_portals4_module.super.btl_flags =
|
||||
MCA_BTL_FLAGS_RDMA |
|
||||
MCA_BTL_FLAGS_RDMA_MATCHED;
|
||||
MCA_BTL_FLAGS_RDMA_MATCHED |
|
||||
MCA_BTL_FLAGS_SEND;
|
||||
|
||||
mca_btl_portals4_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
|
||||
|
||||
|
@ -98,7 +98,7 @@ static int mca_btl_self_component_register(void)
|
||||
mca_btl_self.btl_rdma_pipeline_send_length = INT_MAX;
|
||||
mca_btl_self.btl_rdma_pipeline_frag_size = INT_MAX;
|
||||
mca_btl_self.btl_min_rdma_pipeline_size = 0;
|
||||
mca_btl_self.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE;
|
||||
mca_btl_self.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND;
|
||||
mca_btl_self.btl_bandwidth = 100;
|
||||
mca_btl_self.btl_latency = 0;
|
||||
mca_btl_base_param_register(&mca_btl_self_component.super.btl_version,
|
||||
|
@ -10,7 +10,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
@ -72,6 +72,7 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
|
||||
struct opal_proc_t* opal_proc = procs[i];
|
||||
mca_btl_tcp_proc_t* tcp_proc;
|
||||
mca_btl_base_endpoint_t* tcp_endpoint;
|
||||
bool existing_found = false;
|
||||
|
||||
/* Do not create loopback TCP connections */
|
||||
if( my_proc == opal_proc ) {
|
||||
@ -90,28 +91,43 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
|
||||
|
||||
OPAL_THREAD_LOCK(&tcp_proc->proc_lock);
|
||||
|
||||
/* The btl_proc datastructure is shared by all TCP BTL
|
||||
* instances that are trying to reach this destination.
|
||||
* Cache the peer instance on the btl_proc.
|
||||
*/
|
||||
tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t);
|
||||
if(NULL == tcp_endpoint) {
|
||||
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
for (int j = 0 ; j < tcp_proc->proc_endpoint_count ; ++j) {
|
||||
tcp_endpoint = tcp_proc->proc_endpoints[j];
|
||||
if (tcp_endpoint->endpoint_btl == tcp_btl) {
|
||||
existing_found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
tcp_endpoint->endpoint_btl = tcp_btl;
|
||||
rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint);
|
||||
if(rc != OPAL_SUCCESS) {
|
||||
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
|
||||
OBJ_RELEASE(tcp_endpoint);
|
||||
continue;
|
||||
if (!existing_found) {
|
||||
/* The btl_proc datastructure is shared by all TCP BTL
|
||||
* instances that are trying to reach this destination.
|
||||
* Cache the peer instance on the btl_proc.
|
||||
*/
|
||||
tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t);
|
||||
if(NULL == tcp_endpoint) {
|
||||
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
tcp_endpoint->endpoint_btl = tcp_btl;
|
||||
rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint);
|
||||
if(rc != OPAL_SUCCESS) {
|
||||
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
|
||||
OBJ_RELEASE(tcp_endpoint);
|
||||
continue;
|
||||
}
|
||||
|
||||
opal_list_append(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint);
|
||||
}
|
||||
|
||||
opal_bitmap_set_bit(reachable, i);
|
||||
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
|
||||
|
||||
if (NULL != reachable) {
|
||||
opal_bitmap_set_bit(reachable, i);
|
||||
}
|
||||
|
||||
peers[i] = tcp_endpoint;
|
||||
opal_list_append(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint);
|
||||
|
||||
/* we increase the count of MPI users of the event library
|
||||
once per peer, so that we are used until we aren't
|
||||
|
@ -269,7 +269,8 @@ static int mca_btl_tcp_component_register(void)
|
||||
MCA_BTL_FLAGS_SEND_INPLACE |
|
||||
MCA_BTL_FLAGS_NEED_CSUM |
|
||||
MCA_BTL_FLAGS_NEED_ACK |
|
||||
MCA_BTL_FLAGS_HETEROGENEOUS_RDMA;
|
||||
MCA_BTL_FLAGS_HETEROGENEOUS_RDMA |
|
||||
MCA_BTL_FLAGS_SEND;
|
||||
|
||||
mca_btl_tcp_module.super.btl_bandwidth = 100;
|
||||
mca_btl_tcp_module.super.btl_latency = 100;
|
||||
|
@ -14,7 +14,9 @@
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -738,6 +740,31 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const opal_process_name_t *name)
|
||||
opal_proc_table_get_value(&mca_btl_tcp_component.tcp_procs,
|
||||
*name, (void**)&proc);
|
||||
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
|
||||
if (OPAL_UNLIKELY(NULL == proc)) {
|
||||
mca_btl_base_endpoint_t *endpoint;
|
||||
opal_proc_t *opal_proc;
|
||||
int rc;
|
||||
|
||||
BTL_VERBOSE(("adding tcp proc for unknown peer {.jobid = 0x%x, .vpid = 0x%x}",
|
||||
name->jobid, name->vpid));
|
||||
|
||||
opal_proc = opal_proc_for_name (*name);
|
||||
if (NULL == opal_proc) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* try adding this proc to each btl */
|
||||
for (int i = 0 ; i < mca_btl_tcp_component.tcp_num_btls ; ++i) {
|
||||
endpoint = NULL;
|
||||
(void) mca_btl_tcp_add_procs (&mca_btl_tcp_component.tcp_btls[i]->super, 1, &opal_proc,
|
||||
&endpoint, NULL);
|
||||
if (NULL != endpoint && NULL == proc) {
|
||||
/* get the proc and continue on (could probably just break here) */
|
||||
proc = endpoint->endpoint_proc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return proc;
|
||||
}
|
||||
|
||||
|
@ -49,7 +49,7 @@
|
||||
|
||||
/* ompi and smsg endpoint attributes */
|
||||
typedef struct mca_btl_ugni_endpoint_attr_t {
|
||||
uint64_t proc_id;
|
||||
opal_process_name_t proc_name;
|
||||
uint32_t index;
|
||||
gni_smsg_attr_t smsg_attr;
|
||||
gni_mem_handle_t rmt_irq_mem_hndl;
|
||||
@ -67,6 +67,7 @@ typedef struct mca_btl_ugni_module_t {
|
||||
|
||||
opal_common_ugni_device_t *device;
|
||||
|
||||
opal_mutex_t endpoint_lock;
|
||||
size_t endpoint_count;
|
||||
opal_pointer_array_t endpoints;
|
||||
opal_hash_table_t id_to_endpoint;
|
||||
@ -229,6 +230,8 @@ mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
|
||||
struct opal_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t **peers);
|
||||
|
||||
struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_t *module, opal_proc_t *proc);
|
||||
|
||||
/**
|
||||
* Initiate an asynchronous send.
|
||||
*
|
||||
|
@ -28,13 +28,11 @@ static void
|
||||
mca_btl_ugni_module_set_max_reg (mca_btl_ugni_module_t *ugni_module, int nlocal_procs);
|
||||
static int mca_btl_ugni_smsg_setup (int nprocs);
|
||||
|
||||
int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
|
||||
size_t nprocs,
|
||||
struct opal_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t **peers,
|
||||
opal_bitmap_t *reachable) {
|
||||
int mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl, size_t nprocs,
|
||||
struct opal_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t **peers,
|
||||
opal_bitmap_t *reachable) {
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
|
||||
size_t i;
|
||||
int rc;
|
||||
void *mmap_start_addr;
|
||||
|
||||
@ -59,36 +57,45 @@ int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0 ; i < nprocs ; ++i) {
|
||||
for (size_t i = 0 ; i < nprocs ; ++i) {
|
||||
struct opal_proc_t *opal_proc = procs[i];
|
||||
uint64_t proc_id = mca_btl_ugni_proc_name_to_id(opal_proc->proc_name);
|
||||
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) {
|
||||
ugni_module->nlocal_procs++;
|
||||
/* check for an existing endpoint */
|
||||
OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) (peers + i))) {
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) {
|
||||
ugni_module->nlocal_procs++;
|
||||
|
||||
/* ugni is allowed on local processes to provide support for network
|
||||
* atomic operations */
|
||||
/* ugni is allowed on local processes to provide support for network
|
||||
* atomic operations */
|
||||
}
|
||||
|
||||
/* Create and Init endpoints */
|
||||
rc = mca_btl_ugni_init_ep (ugni_module, peers + i, (mca_btl_ugni_module_t *) btl, opal_proc);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
|
||||
BTL_ERROR(("btl/ugni error initializing endpoint"));
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* go ahead and connect the local endpoint for RDMA/CQ write */
|
||||
if (opal_proc == opal_proc_local_get ()) {
|
||||
ugni_module->local_ep = peers[i];
|
||||
}
|
||||
|
||||
/* Add this endpoint to the pointer array. */
|
||||
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) peers[i]));
|
||||
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, peers[i]);
|
||||
|
||||
++ugni_module->endpoint_count;
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
|
||||
|
||||
/* Create and Init endpoints */
|
||||
rc = mca_btl_ugni_init_ep (ugni_module, peers + i, (mca_btl_ugni_module_t *) btl, opal_proc);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
BTL_ERROR(("btl/ugni error initializing endpoint"));
|
||||
return rc;
|
||||
/* Set the reachable bit if necessary */
|
||||
if (reachable) {
|
||||
rc = opal_bitmap_set_bit (reachable, i);
|
||||
}
|
||||
|
||||
/* go ahead and connect the local endpoint for RDMA/CQ write */
|
||||
if (opal_proc == opal_proc_local_get ()) {
|
||||
ugni_module->local_ep = peers[i];
|
||||
}
|
||||
|
||||
/* Add this endpoint to the pointer array. */
|
||||
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) peers[i]));
|
||||
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, peers[i]);
|
||||
|
||||
/* Set the reachable bit */
|
||||
rc = opal_bitmap_set_bit (reachable, i);
|
||||
++ugni_module->endpoint_count;
|
||||
}
|
||||
|
||||
mca_btl_ugni_module_set_max_reg (ugni_module, ugni_module->nlocal_procs);
|
||||
@ -224,6 +231,41 @@ int mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Get the endpoint for a process, creating and registering it on first use.
 *
 * @param module (IN) BTL module (a uGNI module)
 * @param proc   (IN) opal process object of the peer
 *
 * @return the endpoint stored in the module's id_to_endpoint table for the
 *         peer, or NULL if a new endpoint could not be initialized.
 *
 * The module's endpoint_lock is held for the whole lookup/create sequence
 * and released exactly once on every path.
 */
struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_t *module, opal_proc_t *proc)
{
    mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) module;
    uint64_t proc_id = mca_btl_ugni_proc_name_to_id(proc->proc_name);
    /* start from NULL so a failed lookup + failed init never returns an
     * indeterminate pointer */
    mca_btl_base_endpoint_t *ep = NULL;
    int rc;

    OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);

    do {
        rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep);
        if (OPAL_SUCCESS == rc) {
            /* existing endpoint: the lock is dropped after the loop, so do
             * not unlock here (doing so unlocked the mutex twice) */
            break;
        }

        /* Create and Init endpoints */
        rc = mca_btl_ugni_init_ep (ugni_module, &ep, ugni_module, proc);
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
            BTL_ERROR(("btl/ugni error initializing endpoint"));
            /* callers detect failure by testing for NULL */
            ep = NULL;
            break;
        }

        /* Remember the new endpoint so later lookups by proc id find it. */
        BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) ep));
        opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, ep);
    } while (0);

    OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);

    return ep;
}
|
||||
|
||||
|
||||
static int ugni_reg_rdma_mem (void *reg_data, void *base, size_t size,
|
||||
mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
|
@ -386,8 +386,8 @@ mca_btl_ugni_component_init (int *num_btl_modules,
|
||||
static inline int
|
||||
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
|
||||
{
|
||||
uint64_t datagram_id, data, proc_id;
|
||||
uint32_t remote_addr, remote_id;
|
||||
uint64_t datagram_id, data;
|
||||
mca_btl_base_endpoint_t *ep;
|
||||
gni_post_state_t post_state;
|
||||
gni_ep_handle_t handle;
|
||||
@ -425,15 +425,24 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
|
||||
|
||||
/* if this is a wildcard endpoint lookup the remote peer by the proc id we received */
|
||||
if (handle == ugni_module->wildcard_ep) {
|
||||
BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64, ugni_module->wc_remote_attr.proc_id));
|
||||
rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint,
|
||||
ugni_module->wc_remote_attr.proc_id,
|
||||
(void *) &ep);
|
||||
proc_id = mca_btl_ugni_proc_name_to_id (ugni_module->wc_remote_attr.proc_name);
|
||||
|
||||
BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64,
|
||||
proc_id));
|
||||
|
||||
OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
|
||||
rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
|
||||
|
||||
/* check if the endpoint is known */
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == ep)) {
|
||||
BTL_ERROR(("received connection attempt from an unknown peer. rc: %d, ep: %p, id: 0x%" PRIx64,
|
||||
rc, (void *) ep, ugni_module->wc_remote_attr.proc_id));
|
||||
return OPAL_ERR_NOT_FOUND;
|
||||
struct opal_proc_t *remote_proc = opal_proc_for_name (ugni_module->wc_remote_attr.proc_name);
|
||||
BTL_VERBOSE(("Got connection request from an unknown peer {jobid = 0x%x, vid = 0x%x}",
|
||||
ugni_module->wc_remote_attr.proc_name.jobid, ugni_module->wc_remote_attr.proc_name.vpid));
|
||||
ep = mca_btl_ugni_get_ep (&ugni_module->super, remote_proc);
|
||||
if (OPAL_UNLIKELY(NULL == ep)) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));
|
||||
|
@ -91,6 +91,7 @@ mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
|
||||
OBJ_CONSTRUCT(&ugni_module->pending_smsg_frags_bb, opal_pointer_array_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->ep_wait_list_lock,opal_mutex_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->ep_wait_list, opal_list_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->endpoint_lock, opal_mutex_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->endpoints, opal_pointer_array_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->id_to_endpoint, opal_hash_table_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->smsg_mboxes, opal_free_list_t);
|
||||
@ -208,6 +209,7 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
|
||||
OBJ_DESTRUCT(&ugni_module->smsg_mboxes);
|
||||
OBJ_DESTRUCT(&ugni_module->pending_smsg_frags_bb);
|
||||
OBJ_DESTRUCT(&ugni_module->id_to_endpoint);
|
||||
OBJ_DESTRUCT(&ugni_module->endpoint_lock);
|
||||
OBJ_DESTRUCT(&ugni_module->endpoints);
|
||||
|
||||
OBJ_DESTRUCT(&ugni_module->eager_get_pending);
|
||||
|
@ -27,7 +27,7 @@ static void mca_btl_ugni_smsg_mbox_construct (mca_btl_ugni_smsg_mbox_t *mbox) {
|
||||
mbox->attr.smsg_attr.msg_buffer = base_reg->base;
|
||||
mbox->attr.smsg_attr.buff_size = mca_btl_ugni_component.smsg_mbox_size;
|
||||
mbox->attr.smsg_attr.mem_hndl = ugni_reg->handle.gni_handle;
|
||||
mbox->attr.proc_id = mca_btl_ugni_proc_name_to_id (OPAL_PROC_MY_NAME);
|
||||
mbox->attr.proc_name = OPAL_PROC_MY_NAME;
|
||||
mbox->attr.rmt_irq_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;
|
||||
}
|
||||
|
||||
|
@ -427,7 +427,7 @@ static int usnic_add_procs(struct mca_btl_base_module_t* base_module,
|
||||
|
||||
/* Find all the endpoints with a complete set of USD destinations
|
||||
and mark them as reachable */
|
||||
for (size_t i = 0; i < nprocs; ++i) {
|
||||
for (size_t i = 0; NULL != reachable && i < nprocs; ++i) {
|
||||
if (NULL != endpoints[i]) {
|
||||
bool happy = true;
|
||||
for (int channel = 0; channel < USNIC_NUM_CHANNELS; ++channel) {
|
||||
|
@ -239,8 +239,10 @@ static int mca_btl_vader_component_register (void)
|
||||
mca_btl_vader.super.btl_rdma_pipeline_send_length = mca_btl_vader.super.btl_eager_limit;
|
||||
mca_btl_vader.super.btl_rdma_pipeline_frag_size = mca_btl_vader.super.btl_eager_limit;
|
||||
|
||||
mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND;
|
||||
|
||||
if (MCA_BTL_VADER_NONE != mca_btl_vader_component.single_copy_mechanism) {
|
||||
mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE;
|
||||
mca_btl_vader.super.btl_flags |= MCA_BTL_FLAGS_RDMA;
|
||||
/* Single copy mechanisms should provide better bandwidth */
|
||||
mca_btl_vader.super.btl_bandwidth = 40000; /* Mbs */
|
||||
|
||||
@ -248,7 +250,6 @@ static int mca_btl_vader_component_register (void)
|
||||
mca_btl_vader.super.btl_get = (mca_btl_base_module_get_fn_t) mca_btl_vader_dummy_rdma;
|
||||
mca_btl_vader.super.btl_put = (mca_btl_base_module_get_fn_t) mca_btl_vader_dummy_rdma;
|
||||
} else {
|
||||
mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE;
|
||||
mca_btl_vader.super.btl_bandwidth = 10000; /* Mbs */
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2013 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
@ -6,6 +7,8 @@
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -162,6 +165,11 @@ static int opal_convert_string_to_jobid_should_never_be_called(opal_jobid_t *job
|
||||
return OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/* Stub implementation of the opal_proc_for_name lookup: it ignores the
 * requested name and always reports that no proc was found. */
static struct opal_proc_t *opal_proc_for_name_should_never_be_called (opal_process_name_t name)
{
    (void) name;   /* intentionally unused */

    return NULL;
}
|
||||
|
||||
char* (*opal_process_name_print)(const opal_process_name_t) = opal_process_name_print_should_never_be_called;
|
||||
char* (*opal_vpid_print)(const opal_vpid_t) = opal_vpid_print_should_never_be_called;
|
||||
char* (*opal_jobid_print)(const opal_jobid_t) = opal_jobid_print_should_never_be_called;
|
||||
@ -169,6 +177,7 @@ int (*opal_convert_string_to_process_name)(opal_process_name_t *name, const char
|
||||
int (*opal_convert_process_name_to_string)(char** name_string, const opal_process_name_t *name) = opal_convert_process_name_to_string_should_never_be_called;
|
||||
char* (*opal_convert_jobid_to_string)(opal_jobid_t jobid) = opal_convert_jobid_to_string_should_never_be_called;
|
||||
int (*opal_convert_string_to_jobid)(opal_jobid_t *jobid, const char *jobid_string) = opal_convert_string_to_jobid_should_never_be_called;
|
||||
struct opal_proc_t *(*opal_proc_for_name) (const opal_process_name_t name) = opal_proc_for_name_should_never_be_called;
|
||||
|
||||
char* opal_get_proc_hostname(const opal_proc_t *proc)
|
||||
{
|
||||
|
@ -136,6 +136,13 @@ OPAL_DECLSPEC extern char* (*opal_jobid_print)(const opal_jobid_t);
|
||||
OPAL_DECLSPEC extern char* (*opal_convert_jobid_to_string)(opal_jobid_t jobid);
|
||||
OPAL_DECLSPEC extern int (*opal_convert_string_to_jobid)(opal_jobid_t *jobid, const char *jobid_string);
|
||||
|
||||
/**
|
||||
* Lookup an opal_proc_t by name
|
||||
*
|
||||
* @param name (IN) name to lookup
|
||||
*/
|
||||
OPAL_DECLSPEC extern struct opal_proc_t *(*opal_proc_for_name) (const opal_process_name_t name);
|
||||
|
||||
#define OPAL_NAME_PRINT(OPAL_PN) opal_process_name_print(OPAL_PN)
|
||||
#define OPAL_JOBID_PRINT(OPAL_PN) opal_jobid_print(OPAL_PN)
|
||||
#define OPAL_VPID_PRINT(OPAL_PN) opal_vpid_print(OPAL_PN)
|
||||
|
@ -113,6 +113,8 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority)
|
||||
if (NULL == oshmem_group_all) {
|
||||
osh_group->ompi_comm = &(ompi_mpi_comm_world.comm);
|
||||
} else {
|
||||
int my_rank = MPI_UNDEFINED;
|
||||
|
||||
err = ompi_comm_group(&(ompi_mpi_comm_world.comm), &parent_group);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) {
|
||||
return NULL;
|
||||
@ -132,6 +134,10 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority)
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* NTH: keep track of my rank in the new group for the workaround below */
|
||||
if (ranks[i] == ompi_comm_rank (&ompi_mpi_comm_world.comm)) {
|
||||
my_rank = i;
|
||||
}
|
||||
}
|
||||
|
||||
err = ompi_group_incl(parent_group, osh_group->proc_count, ranks, &new_group);
|
||||
@ -139,6 +145,15 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority)
|
||||
free(ranks);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* NTH: XXX -- WORKAROUND -- The oshmem code overwrites ompi_proc_local_proc with its
|
||||
* own proc but does not update the proc list in comm world or comm self. This causes
|
||||
* the code in ompi_group_incl that updates grp_my_rank to fail. This will cause failures
|
||||
* here and when an application attempts to mix oshmem and mpi so it will really need to
|
||||
* be fixed in oshmem/proc and not here. For now we need to work around a new jenkins
|
||||
* failure so set my group ranking so we do not crash when running ompi_comm_create_group. */
|
||||
new_group->grp_my_rank = my_rank;
|
||||
|
||||
err = ompi_comm_create_group(&(ompi_mpi_comm_world.comm), new_group, tag, &newcomm);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) {
|
||||
free(ranks);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user