1
1

Merge pull request #849 from hjelmn/add_procs

New add_procs behavior
Этот коммит содержится в:
Nathan Hjelm 2015-09-10 10:51:56 -06:00
родитель 2b8b544f2c ed005f2a61
Коммит 6a0c7b85bf
57 изменённых файлов: 1427 добавлений и 934 удалений

Просмотреть файл

@ -139,54 +139,8 @@ static opal_mutex_t ompi_cid_lock;
static opal_list_t ompi_registered_comms; static opal_list_t ompi_registered_comms;
/* This variable is zero (false) if all processes in MPI_COMM_WORLD
* did not require MPI_THREAD_MULTIPLE support, and is 1 (true) as
* soon as at least one process requested support for THREAD_MULTIPLE */
static int ompi_comm_world_thread_level_mult=0;
int ompi_comm_cid_init (void) int ompi_comm_cid_init (void)
{ {
#if OMPI_ENABLE_THREAD_MULTIPLE
ompi_proc_t **procs, *thisproc;
uint8_t thread_level;
uint8_t *tlpointer;
int ret;
size_t i, size, numprocs;
/** Note that the following call only returns processes
* with the same jobid. This is on purpose, since
* we switch for the dynamic communicators anyway
* to the original (slower) cid allocation algorithm.
*/
procs = ompi_proc_world ( &numprocs );
for ( i=0; i<numprocs; i++ ) {
thisproc = procs[i];
OPAL_MODEX_RECV_STRING(ret, "MPI_THREAD_LEVEL",
&thisproc->super.proc_name,
(uint8_t**)&tlpointer, &size);
if (OMPI_SUCCESS == ret) {
thread_level = *((uint8_t *) tlpointer);
if ( OMPI_THREADLEVEL_IS_MULTIPLE (thread_level) ) {
ompi_comm_world_thread_level_mult = 1;
break;
}
} else if (OMPI_ERR_NOT_IMPLEMENTED == ret) {
if (ompi_mpi_thread_multiple) {
ompi_comm_world_thread_level_mult = 1;
}
break;
} else {
return ret;
}
}
free(procs);
#else
ompi_comm_world_thread_level_mult = 0; // silence compiler warning if not used
#endif
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2006-2010 University of Houston. All rights reserved. * Copyright (c) 2006-2010 University of Houston. All rights reserved.
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2012-2014 Los Alamos National Security, LLC. * Copyright (c) 2012-2015 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* Copyright (c) 2011-2013 Inria. All rights reserved. * Copyright (c) 2011-2013 Inria. All rights reserved.
* Copyright (c) 2011-2013 Universite Bordeaux 1 * Copyright (c) 2011-2013 Universite Bordeaux 1
@ -102,12 +102,26 @@ int ompi_comm_init(void)
OBJ_CONSTRUCT(&ompi_mpi_comm_world, ompi_communicator_t); OBJ_CONSTRUCT(&ompi_mpi_comm_world, ompi_communicator_t);
assert(ompi_mpi_comm_world.comm.c_f_to_c_index == 0); assert(ompi_mpi_comm_world.comm.c_f_to_c_index == 0);
group = OBJ_NEW(ompi_group_t); group = OBJ_NEW(ompi_group_t);
group->grp_proc_pointers = ompi_proc_world(&size);
group->grp_proc_count = (int)size; size = ompi_process_info.num_procs;
group->grp_proc_pointers = (ompi_proc_t **) calloc (size, sizeof (ompi_proc_t *));
group->grp_proc_count = size;
for (size_t i = 0 ; i < size ; ++i) {
opal_process_name_t name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid};
/* look for existing ompi_proc_t that matches this name */
group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_lookup (name);
if (NULL == group->grp_proc_pointers[i]) {
/* set sentinel value */
group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_name_to_sentinel (name);
} else {
OBJ_RETAIN (group->grp_proc_pointers[i]);
}
}
OMPI_GROUP_SET_INTRINSIC (group); OMPI_GROUP_SET_INTRINSIC (group);
OMPI_GROUP_SET_DENSE (group); OMPI_GROUP_SET_DENSE (group);
ompi_set_group_rank(group, ompi_proc_local()); ompi_set_group_rank(group, ompi_proc_local());
ompi_group_increment_proc_count (group);
ompi_mpi_comm_world.comm.c_contextid = 0; ompi_mpi_comm_world.comm.c_contextid = 0;
ompi_mpi_comm_world.comm.c_id_start_index = 4; ompi_mpi_comm_world.comm.c_id_start_index = 4;

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2009 University of Houston. All rights reserved. * Copyright (c) 2006-2009 University of Houston. All rights reserved.
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2014-2015 Research Organization for Information Science * Copyright (c) 2014-2015 Research Organization for Information Science
@ -1293,6 +1293,22 @@ static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs)
/**********************************************************************/ /**********************************************************************/
/**********************************************************************/ /**********************************************************************/
/**********************************************************************/ /**********************************************************************/
/*
 * Return true if any member of group belongs to a job other than thisjobid.
 * Returns false for a NULL or empty group.
 */
static bool ompi_dpm_group_is_dyn (ompi_group_t *group, ompi_jobid_t thisjobid)
{
    int size = group ? ompi_group_size (group) : 0;

    /* Bug fix: start the scan at rank 0, not rank 1. The pre-refactor code
     * scanned the remote group from rank 0; skipping rank 0 here would miss a
     * remote group whose only foreign member is rank 0. Starting at 0 is also
     * safe for the local group, because rank 0's jobid equals thisjobid by
     * construction and therefore never triggers a false positive. */
    for (int i = 0 ; i < size ; ++i) {
        opal_process_name_t name = ompi_group_get_proc_name (group, i);

        if (thisjobid != ((ompi_process_name_t *) &name)->jobid) {
            /* at least one member is from a different job */
            return true;
        }
    }

    return false;
}
/* All we want to do in this function is determine if the number of /* All we want to do in this function is determine if the number of
* jobids in the local and/or remote group is > 1. This tells us to * jobids in the local and/or remote group is > 1. This tells us to
* set the disconnect flag. We don't actually care what the true * set the disconnect flag. We don't actually care what the true
@ -1300,56 +1316,30 @@ static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs)
*/ */
void ompi_dpm_mark_dyncomm(ompi_communicator_t *comm) void ompi_dpm_mark_dyncomm(ompi_communicator_t *comm)
{ {
int i; bool found;
int size, rsize;
bool found=false;
ompi_jobid_t thisjobid; ompi_jobid_t thisjobid;
ompi_group_t *grp=NULL;
ompi_proc_t *proc = NULL;
/* special case for MPI_COMM_NULL */ /* special case for MPI_COMM_NULL */
if (comm == MPI_COMM_NULL) { if (comm == MPI_COMM_NULL) {
return; return;
} }
size = ompi_comm_size(comm); thisjobid = ompi_group_get_proc_name (comm->c_local_group, 0).jobid;
rsize = ompi_comm_remote_size(comm);
/* loop over all processes in local group and check for /* loop over all processes in local group and check for
* a different jobid * a different jobid
*/ */
grp = comm->c_local_group; found = ompi_dpm_group_is_dyn (comm->c_local_group, thisjobid);
proc = ompi_group_peer_lookup(grp,0); if (!found) {
thisjobid = ((ompi_process_name_t*)&proc->super.proc_name)->jobid; /* if inter-comm, loop over all processes in remote_group
* and see if any are different from thisjobid
for (i=1; i< size; i++) { */
proc = ompi_group_peer_lookup(grp,i); found = ompi_dpm_group_is_dyn (comm->c_remote_group, thisjobid);
if (thisjobid != ((ompi_process_name_t*)&proc->super.proc_name)->jobid) {
/* at least one is different */
found = true;
goto complete;
}
} }
/* if inter-comm, loop over all processes in remote_group
* and see if any are different from thisjobid
*/
grp = comm->c_remote_group;
for (i=0; i< rsize; i++) {
proc = ompi_group_peer_lookup(grp,i);
if (thisjobid != ((ompi_process_name_t*)&proc->super.proc_name)->jobid) {
/* at least one is different */
found = true;
break;
}
}
complete:
/* if a different jobid was found, set the disconnect flag*/ /* if a different jobid was found, set the disconnect flag*/
if (found) { if (found) {
ompi_comm_num_dyncomm++; ompi_comm_num_dyncomm++;
OMPI_COMM_SET_DYNAMIC(comm); OMPI_COMM_SET_DYNAMIC(comm);
} }
return;
} }

Просмотреть файл

@ -14,7 +14,7 @@
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2012-2013 Inria. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2015 Research Organization for Information Science * Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
@ -49,16 +49,14 @@ int ompi_group_translate_ranks ( ompi_group_t *group1,
ompi_group_t *group2, ompi_group_t *group2,
int *ranks2) int *ranks2)
{ {
int rank, proc, proc2;
struct ompi_proc_t *proc1_pointer, *proc2_pointer;
if ( MPI_GROUP_EMPTY == group1 || MPI_GROUP_EMPTY == group2 ) { if ( MPI_GROUP_EMPTY == group1 || MPI_GROUP_EMPTY == group2 ) {
for (proc = 0; proc < n_ranks ; proc++) { for (int proc = 0; proc < n_ranks ; ++proc) {
ranks2[proc] = MPI_UNDEFINED; ranks2[proc] = MPI_UNDEFINED;
} }
return MPI_SUCCESS; return MPI_SUCCESS;
} }
#if OMPI_GROUP_SPARSE
/* /*
* If we are translating from a parent to a child that uses the sparse format * If we are translating from a parent to a child that uses the sparse format
* or vice versa, we use the translate ranks function corresponding to the * or vice versa, we use the translate ranks function corresponding to the
@ -80,8 +78,11 @@ int ompi_group_translate_ranks ( ompi_group_t *group1,
(group1,n_ranks,ranks1,group2,ranks2); (group1,n_ranks,ranks1,group2,ranks2);
} }
/* unknown sparse group type */
assert (0);
} }
else if( group2->grp_parent_group_ptr == group1 ) { /* from parent to child*/
if( group2->grp_parent_group_ptr == group1 ) { /* from parent to child*/
if(OMPI_GROUP_IS_SPORADIC(group2)) { if(OMPI_GROUP_IS_SPORADIC(group2)) {
return ompi_group_translate_ranks_sporadic return ompi_group_translate_ranks_sporadic
(group1,n_ranks,ranks1,group2,ranks2); (group1,n_ranks,ranks1,group2,ranks2);
@ -95,28 +96,32 @@ int ompi_group_translate_ranks ( ompi_group_t *group1,
(group1,n_ranks,ranks1,group2,ranks2); (group1,n_ranks,ranks1,group2,ranks2);
} }
/* unknown sparse group type */
assert (0);
} }
else { #endif
/* loop over all ranks */
for (proc = 0; proc < n_ranks; proc++) { /* loop over all ranks */
rank=ranks1[proc]; for (int proc = 0; proc < n_ranks; ++proc) {
if ( MPI_PROC_NULL == rank) { struct ompi_proc_t *proc1_pointer, *proc2_pointer;
ranks2[proc] = MPI_PROC_NULL; int rank = ranks1[proc];
}
else { if ( MPI_PROC_NULL == rank) {
proc1_pointer = ompi_group_peer_lookup(group1 ,rank); ranks2[proc] = MPI_PROC_NULL;
/* initialize to no "match" */ continue;
ranks2[proc] = MPI_UNDEFINED;
for (proc2 = 0; proc2 < group2->grp_proc_count; proc2++) {
proc2_pointer= ompi_group_peer_lookup(group2, proc2);
if ( proc1_pointer == proc2_pointer) {
ranks2[proc] = proc2;
break;
}
} /* end proc2 loop */
} /* end proc loop */
} }
}
proc1_pointer = ompi_group_get_proc_ptr_raw (group1, rank);
/* initialize to no "match" */
ranks2[proc] = MPI_UNDEFINED;
for (int proc2 = 0; proc2 < group2->grp_proc_count; ++proc2) {
proc2_pointer = ompi_group_get_proc_ptr_raw (group2, proc2);
if ( proc1_pointer == proc2_pointer) {
ranks2[proc] = proc2;
break;
}
} /* end proc2 loop */
} /* end proc loop */
return MPI_SUCCESS; return MPI_SUCCESS;
} }
@ -168,25 +173,6 @@ int ompi_group_dump (ompi_group_t* group)
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
/*
* This is the function that iterates through the sparse groups to the dense group
* to reach the process pointer
*/
ompi_proc_t* ompi_group_get_proc_ptr (ompi_group_t* group , int rank)
{
int ranks1,ranks2;
do {
if(OMPI_GROUP_IS_DENSE(group)) {
return group->grp_proc_pointers[rank];
}
ranks1 = rank;
ompi_group_translate_ranks( group, 1, &ranks1,
group->grp_parent_group_ptr,&ranks2);
rank = ranks2;
group = group->grp_parent_group_ptr;
} while (1);
}
int ompi_group_minloc ( int list[] , int length ) int ompi_group_minloc ( int list[] , int length )
{ {
int i,index,min; int i,index,min;
@ -568,3 +554,23 @@ int ompi_group_compare(ompi_group_t *group1,
return return_value; return return_value;
} }
/*
 * Return true if any member of the group is not on the local node.
 *
 * NOTE(review): in the non-sparse path a sentinel entry (negative pointer
 * value, i.e. a proc that has never been resolved) is treated as remote —
 * presumably local procs are always instantiated during startup; confirm.
 */
bool ompi_group_have_remote_peers (ompi_group_t *group)
{
    /* Fix: use int for the index — grp_proc_count is an int (see the sibling
     * loops in this file), so a size_t index caused a signed/unsigned
     * comparison. */
    for (int i = 0 ; i < group->grp_proc_count ; ++i) {
        ompi_proc_t *proc = NULL;
#if OMPI_GROUP_SPARSE
        proc = ompi_group_peer_lookup (group, i);
#else
        if ((intptr_t) group->grp_proc_pointers[i] < 0) {
            /* sentinel value: never resolved, assume it is not local */
            return true;
        }
        proc = group->grp_proc_pointers[i];
#endif
        if (!OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
            return true;
        }
    }

    return false;
}

Просмотреть файл

@ -14,7 +14,7 @@
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -252,8 +252,6 @@ int ompi_group_free (ompi_group_t **group);
/** /**
* Functions to handle process pointers for sparse group formats * Functions to handle process pointers for sparse group formats
*/ */
OMPI_DECLSPEC ompi_proc_t* ompi_group_get_proc_ptr (ompi_group_t* group , int rank);
int ompi_group_translate_ranks_sporadic ( ompi_group_t *group1, int ompi_group_translate_ranks_sporadic ( ompi_group_t *group1,
int n_ranks, const int *ranks1, int n_ranks, const int *ranks1,
ompi_group_t *group2, ompi_group_t *group2,
@ -324,25 +322,93 @@ int ompi_group_calc_bmap ( int n, int orig_size , const int *ranks );
*/ */
int ompi_group_minloc (int list[], int length); int ompi_group_minloc (int list[], int length);
/**
 * @brief Helper function for retrieving the proc of a group member in a dense group
 *
 * This function exists to handle the translation of sentinel group members
 * (negative pointer values that encode a process name) to real ompi_proc_t's.
 * If a sentinel value is found and allocate is true then this function
 * looks for an existing ompi_proc_t using ompi_proc_for_name, which will
 * allocate an ompi_proc_t if one does not exist. If allocate is false then
 * sentinel values translate to NULL.
 *
 * @param group     dense group to look into
 * @param peer_id   index of the member within the group
 * @param allocate  resolve sentinels (true) or return NULL for them (false)
 *
 * @returns the resolved ompi_proc_t, NULL for an unresolved sentinel when
 *          allocate is false, or NULL on an out-of-range index in debug builds
 */
static inline struct ompi_proc_t *ompi_group_dense_lookup (ompi_group_t *group, const int peer_id, const bool allocate)
{
#if OPAL_ENABLE_DEBUG
    /* NOTE(review): only the upper bound is checked — a negative peer_id
     * would slip through; confirm callers never pass one. */
    if (peer_id >= group->grp_proc_count) {
        opal_output(0, "ompi_group_dense_lookup: invalid peer index (%d)", peer_id);
        return (struct ompi_proc_t *) NULL;
    }
#endif

    /* negative pointer value == sentinel encoding of a process name */
    if (OPAL_UNLIKELY((intptr_t) group->grp_proc_pointers[peer_id] < 0)) {
        if (!allocate) {
            return NULL;
        }

        /* replace sentinel value with an actual ompi_proc_t */
        group->grp_proc_pointers[peer_id] =
            (ompi_proc_t *) ompi_proc_for_name (ompi_proc_sentinel_to_name ((intptr_t) group->grp_proc_pointers[peer_id]));
        /* the group keeps a reference on the proc it now stores */
        OBJ_RETAIN(group->grp_proc_pointers[peer_id]);
    }

    return group->grp_proc_pointers[peer_id];
}
/*
 * Iterate through the sparse-group parent chain down to the dense group to
 * reach the process pointer for the given rank. Sentinels are resolved (or
 * mapped to NULL) per the allocate flag — see ompi_group_dense_lookup().
 */
static inline ompi_proc_t *ompi_group_get_proc_ptr (ompi_group_t *group, int rank, const bool allocate)
{
#if OMPI_GROUP_SPARSE
    do {
        if (OMPI_GROUP_IS_DENSE(group)) {
            /* bug fix: this previously referenced the undeclared name
             * peer_id; the parameter is named rank, so the sparse build
             * could not compile */
            return ompi_group_dense_lookup (group, rank, allocate);
        }
        /* translate this rank into the parent group and walk up the chain */
        int ranks1 = rank;
        ompi_group_translate_ranks (group, 1, &ranks1, group->grp_parent_group_ptr, &rank);
        group = group->grp_parent_group_ptr;
    } while (1);
#else
    return ompi_group_dense_lookup (group, rank, allocate);
#endif
}
/**
* @brief Get the raw proc pointer from the group
*
* This function will either return a ompi_proc_t if one exists (either stored in the group
* or cached in the proc hash table) or a sentinel value representing the proc. This
* differs from ompi_group_get_proc_ptr() which returns the ompi_proc_t or NULL.
*/
ompi_proc_t *ompi_group_get_proc_ptr_raw (ompi_group_t *group, int rank);
/*
 * Return the opal process name of a group member. Works whether the stored
 * entry is a resolved ompi_proc_t or a sentinel-encoded name, so it never
 * forces a proc to be allocated.
 */
static inline opal_process_name_t ompi_group_get_proc_name (ompi_group_t *group, int rank)
{
    ompi_proc_t *entry = ompi_group_get_proc_ptr_raw (group, rank);

    if ((intptr_t) entry >= 0) {
        /* a real proc object: its name lives on the opal base object */
        return entry->super.proc_name;
    }

    /* sentinel: decode the process name directly from the encoded value */
    return ompi_proc_sentinel_to_name ((intptr_t) entry);
}
/** /**
* Inline function to check if sparse groups are enabled and return the direct access * Inline function to check if sparse groups are enabled and return the direct access
* to the proc pointer, otherwise the lookup function * to the proc pointer, otherwise the lookup function
*/ */
static inline struct ompi_proc_t* ompi_group_peer_lookup(ompi_group_t *group, int peer_id) static inline struct ompi_proc_t* ompi_group_peer_lookup(ompi_group_t *group, int peer_id)
{ {
#if OPAL_ENABLE_DEBUG return ompi_group_get_proc_ptr (group, peer_id, true);
if (peer_id >= group->grp_proc_count) {
opal_output(0, "ompi_group_lookup_peer: invalid peer index (%d)", peer_id);
return (struct ompi_proc_t *) NULL;
}
#endif
#if OMPI_GROUP_SPARSE
return ompi_group_get_proc_ptr (group, peer_id);
#else
return group->grp_proc_pointers[peer_id];
#endif
} }
/**
 * @brief Look up the proc of a group member without allocating
 *
 * Returns the ompi_proc_t for the given peer, or NULL if the group entry is
 * still a sentinel (i.e. the proc has not been instantiated). Unlike
 * ompi_group_peer_lookup() this never creates a new ompi_proc_t.
 */
static inline struct ompi_proc_t *ompi_group_peer_lookup_existing (ompi_group_t *group, int peer_id)
{
    return ompi_group_get_proc_ptr (group, peer_id, false);
}
bool ompi_group_have_remote_peers (ompi_group_t *group);
/** /**
* Function to print the group info * Function to print the group info
*/ */

Просмотреть файл

@ -210,14 +210,13 @@ ompi_group_t *ompi_group_allocate_bmap(int orig_group_size , int group_size)
*/ */
void ompi_group_increment_proc_count(ompi_group_t *group) void ompi_group_increment_proc_count(ompi_group_t *group)
{ {
int proc;
ompi_proc_t * proc_pointer; ompi_proc_t * proc_pointer;
for (proc = 0; proc < group->grp_proc_count; proc++) { for (int proc = 0 ; proc < group->grp_proc_count ; ++proc) {
proc_pointer = ompi_group_peer_lookup(group,proc); proc_pointer = ompi_group_peer_lookup_existing (group, proc);
OBJ_RETAIN(proc_pointer); if (proc_pointer) {
OBJ_RETAIN(proc_pointer);
}
} }
return;
} }
/* /*
@ -226,14 +225,13 @@ void ompi_group_increment_proc_count(ompi_group_t *group)
void ompi_group_decrement_proc_count(ompi_group_t *group) void ompi_group_decrement_proc_count(ompi_group_t *group)
{ {
int proc;
ompi_proc_t * proc_pointer; ompi_proc_t * proc_pointer;
for (proc = 0; proc < group->grp_proc_count; proc++) { for (int proc = 0 ; proc < group->grp_proc_count ; ++proc) {
proc_pointer = ompi_group_peer_lookup(group,proc); proc_pointer = ompi_group_peer_lookup_existing (group, proc);
OBJ_RELEASE(proc_pointer); if (proc_pointer) {
OBJ_RELEASE(proc_pointer);
}
} }
return;
} }
/* /*
@ -255,9 +253,6 @@ static void ompi_group_construct(ompi_group_t *new_group)
/* default the sparse values for groups */ /* default the sparse values for groups */
new_group->grp_parent_group_ptr = NULL; new_group->grp_parent_group_ptr = NULL;
/* return */
return;
} }
@ -300,9 +295,6 @@ static void ompi_group_destruct(ompi_group_t *group)
opal_pointer_array_set_item(&ompi_group_f_to_c_table, opal_pointer_array_set_item(&ompi_group_f_to_c_table,
group->grp_f_to_c_index, NULL); group->grp_f_to_c_index, NULL);
} }
/* return */
return;
} }

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2007 University of Houston. All rights reserved. * Copyright (c) 2006-2007 University of Houston. All rights reserved.
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -29,6 +29,66 @@
#include <math.h> #include <math.h>
/*
 * Count how many members of group1 also appear in group2, comparing the raw
 * stored entries (resolved ompi_proc_t pointers or sentinel values). For each
 * common member the corresponding group2 rank is set in bitmap.
 *
 * Returns the overlap count on success, or a negative OPAL error code if a
 * bitmap bit could not be set.
 */
static int ompi_group_dense_overlap (ompi_group_t *group1, ompi_group_t *group2, opal_bitmap_t *bitmap)
{
    ompi_proc_t *proc1_pointer, *proc2_pointer;
    int rc, overlap_count;

    overlap_count = 0;

    for (int proc1 = 0 ; proc1 < group1->grp_proc_count ; ++proc1) {
        proc1_pointer = ompi_group_get_proc_ptr_raw (group1, proc1);

        /* check to see if this proc is in group2 */
        for (int proc2 = 0 ; proc2 < group2->grp_proc_count ; ++proc2) {
            proc2_pointer = ompi_group_get_proc_ptr_raw (group2, proc2);

            if( proc1_pointer == proc2_pointer ) {
                rc = opal_bitmap_set_bit (bitmap, proc2);
                if (OPAL_SUCCESS != rc) {
                    return rc;
                }

                ++overlap_count;

                break;
            }
        } /* end proc2 loop */
    } /* end proc1 loop */

    return overlap_count;
}
/*
 * Dense-group lookup that never allocates a new ompi_proc_t. If the stored
 * entry is a sentinel and a matching proc already exists (found via
 * ompi_proc_lookup) the sentinel is replaced with that proc and a reference
 * is retained; otherwise the sentinel value itself is returned unchanged.
 */
static struct ompi_proc_t *ompi_group_dense_lookup_raw (ompi_group_t *group, const int peer_id)
{
    /* negative pointer value == sentinel encoding of a process name */
    if (OPAL_UNLIKELY((intptr_t) group->grp_proc_pointers[peer_id] < 0)) {
        ompi_proc_t *proc =
            (ompi_proc_t *) ompi_proc_lookup (ompi_proc_sentinel_to_name ((intptr_t) group->grp_proc_pointers[peer_id]));
        if (NULL != proc) {
            /* replace sentinel value with an actual ompi_proc_t */
            group->grp_proc_pointers[peer_id] = proc;
            /* retain the proc */
            OBJ_RETAIN(group->grp_proc_pointers[peer_id]);
        }
    }

    return group->grp_proc_pointers[peer_id];
}
/*
 * Walk the sparse-group parent chain down to the dense group and return the
 * raw stored entry for the given rank: a resolved ompi_proc_t if one exists
 * (or is already cached in the proc table), otherwise the sentinel value.
 */
ompi_proc_t *ompi_group_get_proc_ptr_raw (ompi_group_t *group, int rank)
{
#if OMPI_GROUP_SPARSE
    do {
        if (OMPI_GROUP_IS_DENSE(group)) {
            /* bug fix: this previously referenced the undeclared name
             * peer_id; the parameter is named rank, so the sparse build
             * could not compile */
            return ompi_group_dense_lookup_raw (group, rank);
        }
        /* translate this rank into the parent group and walk up the chain */
        int ranks1 = rank;
        ompi_group_translate_ranks (group, 1, &ranks1, group->grp_parent_group_ptr, &rank);
        group = group->grp_parent_group_ptr;
    } while (1);
#else
    return ompi_group_dense_lookup_raw (group, rank);
#endif
}
int ompi_group_calc_plist ( int n , const int *ranks ) { int ompi_group_calc_plist ( int n , const int *ranks ) {
return sizeof(char *) * n ; return sizeof(char *) * n ;
} }
@ -37,9 +97,8 @@ int ompi_group_incl_plist(ompi_group_t* group, int n, const int *ranks,
ompi_group_t **new_group) ompi_group_t **new_group)
{ {
/* local variables */ /* local variables */
int proc,my_group_rank; int my_group_rank;
ompi_group_t *group_pointer, *new_group_pointer; ompi_group_t *group_pointer, *new_group_pointer;
ompi_proc_t *my_proc_pointer;
group_pointer = (ompi_group_t *)group; group_pointer = (ompi_group_t *)group;
@ -56,9 +115,9 @@ int ompi_group_incl_plist(ompi_group_t* group, int n, const int *ranks,
} }
/* put group elements in the list */ /* put group elements in the list */
for (proc = 0; proc < n; proc++) { for (int proc = 0; proc < n; proc++) {
new_group_pointer->grp_proc_pointers[proc] = new_group_pointer->grp_proc_pointers[proc] =
ompi_group_peer_lookup(group_pointer,ranks[proc]); ompi_group_get_proc_ptr_raw (group_pointer, ranks[proc]);
} /* end proc loop */ } /* end proc loop */
/* increment proc reference counters */ /* increment proc reference counters */
@ -67,10 +126,8 @@ int ompi_group_incl_plist(ompi_group_t* group, int n, const int *ranks,
/* find my rank */ /* find my rank */
my_group_rank=group_pointer->grp_my_rank; my_group_rank=group_pointer->grp_my_rank;
if (MPI_UNDEFINED != my_group_rank) { if (MPI_UNDEFINED != my_group_rank) {
my_proc_pointer=ompi_group_peer_lookup (group_pointer,my_group_rank); ompi_set_group_rank(new_group_pointer, ompi_proc_local_proc);
ompi_set_group_rank(new_group_pointer,my_proc_pointer); } else {
}
else {
new_group_pointer->grp_my_rank = MPI_UNDEFINED; new_group_pointer->grp_my_rank = MPI_UNDEFINED;
} }
@ -87,114 +144,77 @@ int ompi_group_union (ompi_group_t* group1, ompi_group_t* group2,
ompi_group_t **new_group) ompi_group_t **new_group)
{ {
/* local variables */ /* local variables */
int new_group_size, proc1, proc2, found_in_group; int new_group_size, cnt, rc, overlap_count;
int my_group_rank, cnt; ompi_group_t *new_group_pointer;
ompi_group_t *group1_pointer, *group2_pointer, *new_group_pointer; ompi_proc_t *proc2_pointer;
ompi_proc_t *proc1_pointer, *proc2_pointer, *my_proc_pointer = NULL; opal_bitmap_t bitmap;
group1_pointer = (ompi_group_t *) group1;
group2_pointer = (ompi_group_t *) group2;
/* /*
* form union * form union
*/ */
/* get new group size */ /* get new group size */
new_group_size = group1_pointer->grp_proc_count; OBJ_CONSTRUCT(&bitmap, opal_bitmap_t);
rc = opal_bitmap_init (&bitmap, 32);
if (OPAL_SUCCESS != rc) {
return rc;
}
/* check group2 elements to see if they need to be included in the list */ /* check group2 elements to see if they need to be included in the list */
for (proc2 = 0; proc2 < group2_pointer->grp_proc_count; proc2++) { overlap_count = ompi_group_dense_overlap (group1, group2, &bitmap);
proc2_pointer = ompi_group_peer_lookup(group2_pointer,proc2); if (0 > overlap_count) {
OBJ_DESTRUCT(&bitmap);
/* check to see if this proc2 is alread in the group */ return overlap_count;
found_in_group = 0; }
for (proc1 = 0; proc1 < group1_pointer->grp_proc_count; proc1++) {
proc1_pointer = ompi_group_peer_lookup(group1_pointer,proc1);
if (proc1_pointer == proc2_pointer) {
/* proc2 is in group1 - don't double count */
found_in_group = 1;
break;
}
} /* end proc1 loop */
if (found_in_group) {
continue;
}
new_group_size++;
} /* end proc loop */
new_group_size = group1->grp_proc_count + group2->grp_proc_count - overlap_count;
if ( 0 == new_group_size ) { if ( 0 == new_group_size ) {
*new_group = MPI_GROUP_EMPTY; *new_group = MPI_GROUP_EMPTY;
OBJ_RETAIN(MPI_GROUP_EMPTY); OBJ_RETAIN(MPI_GROUP_EMPTY);
OBJ_DESTRUCT(&bitmap);
return MPI_SUCCESS; return MPI_SUCCESS;
} }
/* get new group struct */ /* get new group struct */
new_group_pointer = ompi_group_allocate(new_group_size); new_group_pointer = ompi_group_allocate(new_group_size);
if (NULL == new_group_pointer) { if (NULL == new_group_pointer) {
OBJ_DESTRUCT(&bitmap);
return MPI_ERR_GROUP; return MPI_ERR_GROUP;
} }
/* fill in the new group list */ /* fill in the new group list */
/* put group1 elements in the list */ /* put group1 elements in the list */
for (proc1 = 0; proc1 < group1_pointer->grp_proc_count; proc1++) { for (int proc1 = 0; proc1 < group1->grp_proc_count; ++proc1) {
new_group_pointer->grp_proc_pointers[proc1] = new_group_pointer->grp_proc_pointers[proc1] =
ompi_group_peer_lookup(group1_pointer,proc1); ompi_group_get_proc_ptr_raw (group1, proc1);
} }
cnt = group1_pointer->grp_proc_count; cnt = group1->grp_proc_count;
/* check group2 elements to see if they need to be included in the list */ /* check group2 elements to see if they need to be included in the list */
for (proc2 = 0; proc2 < group2_pointer->grp_proc_count; proc2++) { for (int proc2 = 0; proc2 < group2->grp_proc_count; ++proc2) {
proc2_pointer = ompi_group_peer_lookup(group2_pointer,proc2); if (opal_bitmap_is_set_bit (&bitmap, proc2)) {
/* check to see if this proc2 is alread in the group */
found_in_group = 0;
for (proc1 = 0; proc1 < group1_pointer->grp_proc_count; proc1++) {
proc1_pointer = ompi_group_peer_lookup(group1_pointer,proc1);
if (proc1_pointer == proc2_pointer) {
/* proc2 is in group1 - don't double count */
found_in_group = 1;
break;
}
} /* end proc1 loop */
if (found_in_group) {
continue; continue;
} }
new_group_pointer->grp_proc_pointers[cnt] = proc2_pointer = ompi_group_get_proc_ptr_raw (group2, proc2);
ompi_group_peer_lookup(group2_pointer,proc2); new_group_pointer->grp_proc_pointers[cnt++] = proc2_pointer;
cnt++;
} /* end proc loop */ } /* end proc loop */
OBJ_DESTRUCT(&bitmap);
/* increment proc reference counters */ /* increment proc reference counters */
ompi_group_increment_proc_count(new_group_pointer); ompi_group_increment_proc_count(new_group_pointer);
/* find my rank */ /* find my rank */
my_group_rank = group1_pointer->grp_my_rank; if (MPI_UNDEFINED != group1->grp_my_rank || MPI_UNDEFINED != group2->grp_my_rank) {
if (MPI_UNDEFINED == my_group_rank) { ompi_set_group_rank(new_group_pointer, ompi_proc_local_proc);
my_group_rank = group2_pointer->grp_my_rank;
if ( MPI_UNDEFINED != my_group_rank) {
my_proc_pointer = ompi_group_peer_lookup(group2_pointer,my_group_rank);
}
} else { } else {
my_proc_pointer = ompi_group_peer_lookup(group1_pointer,my_group_rank);
}
if ( MPI_UNDEFINED == my_group_rank ) {
new_group_pointer->grp_my_rank = MPI_UNDEFINED; new_group_pointer->grp_my_rank = MPI_UNDEFINED;
} }
else {
ompi_set_group_rank(new_group_pointer, my_proc_pointer);
}
*new_group = (MPI_Group) new_group_pointer; *new_group = (MPI_Group) new_group_pointer;
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -206,96 +226,65 @@ int ompi_group_difference(ompi_group_t* group1, ompi_group_t* group2,
ompi_group_t **new_group) { ompi_group_t **new_group) {
/* local varibles */ /* local varibles */
int new_group_size, proc1, proc2, found_in_group2, cnt; int new_group_size, overlap_count, rc;
int my_group_rank; ompi_group_t *new_group_pointer;
ompi_group_t *group1_pointer, *group2_pointer, *new_group_pointer; ompi_proc_t *proc1_pointer;
ompi_proc_t *proc1_pointer, *proc2_pointer, *my_proc_pointer = NULL; opal_bitmap_t bitmap;
group1_pointer=(ompi_group_t *)group1;
group2_pointer=(ompi_group_t *)group2;
/* /*
* form union * form union
*/ */
/* get new group size */ /* get new group size */
new_group_size=0; OBJ_CONSTRUCT(&bitmap, opal_bitmap_t);
rc = opal_bitmap_init (&bitmap, 32);
if (OPAL_SUCCESS != rc) {
return rc;
}
/* loop over group1 members */ /* check group2 elements to see if they need to be included in the list */
for( proc1=0; proc1 < group1_pointer->grp_proc_count; proc1++ ) { overlap_count = ompi_group_dense_overlap (group2, group1, &bitmap);
proc1_pointer = ompi_group_peer_lookup(group1_pointer,proc1); if (0 > overlap_count) {
/* check to see if this proc is in group2 */ OBJ_DESTRUCT(&bitmap);
found_in_group2=0; return overlap_count;
for( proc2=0 ; proc2 < group2_pointer->grp_proc_count ; proc2++ ) { }
proc2_pointer = ompi_group_peer_lookup(group2_pointer,proc2);
if( proc1_pointer == proc2_pointer ) {
found_in_group2=true;
break;
}
} /* end proc1 loop */
if(found_in_group2) {
continue;
}
new_group_size++;
} /* end proc loop */
new_group_size = group1->grp_proc_count - overlap_count;
if ( 0 == new_group_size ) { if ( 0 == new_group_size ) {
*new_group = MPI_GROUP_EMPTY; *new_group = MPI_GROUP_EMPTY;
OBJ_RETAIN(MPI_GROUP_EMPTY); OBJ_RETAIN(MPI_GROUP_EMPTY);
OBJ_DESTRUCT(&bitmap);
return MPI_SUCCESS; return MPI_SUCCESS;
} }
/* allocate a new ompi_group_t structure */ /* allocate a new ompi_group_t structure */
new_group_pointer=ompi_group_allocate(new_group_size); new_group_pointer = ompi_group_allocate(new_group_size);
if( NULL == new_group_pointer ) { if( NULL == new_group_pointer ) {
OBJ_DESTRUCT(&bitmap);
return MPI_ERR_GROUP; return MPI_ERR_GROUP;
} }
/* fill in group list */ /* fill in group list */
cnt=0;
/* loop over group1 members */ /* loop over group1 members */
for( proc1=0; proc1 < group1_pointer->grp_proc_count; proc1++ ) { for (int proc1 = 0, cnt = 0 ; proc1 < group1->grp_proc_count ; ++proc1) {
proc1_pointer = ompi_group_peer_lookup(group1_pointer,proc1); if (opal_bitmap_is_set_bit (&bitmap, proc1)) {
/* check to see if this proc is in group2 */
found_in_group2=0;
for( proc2=0 ; proc2 < group2_pointer->grp_proc_count ; proc2++ ) {
proc2_pointer = ompi_group_peer_lookup(group2_pointer,proc2);
if( proc1_pointer == proc2_pointer ) {
found_in_group2=true;
break;
}
} /* end proc1 loop */
if(found_in_group2) {
continue; continue;
} }
new_group_pointer->grp_proc_pointers[cnt] = proc1_pointer = ompi_group_get_proc_ptr_raw (group1, proc1);
ompi_group_peer_lookup(group1_pointer,proc1); new_group_pointer->grp_proc_pointers[cnt++] = proc1_pointer;
cnt++;
} /* end proc loop */ } /* end proc loop */
OBJ_DESTRUCT(&bitmap);
/* increment proc reference counters */ /* increment proc reference counters */
ompi_group_increment_proc_count(new_group_pointer); ompi_group_increment_proc_count(new_group_pointer);
/* find my rank */ /* find my rank */
my_group_rank=group1_pointer->grp_my_rank; if (MPI_UNDEFINED == group1->grp_my_rank || MPI_UNDEFINED != group2->grp_my_rank) {
if ( MPI_UNDEFINED != my_group_rank ) {
my_proc_pointer = ompi_group_peer_lookup(group1_pointer,my_group_rank);
}
else {
my_group_rank=group2_pointer->grp_my_rank;
if ( MPI_UNDEFINED != my_group_rank ) {
my_proc_pointer = ompi_group_peer_lookup(group2_pointer,my_group_rank);
}
}
if ( MPI_UNDEFINED == my_group_rank ) {
new_group_pointer->grp_my_rank = MPI_UNDEFINED; new_group_pointer->grp_my_rank = MPI_UNDEFINED;
} } else {
else { ompi_set_group_rank(new_group_pointer, ompi_proc_local_proc);
ompi_set_group_rank(new_group_pointer,my_proc_pointer);
} }
*new_group = (MPI_Group)new_group_pointer; *new_group = (MPI_Group)new_group_pointer;

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2007 University of Houston. All rights reserved. * Copyright (c) 2006-2007 University of Houston. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -38,12 +41,10 @@ void ompi_set_group_rank(ompi_group_t *group, struct ompi_proc_t *proc_pointer)
for (proc = 0; proc < group->grp_proc_count; proc++) { for (proc = 0; proc < group->grp_proc_count; proc++) {
/* check and see if this proc pointer matches proc_pointer /* check and see if this proc pointer matches proc_pointer
*/ */
if (ompi_group_peer_lookup(group,proc) == proc_pointer) { if (ompi_group_peer_lookup_existing (group, proc) == proc_pointer) {
group->grp_my_rank = proc; group->grp_my_rank = proc;
} break;
}
} /* end proc loop */ } /* end proc loop */
} }
/* return */
return;
} }

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2006 The Regents of the University of California. * Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -25,6 +28,7 @@
#include "ompi/mca/mca.h" #include "ompi/mca/mca.h"
#include "opal/mca/base/mca_base_framework.h" #include "opal/mca/base/mca_base_framework.h"
#include "ompi/mca/bml/bml.h" #include "ompi/mca/bml/bml.h"
#include "ompi/proc/proc.h"
/* /*
@ -60,6 +64,14 @@ OMPI_DECLSPEC extern mca_bml_base_component_t mca_bml_component;
OMPI_DECLSPEC extern mca_bml_base_module_t mca_bml; OMPI_DECLSPEC extern mca_bml_base_module_t mca_bml;
OMPI_DECLSPEC extern mca_base_framework_t ompi_bml_base_framework; OMPI_DECLSPEC extern mca_base_framework_t ompi_bml_base_framework;
static inline struct mca_bml_base_endpoint_t *mca_bml_base_get_endpoint (struct ompi_proc_t *proc) {
if (OPAL_UNLIKELY(NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML])) {
mca_bml.bml_add_proc (proc);
}
return (struct mca_bml_base_endpoint_t *) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
}
END_C_DECLS END_C_DECLS
#endif /* MCA_BML_BASE_H */ #endif /* MCA_BML_BASE_H */

Просмотреть файл

@ -160,14 +160,11 @@ static inline bool mca_bml_base_btl_array_remove( mca_bml_base_btl_array_t* arra
*/ */
static inline mca_bml_base_btl_t* mca_bml_base_btl_array_get_index(mca_bml_base_btl_array_t* array, size_t item_index) static inline mca_bml_base_btl_t* mca_bml_base_btl_array_get_index(mca_bml_base_btl_array_t* array, size_t item_index)
{ {
#if OPAL_ENABLE_DEBUG if (item_index < array->arr_size) {
if(item_index >= array->arr_size) { return &array->bml_btls[item_index];
opal_output(0, "mca_bml_base_btl_array_get_index: invalid array index %lu >= %lu",
(unsigned long)item_index, (unsigned long)array->arr_size);
return 0;
} }
#endif
return &array->bml_btls[item_index]; return NULL;
} }
/** /**
@ -441,7 +438,7 @@ typedef int (*mca_bml_base_module_finalize_fn_t)( void );
* @return OMPI_SUCCESS or error status on failure. * @return OMPI_SUCCESS or error status on failure.
* *
* The mca_bml_base_module_add_procs_fn_t() is called by the PML to * The mca_bml_base_module_add_procs_fn_t() is called by the PML to
* determine the set of BMLs that should be used to reach each process. * determine the set of BTLs that should be used to reach each process.
* Any addressing information exported by the peer via the mca_base_modex_send() * Any addressing information exported by the peer via the mca_base_modex_send()
* function should be available during this call via the corresponding * function should be available during this call via the corresponding
* mca_base_modex_recv() function. The BML may utilize this information to * mca_base_modex_recv() function. The BML may utilize this information to
@ -465,6 +462,25 @@ typedef int (*mca_bml_base_module_add_procs_fn_t)(
struct opal_bitmap_t* reachable struct opal_bitmap_t* reachable
); );
/**
* PML->BML notification of change in the process list.
*
* @param proc (IN) Process
* @return OMPI_SUCCESS or error status on failure.
*
* The mca_bml_base_module_add_proc_fn_t() is called by the PML to
* determine the set of BTLs that should be used to reach each process.
* Any addressing information exported by the peer via the mca_base_modex_send()
* function should be available during this call via the corresponding
* mca_base_modex_recv() function. The BML may utilize this information to
* determine reachability of each peer process.
*
* \note This function will return OMPI_ERR_UNREACH if the process can not
* be reached by a currently active BTL. This is not a fatal error, and the
* calling layer is free to continue using the BML interface.
*/
typedef int (*mca_bml_base_module_add_proc_fn_t) (struct ompi_proc_t *proc);
/** /**
* Notification of change to the process list. * Notification of change to the process list.
* *
@ -559,6 +575,7 @@ struct mca_bml_base_module_t {
mca_bml_base_component_t* bml_component; /**< pointer back to the BML component structure */ mca_bml_base_component_t* bml_component; /**< pointer back to the BML component structure */
/* BML function table */ /* BML function table */
mca_bml_base_module_add_proc_fn_t bml_add_proc;
mca_bml_base_module_add_procs_fn_t bml_add_procs; mca_bml_base_module_add_procs_fn_t bml_add_procs;
mca_bml_base_module_del_procs_fn_t bml_del_procs; mca_bml_base_module_del_procs_fn_t bml_del_procs;
mca_bml_base_module_add_btl_fn_t bml_add_btl; mca_bml_base_module_add_btl_fn_t bml_add_btl;

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2006 The Regents of the University of California. * Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved * Copyright (c) 2013 Intel, Inc. All rights reserved
@ -144,6 +144,293 @@ static void mca_bml_r2_calculate_bandwidth_latency (mca_bml_base_btl_array_t *bt
} }
} }
static mca_bml_base_endpoint_t *mca_bml_r2_allocate_endpoint (ompi_proc_t *proc) {
mca_bml_base_endpoint_t *bml_endpoint;
/* allocate bml specific proc data */
bml_endpoint = OBJ_NEW(mca_bml_base_endpoint_t);
if (NULL == bml_endpoint) {
opal_output(0, "mca_bml_r2_add_procs: unable to allocate resources");
return NULL;
}
/* preallocate space in array for max number of r2s */
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_eager, mca_bml_r2.num_btl_modules);
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_send, mca_bml_r2.num_btl_modules);
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_rdma, mca_bml_r2.num_btl_modules);
bml_endpoint->btl_max_send_size = -1;
bml_endpoint->btl_proc = proc;
proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = bml_endpoint;
bml_endpoint->btl_flags_or = 0;
return bml_endpoint;
}
static void mca_bml_r2_register_progress (mca_btl_base_module_t *btl)
{
if (NULL != btl->btl_component->btl_progress) {
bool found = false;
for (size_t p = 0 ; p < mca_bml_r2.num_btl_progress ; ++p) {
if(mca_bml_r2.btl_progress[p] == btl->btl_component->btl_progress) {
found = true;
break;
}
}
if (found == false) {
mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress++] =
btl->btl_component->btl_progress;
opal_progress_register (btl->btl_component->btl_progress);
}
}
}
static int mca_bml_r2_endpoint_add_btl (struct ompi_proc_t *proc, mca_bml_base_endpoint_t *bml_endpoint,
mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *btl_endpoint)
{
mca_bml_base_btl_t* bml_btl = NULL;
int btl_flags = btl->btl_flags;
bool btl_in_use = false;
size_t size;
/* NTH: these flags should have been sanitized by the btl. Once that is verified these
* checks can be safely removed. */
if ((btl_flags & MCA_BTL_FLAGS_PUT) && (NULL == btl->btl_put)) {
opal_output(0, "mca_bml_r2_add_procs: The PUT flag is specified for"
" the %s BTL without any PUT function attached. Discard the flag !",
btl->btl_component->btl_version.mca_component_name);
btl_flags ^= MCA_BTL_FLAGS_PUT;
}
if ((btl_flags & MCA_BTL_FLAGS_GET) && (NULL == btl->btl_get)) {
opal_output(0, "mca_bml_r2_add_procs: The GET flag is specified for"
" the %s BTL without any GET function attached. Discard the flag !",
btl->btl_component->btl_version.mca_component_name);
btl_flags ^= MCA_BTL_FLAGS_GET;
}
if ((btl_flags & (MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_SEND)) == 0) {
/* If no protocol specified, we have 2 choices: we ignore the BTL
* as we don't know which protocl to use, or we suppose that all
* BTLs support the send protocol. This is really a btl error as
* these flags should have been sanitized by the btl. */
btl_flags |= MCA_BTL_FLAGS_SEND;
}
if (btl_flags & MCA_BTL_FLAGS_SEND) {
/* dont allow an additional BTL with a lower exclusivity ranking */
bml_btl = mca_bml_base_btl_array_get_index (&bml_endpoint->btl_send, size - 1);
size = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_send);
if (!bml_btl || bml_btl->btl->btl_exclusivity < btl->btl_exclusivity) {
/* this btl has higher exclusivity than an existing btl or none exists */
opal_output_verbose(1, opal_btl_base_framework.framework_output,
"mca: bml: Using %s btl for send to %s on node %s",
btl->btl_component->btl_version.mca_component_name,
OMPI_NAME_PRINT(&proc->super.proc_name),
proc->super.proc_hostname);
/* cache the endpoint on the proc */
if (NULL == bml_btl || (bml_btl->btl->btl_exclusivity <= btl->btl_exclusivity)) {
bml_btl = mca_bml_base_btl_array_insert (&bml_endpoint->btl_send);
bml_btl->btl = btl;
bml_btl->btl_endpoint = btl_endpoint;
bml_btl->btl_weight = 0;
bml_btl->btl_flags = btl_flags;
/**
* calculate the bitwise OR of the btl flags
*/
bml_endpoint->btl_flags_or |= bml_btl->btl_flags;
} else {
opal_output_verbose(20, opal_btl_base_framework.framework_output,
"mca: bml: Not using %s btl for send to %s on node %s "
"because %s btl has higher exclusivity (%d > %d)",
btl->btl_component->btl_version.mca_component_name,
OMPI_NAME_PRINT(&proc->super.proc_name), proc->super.proc_hostname,
bml_btl->btl->btl_component->btl_version.mca_component_name,
bml_btl->btl->btl_exclusivity,
btl->btl_exclusivity);
}
btl_in_use = true;
}
}
/* always add rdma endpoints */
if ((btl_flags & MCA_BTL_FLAGS_RDMA) &&
!((proc->super.proc_arch != ompi_proc_local_proc->super.proc_arch) &&
(0 == (btl->btl_flags & MCA_BTL_FLAGS_HETEROGENEOUS_RDMA)))) {
mca_bml_base_btl_t *bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma);
bml_btl_rdma->btl = btl;
bml_btl_rdma->btl_endpoint = btl_endpoint;
bml_btl_rdma->btl_weight = 0;
bml_btl_rdma->btl_flags = btl_flags;
if (bml_endpoint->btl_pipeline_send_length < btl->btl_rdma_pipeline_send_length) {
bml_endpoint->btl_pipeline_send_length = btl->btl_rdma_pipeline_send_length;
}
if (bml_endpoint->btl_send_limit < btl->btl_min_rdma_pipeline_size) {
bml_endpoint->btl_send_limit = btl->btl_min_rdma_pipeline_size;
}
btl_in_use = true;
}
return btl_in_use ? OMPI_SUCCESS : OMPI_ERR_NOT_AVAILABLE;
}
static void mca_bml_r2_compute_endpoint_metrics (mca_bml_base_endpoint_t *bml_endpoint)
{
double total_bandwidth = 0;
uint32_t latency;
size_t n_send, n_rdma;
/* (1) determine the total bandwidth available across all btls
* note that we need to do this here, as we may already have btls configured
* (2) determine the highest priority ranking for latency
* (3) compute the maximum amount of bytes that can be send without any
* weighting. Once the left over is smaller than this number we will
* start using the weight to compute the correct amount.
*/
n_send = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_send);
n_rdma = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_rdma);
/* sort BTLs in descending order according to bandwidth value */
qsort (bml_endpoint->btl_send.bml_btls, n_send,
sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);
bml_endpoint->btl_rdma_index = 0;
mca_bml_r2_calculate_bandwidth_latency (&bml_endpoint->btl_send, &total_bandwidth, &latency);
/* (1) set the weight of each btl as a percentage of overall bandwidth
* (2) copy all btl instances at the highest priority ranking into the
* list of btls used for first fragments
*/
for (size_t n_index = 0 ; n_index < n_send ; ++n_index) {
mca_bml_base_btl_t *bml_btl =
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
mca_btl_base_module_t *btl = bml_btl->btl;
/* compute weighting factor for this r2 */
if(btl->btl_bandwidth > 0) {
bml_btl->btl_weight = (float)(btl->btl_bandwidth / total_bandwidth);
} else {
bml_btl->btl_weight = (float)(1.0 / n_send);
}
/* check to see if this r2 is already in the array of r2s
* used for first fragments - if not add it.
*/
if(btl->btl_latency == latency) {
mca_bml_base_btl_t* bml_btl_new =
mca_bml_base_btl_array_insert(&bml_endpoint->btl_eager);
*bml_btl_new = *bml_btl;
}
/* set endpoint max send size as min of available btls */
if (bml_endpoint->btl_max_send_size > btl->btl_max_send_size)
bml_endpoint->btl_max_send_size = btl->btl_max_send_size;
}
/* sort BTLs in descending order according to bandwidth value */
qsort(bml_endpoint->btl_rdma.bml_btls, n_rdma,
sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);
mca_bml_r2_calculate_bandwidth_latency (&bml_endpoint->btl_rdma, &total_bandwidth, &latency);
/* set rdma btl weights */
for (size_t n_index = 0 ; n_index < n_rdma ; ++n_index) {
mca_bml_base_btl_t *bml_btl =
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma, n_index);
/* compute weighting factor for this r2 */
if (bml_btl->btl->btl_bandwidth > 0.0) {
bml_btl->btl_weight = (float)(bml_btl->btl->btl_bandwidth / total_bandwidth);
} else {
bml_btl->btl_weight = (float)(1.0 / n_rdma);
}
}
}
static int mca_bml_r2_add_proc (struct ompi_proc_t *proc)
{
mca_bml_base_endpoint_t *bml_endpoint;
/* at least one btl is in use */
bool btl_in_use;
int rc;
if (OPAL_UNLIKELY(NULL == proc)) {
return OMPI_ERR_BAD_PARAM;
}
/* check if this endpoint is already set up */
if (NULL != proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
OBJ_RETAIN(proc);
return OMPI_SUCCESS;
}
/* add btls if not already done */
if (OMPI_SUCCESS != (rc = mca_bml_r2_add_btls())) {
return rc;
}
bml_endpoint = mca_bml_r2_allocate_endpoint (proc);
if (OPAL_UNLIKELY(NULL == bml_endpoint)) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
for (int p_index = 0 ; p_index < mca_bml_r2.num_btl_modules ; ++p_index) {
mca_btl_base_module_t *btl = mca_bml_r2.btl_modules[p_index];
struct mca_btl_base_endpoint_t *btl_endpoint = NULL;
/* if the r2 can reach the destination proc it sets the
* corresponding bit (proc index) in the reachable bitmap
* and can return addressing information for each proc
* that is passed back to the r2 on data transfer calls
*/
rc = btl->btl_add_procs (btl, 1, (opal_proc_t **) &proc, &btl_endpoint, NULL);
if (OMPI_SUCCESS != rc || NULL == btl_endpoint) {
/* This BTL has troubles adding the nodes. Let's continue maybe some other BTL
* can take care of this task. */
continue;
}
rc = mca_bml_r2_endpoint_add_btl (proc, bml_endpoint, btl, btl_endpoint);
if (OMPI_SUCCESS != rc) {
btl->btl_del_procs (btl, 1, (opal_proc_t **) &proc, &btl_endpoint);
} else {
mca_bml_r2_register_progress (btl);
btl_in_use = true;
}
}
if (!btl_in_use) {
/* no btl is available for this proc */
if (mca_bml_r2.show_unreach_errors) {
opal_show_help ("help-mca-bml-r2.txt", "unreachable proc", true,
OMPI_NAME_PRINT(&(ompi_proc_local_proc->super.proc_name)),
(NULL != ompi_proc_local_proc->super.proc_hostname ?
ompi_proc_local_proc->super.proc_hostname : "unknown!"),
OMPI_NAME_PRINT(&(proc->super.proc_name)),
(NULL != proc->super.proc_hostname ?
proc->super.proc_hostname : "unknown!"),
btl_names);
}
return OMPI_ERR_UNREACH;
}
/* compute metrics for registered btls */
mca_bml_r2_compute_endpoint_metrics (bml_endpoint);
return OMPI_SUCCESS;
}
/* /*
* For each proc setup a datastructure that indicates the BTLs * For each proc setup a datastructure that indicates the BTLs
* that can be used to reach the destination. * that can be used to reach the destination.
@ -154,7 +441,7 @@ static int mca_bml_r2_add_procs( size_t nprocs,
struct ompi_proc_t** procs, struct ompi_proc_t** procs,
struct opal_bitmap_t* reachable ) struct opal_bitmap_t* reachable )
{ {
size_t p, p_index, n_new_procs = 0; size_t n_new_procs = 0;
struct mca_btl_base_endpoint_t ** btl_endpoints = NULL; struct mca_btl_base_endpoint_t ** btl_endpoints = NULL;
struct ompi_proc_t** new_procs = NULL; struct ompi_proc_t** new_procs = NULL;
int rc, ret = OMPI_SUCCESS; int rc, ret = OMPI_SUCCESS;
@ -170,7 +457,7 @@ static int mca_bml_r2_add_procs( size_t nprocs,
/* Select only the procs that don't yet have the BML proc struct. This prevent /* Select only the procs that don't yet have the BML proc struct. This prevent
* us from calling btl->add_procs several times on the same destination proc. * us from calling btl->add_procs several times on the same destination proc.
*/ */
for(p_index = 0; p_index < nprocs; p_index++) { for (size_t p_index = 0 ; p_index < nprocs ; ++p_index) {
struct ompi_proc_t* proc = procs[p_index]; struct ompi_proc_t* proc = procs[p_index];
if(NULL != proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) { if(NULL != proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
@ -203,10 +490,9 @@ static int mca_bml_r2_add_procs( size_t nprocs,
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
for(p_index = 0; p_index < mca_bml_r2.num_btl_modules; p_index++) { for (size_t p_index = 0 ; p_index < mca_bml_r2.num_btl_modules ; ++p_index) {
mca_btl_base_module_t* btl = mca_bml_r2.btl_modules[p_index]; mca_btl_base_module_t *btl = mca_bml_r2.btl_modules[p_index];
int btl_inuse = 0; int btl_inuse = 0;
int btl_flags;
/* if the r2 can reach the destination proc it sets the /* if the r2 can reach the destination proc it sets the
* corresponding bit (proc index) in the reachable bitmap * corresponding bit (proc index) in the reachable bitmap
@ -217,240 +503,69 @@ static int mca_bml_r2_add_procs( size_t nprocs,
memset(btl_endpoints, 0, nprocs *sizeof(struct mca_btl_base_endpoint_t*)); memset(btl_endpoints, 0, nprocs *sizeof(struct mca_btl_base_endpoint_t*));
rc = btl->btl_add_procs(btl, n_new_procs, (opal_proc_t**)new_procs, btl_endpoints, reachable); rc = btl->btl_add_procs(btl, n_new_procs, (opal_proc_t**)new_procs, btl_endpoints, reachable);
if(OMPI_SUCCESS != rc) { if (OMPI_SUCCESS != rc) {
/* This BTL has troubles adding the nodes. Let's continue maybe some other BTL /* This BTL encountered an error while adding procs. Continue in case some other
* can take care of this task. * BTL(s) can be used. */
*/
continue; continue;
} }
/* for each proc that is reachable */ /* for each proc that is reachable */
for( p = 0; p < n_new_procs; p++ ) { for (size_t p = 0 ; p < n_new_procs ; ++p) {
if(opal_bitmap_is_set_bit(reachable, p)) { if (!opal_bitmap_is_set_bit(reachable, p)) {
ompi_proc_t *proc = new_procs[p]; continue;
mca_bml_base_endpoint_t * bml_endpoint =
(mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
mca_bml_base_btl_t* bml_btl = NULL;
size_t size;
if(NULL == bml_endpoint) {
/* allocate bml specific proc data */
bml_endpoint = OBJ_NEW(mca_bml_base_endpoint_t);
if (NULL == bml_endpoint) {
opal_output(0, "mca_bml_r2_add_procs: unable to allocate resources");
free(btl_endpoints);
free(new_procs);
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* preallocate space in array for max number of r2s */
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_eager, mca_bml_r2.num_btl_modules);
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_send, mca_bml_r2.num_btl_modules);
mca_bml_base_btl_array_reserve(&bml_endpoint->btl_rdma, mca_bml_r2.num_btl_modules);
bml_endpoint->btl_max_send_size = -1;
bml_endpoint->btl_proc = proc;
proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = bml_endpoint;
bml_endpoint->btl_flags_or = 0;
}
btl_flags = btl->btl_flags;
if( (btl_flags & MCA_BTL_FLAGS_PUT) && (NULL == btl->btl_put) ) {
opal_output(0, "mca_bml_r2_add_procs: The PUT flag is specified for"
" the %s BTL without any PUT function attached. Discard the flag !",
btl->btl_component->btl_version.mca_component_name);
btl_flags ^= MCA_BTL_FLAGS_PUT;
}
if( (btl_flags & MCA_BTL_FLAGS_GET) && (NULL == btl->btl_get) ) {
opal_output(0, "mca_bml_r2_add_procs: The GET flag is specified for"
" the %s BTL without any GET function attached. Discard the flag !",
btl->btl_component->btl_version.mca_component_name);
btl_flags ^= MCA_BTL_FLAGS_GET;
}
if( (btl_flags & (MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_SEND)) == 0 ) {
/**
* If no protocol specified, we have 2 choices: we ignore the BTL
* as we don't know which protocl to use, or we suppose that all
* BTLs support the send protocol.
*/
btl_flags |= MCA_BTL_FLAGS_SEND;
}
/* dont allow an additional BTL with a lower exclusivity ranking */
size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
if(size > 0) {
bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, size-1);
/* skip this btl if the exclusivity is less than the previous only if the btl does not provide full rdma (for one-sided) */
if(bml_btl->btl->btl_exclusivity > btl->btl_exclusivity && ((btl_flags & MCA_BTL_FLAGS_RDMA) != MCA_BTL_FLAGS_RDMA)) {
btl->btl_del_procs(btl, 1, (opal_proc_t**)&proc, &btl_endpoints[p]);
opal_output_verbose(MCA_BASE_VERBOSE_INFO, ompi_bml_base_framework.framework_output,
"mca: bml: Not using %s btl to %s on node %s "
"because %s btl has higher exclusivity (%d > %d)",
btl->btl_component->btl_version.mca_component_name,
OMPI_NAME_PRINT(&proc->super.proc_name), proc->super.proc_hostname,
bml_btl->btl->btl_component->btl_version.mca_component_name,
bml_btl->btl->btl_exclusivity,
btl->btl_exclusivity);
continue;
}
}
opal_output_verbose(MCA_BASE_VERBOSE_INFO, ompi_bml_base_framework.framework_output,
"mca: bml: Using %s btl to %s on node %s",
btl->btl_component->btl_version.mca_component_name,
OMPI_NAME_PRINT(&proc->super.proc_name),
proc->super.proc_hostname);
/* cache the endpoint on the proc */
if (NULL == bml_btl || (bml_btl->btl->btl_exclusivity <= btl->btl_exclusivity)) {
bml_btl = mca_bml_base_btl_array_insert(&bml_endpoint->btl_send);
bml_btl->btl = btl;
bml_btl->btl_endpoint = btl_endpoints[p];
bml_btl->btl_weight = 0;
bml_btl->btl_flags = btl_flags;
/**
* calculate the bitwise OR of the btl flags
*/
bml_endpoint->btl_flags_or |= bml_btl->btl_flags;
}
/* always add rdma endpoints */
if ((btl_flags & MCA_BTL_FLAGS_RDMA) &&
!((proc->super.proc_arch != ompi_proc_local_proc->super.proc_arch) &&
(0 == (btl->btl_flags & MCA_BTL_FLAGS_HETEROGENEOUS_RDMA)))) {
mca_bml_base_btl_t *bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma);
bml_btl_rdma->btl = btl;
bml_btl_rdma->btl_endpoint = btl_endpoints[p];
bml_btl_rdma->btl_weight = 0;
bml_btl_rdma->btl_flags = btl_flags;
if (bml_endpoint->btl_pipeline_send_length < btl->btl_rdma_pipeline_send_length) {
bml_endpoint->btl_pipeline_send_length = btl->btl_rdma_pipeline_send_length;
}
if (bml_endpoint->btl_send_limit < btl->btl_min_rdma_pipeline_size) {
bml_endpoint->btl_send_limit = btl->btl_min_rdma_pipeline_size;
}
}
/* This BTL is in use, allow the progress registration */
btl_inuse++;
} }
ompi_proc_t *proc = new_procs[p];
mca_bml_base_endpoint_t *bml_endpoint =
(mca_bml_base_endpoint_t *) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
mca_bml_base_btl_t *bml_btl = NULL;
size_t size;
if (NULL == bml_endpoint) {
bml_endpoint = mca_bml_r2_allocate_endpoint (proc);
if (NULL == bml_endpoint) {
free(btl_endpoints);
free(new_procs);
return OPAL_ERR_OUT_OF_RESOURCE;
}
}
rc = mca_bml_r2_endpoint_add_btl (proc, bml_endpoint, btl, btl_endpoints[p]);
if (OMPI_SUCCESS != rc) {
btl->btl_del_procs(btl, 1, (opal_proc_t**)&proc, &btl_endpoints[p]);
continue;
}
/* This BTL is in use, allow the progress registration */
btl_inuse++;
} }
if(btl_inuse > 0 && NULL != btl->btl_component->btl_progress) { if (btl_inuse) {
size_t p; mca_bml_r2_register_progress (btl);
bool found = false;
for( p = 0; p < mca_bml_r2.num_btl_progress; p++ ) {
if(mca_bml_r2.btl_progress[p] == btl->btl_component->btl_progress) {
found = true;
break;
}
}
if(found == false) {
mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress] =
btl->btl_component->btl_progress;
mca_bml_r2.num_btl_progress++;
opal_progress_register( btl->btl_component->btl_progress );
}
} }
} }
free(btl_endpoints); free(btl_endpoints);
/* iterate back through procs and compute metrics for registered r2s */ /* iterate back through procs and compute metrics for registered r2s */
for(p=0; p<n_new_procs; p++) { for (size_t p = 0; p < n_new_procs ; ++p) {
ompi_proc_t *proc = new_procs[p]; mca_bml_base_endpoint_t *bml_endpoint =
mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t *) new_procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
(mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
double total_bandwidth = 0;
uint32_t latency;
size_t n_send, n_rdma;
/* skip over procs w/ no btl's registered */ /* skip over procs w/ no btl's registered */
if(NULL == bml_endpoint) { if (NULL != bml_endpoint) {
continue; mca_bml_r2_compute_endpoint_metrics (bml_endpoint);
}
/* (1) determine the total bandwidth available across all btls
* note that we need to do this here, as we may already have btls configured
* (2) determine the highest priority ranking for latency
* (3) compute the maximum amount of bytes that can be send without any
* weighting. Once the left over is smaller than this number we will
* start using the weight to compute the correct amount.
*/
n_send = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
n_rdma = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
/* sort BTLs in descending order according to bandwidth value */
qsort(bml_endpoint->btl_send.bml_btls, n_send,
sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);
bml_endpoint->btl_rdma_index = 0;
mca_bml_r2_calculate_bandwidth_latency (&bml_endpoint->btl_send, &total_bandwidth, &latency);
/* (1) set the weight of each btl as a percentage of overall bandwidth
* (2) copy all btl instances at the highest priority ranking into the
* list of btls used for first fragments
*/
for (size_t n_index = 0 ; n_index < n_send ; ++n_index) {
mca_bml_base_btl_t* bml_btl =
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
mca_btl_base_module_t *btl = bml_btl->btl;
/* compute weighting factor for this r2 */
if(btl->btl_bandwidth > 0) {
bml_btl->btl_weight = (float)(btl->btl_bandwidth / total_bandwidth);
} else {
bml_btl->btl_weight = (float)(1.0 / n_send);
}
/* check to see if this r2 is already in the array of r2s
* used for first fragments - if not add it.
*/
if(btl->btl_latency == latency) {
mca_bml_base_btl_t* bml_btl_new =
mca_bml_base_btl_array_insert(&bml_endpoint->btl_eager);
*bml_btl_new = *bml_btl;
}
/* set endpoint max send size as min of available btls */
if(bml_endpoint->btl_max_send_size > btl->btl_max_send_size)
bml_endpoint->btl_max_send_size = btl->btl_max_send_size;
}
/* sort BTLs in descending order according to bandwidth value */
qsort(bml_endpoint->btl_rdma.bml_btls, n_rdma,
sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);
mca_bml_r2_calculate_bandwidth_latency (&bml_endpoint->btl_rdma, &total_bandwidth, &latency);
/* set rdma btl weights */
for (size_t n_index = 0 ; n_index < n_rdma ; ++n_index) {
mca_bml_base_btl_t *bml_btl =
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma, n_index);
/* compute weighting factor for this r2 */
if (bml_btl->btl->btl_bandwidth > 0.0) {
bml_btl->btl_weight = (float)(bml_btl->btl->btl_bandwidth / total_bandwidth);
} else {
bml_btl->btl_weight = (float)(1.0 / n_rdma);
}
} }
} }
/* see if we have a connection to everyone else */ /* see if we have a connection to everyone else */
for(p = 0; p < n_new_procs; p++) { for(size_t p = 0; p < n_new_procs ; ++p) {
ompi_proc_t *proc = new_procs[p]; ompi_proc_t *proc = new_procs[p];
if (NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) { if (NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
ret = OMPI_ERR_UNREACH; ret = OMPI_ERR_UNREACH;
if (mca_bml_r2.show_unreach_errors) { if (mca_bml_r2.show_unreach_errors) {
opal_show_help("help-mca-bml-r2.txt", opal_show_help("help-mca-bml-r2.txt", "unreachable proc", true,
"unreachable proc",
true,
OMPI_NAME_PRINT(&(ompi_proc_local_proc->super.proc_name)), OMPI_NAME_PRINT(&(ompi_proc_local_proc->super.proc_name)),
(NULL != ompi_proc_local_proc->super.proc_hostname ? (NULL != ompi_proc_local_proc->super.proc_hostname ?
ompi_proc_local_proc->super.proc_hostname : "unknown!"), ompi_proc_local_proc->super.proc_hostname : "unknown!"),
@ -459,6 +574,7 @@ static int mca_bml_r2_add_procs( size_t nprocs,
proc->super.proc_hostname : "unknown!"), proc->super.proc_hostname : "unknown!"),
btl_names); btl_names);
} }
break; break;
} }
} }
@ -476,7 +592,6 @@ static int mca_bml_r2_add_procs( size_t nprocs,
static int mca_bml_r2_del_procs(size_t nprocs, static int mca_bml_r2_del_procs(size_t nprocs,
struct ompi_proc_t** procs) struct ompi_proc_t** procs)
{ {
size_t p;
int rc; int rc;
struct ompi_proc_t** del_procs = (struct ompi_proc_t**) struct ompi_proc_t** del_procs = (struct ompi_proc_t**)
malloc(nprocs * sizeof(struct ompi_proc_t*)); malloc(nprocs * sizeof(struct ompi_proc_t*));
@ -486,26 +601,27 @@ static int mca_bml_r2_del_procs(size_t nprocs,
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
for(p = 0; p < nprocs; p++) { for (size_t p = 0 ; p < nprocs ; ++p) {
ompi_proc_t *proc = procs[p]; ompi_proc_t *proc = procs[p];
/* We much check that there are 2 references to the proc (not 1). The /* We much check that there are 2 references to the proc (not 1). The
* first reference belongs to ompi/proc the second belongs to the bml * first reference belongs to ompi/proc the second belongs to the bml
* since we retained it. We will release that reference at the end of * since we retained it. We will release that reference at the end of
* the loop below. */ * the loop below. */
if(((opal_object_t*)proc)->obj_reference_count == 2) { if (((opal_object_t*)proc)->obj_reference_count == 2 &&
NULL != proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
del_procs[n_del_procs++] = proc; del_procs[n_del_procs++] = proc;
} }
} }
for(p = 0; p < n_del_procs; p++) { for (size_t p = 0 ; p < n_del_procs ; ++p) {
ompi_proc_t *proc = del_procs[p]; ompi_proc_t *proc = del_procs[p];
mca_bml_base_endpoint_t* bml_endpoint = mca_bml_base_endpoint_t* bml_endpoint =
(mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
size_t f_index, f_size; size_t f_size;
/* notify each btl that the proc is going away */ /* notify each btl that the proc is going away */
f_size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send); f_size = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
for(f_index = 0; f_index < f_size; f_index++) { for (size_t f_index = 0 ; f_index < f_size ; ++f_index) {
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, f_index); mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, f_index);
mca_btl_base_module_t* btl = bml_btl->btl; mca_btl_base_module_t* btl = bml_btl->btl;
@ -521,10 +637,12 @@ static int mca_bml_r2_del_procs(size_t nprocs,
*/ */
} }
proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
OBJ_RELEASE(proc); OBJ_RELEASE(proc);
/* do any required cleanup */ /* do any required cleanup */
OBJ_RELEASE(bml_endpoint); OBJ_RELEASE(bml_endpoint);
proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
} }
free(del_procs); free(del_procs);
@ -835,6 +953,7 @@ int mca_bml_r2_component_fini(void)
mca_bml_r2_module_t mca_bml_r2 = { mca_bml_r2_module_t mca_bml_r2 = {
.super = { .super = {
.bml_component = &mca_bml_r2_component, .bml_component = &mca_bml_r2_component,
.bml_add_proc = mca_bml_r2_add_proc,
.bml_add_procs = mca_bml_r2_add_procs, .bml_add_procs = mca_bml_r2_add_procs,
.bml_del_procs = mca_bml_r2_del_procs, .bml_del_procs = mca_bml_r2_del_procs,
.bml_add_btl = mca_bml_r2_add_btl, .bml_add_btl = mca_bml_r2_add_btl,
@ -843,8 +962,7 @@ mca_bml_r2_module_t mca_bml_r2 = {
.bml_register = mca_bml_r2_register, .bml_register = mca_bml_r2_register,
.bml_register_error = mca_bml_r2_register_error, .bml_register_error = mca_bml_r2_register_error,
.bml_finalize = mca_bml_r2_finalize, .bml_finalize = mca_bml_r2_finalize,
.bml_ft_event = mca_bml_r2_ft_event .bml_ft_event = mca_bml_r2_ft_event,
} },
}; };

Просмотреть файл

@ -35,25 +35,6 @@ int mca_coll_fca_init_query(bool enable_progress_threads,
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
static int have_remote_peers(ompi_group_t *group, size_t size, int *local_peers)
{
ompi_proc_t *proc;
size_t i;
int ret;
*local_peers = 0;
ret = 0;
for (i = 0; i < size; ++i) {
proc = ompi_group_peer_lookup(group, i);
if (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
++*local_peers;
} else {
ret = 1;
}
}
return ret;
}
static inline ompi_proc_t* __local_rank_lookup(ompi_communicator_t *comm, int rank) static inline ompi_proc_t* __local_rank_lookup(ompi_communicator_t *comm, int rank)
{ {
return ompi_group_peer_lookup(comm->c_local_group, rank); return ompi_group_peer_lookup(comm->c_local_group, rank);
@ -618,7 +599,7 @@ mca_coll_fca_comm_query(struct ompi_communicator_t *comm, int *priority)
if (size < mca_coll_fca_component.fca_np) if (size < mca_coll_fca_component.fca_np)
goto exit; goto exit;
if (!have_remote_peers(comm->c_local_group, size, &local_peers) || OMPI_COMM_IS_INTER(comm)) if (!ompi_group_have_remote_peers(comm->c_local_group) || OMPI_COMM_IS_INTER(comm))
goto exit; goto exit;
fca_module = OBJ_NEW(mca_coll_fca_module_t); fca_module = OBJ_NEW(mca_coll_fca_module_t);

Просмотреть файл

@ -74,7 +74,6 @@ uint32_t mca_coll_sm_one = 1;
*/ */
static int sm_module_enable(mca_coll_base_module_t *module, static int sm_module_enable(mca_coll_base_module_t *module,
struct ompi_communicator_t *comm); struct ompi_communicator_t *comm);
static bool have_local_peers(ompi_group_t *group, size_t size);
static int bootstrap_comm(ompi_communicator_t *comm, static int bootstrap_comm(ompi_communicator_t *comm,
mca_coll_sm_module_t *module); mca_coll_sm_module_t *module);
static int mca_coll_sm_module_disable(mca_coll_base_module_t *module, static int mca_coll_sm_module_disable(mca_coll_base_module_t *module,
@ -172,8 +171,7 @@ mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority)
/* If we're intercomm, or if there's only one process in the /* If we're intercomm, or if there's only one process in the
communicator, or if not all the processes in the communicator communicator, or if not all the processes in the communicator
are not on this node, then we don't want to run */ are not on this node, then we don't want to run */
if (OMPI_COMM_IS_INTER(comm) || 1 == ompi_comm_size(comm) || if (OMPI_COMM_IS_INTER(comm) || 1 == ompi_comm_size(comm) || ompi_group_have_remote_peers (comm->c_local_group)) {
!have_local_peers(comm->c_local_group, ompi_comm_size(comm))) {
opal_output_verbose(10, ompi_coll_base_framework.framework_output, opal_output_verbose(10, ompi_coll_base_framework.framework_output,
"coll:sm:comm_query (%d/%s): intercomm, comm is too small, or not all peers local; disqualifying myself", comm->c_contextid, comm->c_name); "coll:sm:comm_query (%d/%s): intercomm, comm is too small, or not all peers local; disqualifying myself", comm->c_contextid, comm->c_name);
return NULL; return NULL;
@ -490,23 +488,6 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module,
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
static bool have_local_peers(ompi_group_t *group, size_t size)
{
size_t i;
ompi_proc_t *proc;
for (i = 0; i < size; ++i) {
proc = ompi_group_peer_lookup(group,i);
if (!OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
return false;
}
}
return true;
}
static int bootstrap_comm(ompi_communicator_t *comm, static int bootstrap_comm(ompi_communicator_t *comm,
mca_coll_sm_module_t *module) mca_coll_sm_module_t *module)
{ {

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2006 The Regents of the University of California. * Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006 QLogic Corporation. All rights reserved. * Copyright (c) 2006 QLogic Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -23,6 +26,7 @@
#include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/pml.h"
#include "ompi/mca/mtl/mtl.h" #include "ompi/mca/mtl/mtl.h"
#include "ompi/mca/mtl/base/base.h" #include "ompi/mca/mtl/base/base.h"
#include "ompi/proc/proc.h"
#include "opal/datatype/opal_convertor.h" #include "opal/datatype/opal_convertor.h"
#include <psm.h> #include <psm.h>
#include <psm_mq.h> #include <psm_mq.h>

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2006 The Regents of the University of California. * Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006 QLogic Corporation. All rights reserved. * Copyright (c) 2006 QLogic Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -54,5 +57,14 @@ struct mca_mtl_psm_endpoint_t {
typedef struct mca_mtl_psm_endpoint_t mca_mtl_psm_endpoint_t; typedef struct mca_mtl_psm_endpoint_t mca_mtl_psm_endpoint_t;
OBJ_CLASS_DECLARATION(mca_mtl_psm_endpoint); OBJ_CLASS_DECLARATION(mca_mtl_psm_endpoint);
static inline mca_mtl_psm_endpoint_t *ompi_mtl_psm_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc)
{
if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL])) {
ompi_mtl_psm_add_procs (mtl, 1, &ompi_proc);
}
return ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
}
END_C_DECLS END_C_DECLS
#endif #endif

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2006 The Regents of the University of California. * Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006 QLogic Corporation. All rights reserved. * Copyright (c) 2006 QLogic Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -42,7 +45,7 @@ ompi_mtl_psm_send(struct mca_mtl_base_module_t* mtl,
int ret; int ret;
size_t length; size_t length;
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest ); ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
mca_mtl_psm_endpoint_t* psm_endpoint = (mca_mtl_psm_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; mca_mtl_psm_endpoint_t* psm_endpoint = ompi_mtl_psm_get_endpoint (mtl, ompi_proc);
assert(mtl == &ompi_mtl_psm.super); assert(mtl == &ompi_mtl_psm.super);
@ -94,7 +97,7 @@ ompi_mtl_psm_isend(struct mca_mtl_base_module_t* mtl,
mca_mtl_psm_request_t * mtl_psm_request = (mca_mtl_psm_request_t*) mtl_request; mca_mtl_psm_request_t * mtl_psm_request = (mca_mtl_psm_request_t*) mtl_request;
size_t length; size_t length;
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest ); ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
mca_mtl_psm_endpoint_t* psm_endpoint = (mca_mtl_psm_endpoint_t*)ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; mca_mtl_psm_endpoint_t* psm_endpoint = ompi_mtl_psm_get_endpoint (mtl, ompi_proc);
assert(mtl == &ompi_mtl_psm.super); assert(mtl == &ompi_mtl_psm.super);

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -11,6 +12,8 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2006 QLogic Corporation. All rights reserved. * Copyright (c) 2006 QLogic Corporation. All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved * Copyright (c) 2015 Intel, Inc. All rights reserved
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -24,6 +27,7 @@
#include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/pml.h"
#include "ompi/mca/mtl/mtl.h" #include "ompi/mca/mtl/mtl.h"
#include "ompi/mca/mtl/base/base.h" #include "ompi/mca/mtl/base/base.h"
#include "ompi/proc/proc.h"
#include "opal/datatype/opal_convertor.h" #include "opal/datatype/opal_convertor.h"
#include <psm2.h> #include <psm2.h>
#include <psm2_mq.h> #include <psm2_mq.h>

Просмотреть файл

@ -55,5 +55,14 @@ struct mca_mtl_psm2_endpoint_t {
typedef struct mca_mtl_psm2_endpoint_t mca_mtl_psm2_endpoint_t; typedef struct mca_mtl_psm2_endpoint_t mca_mtl_psm2_endpoint_t;
OBJ_CLASS_DECLARATION(mca_mtl_psm2_endpoint); OBJ_CLASS_DECLARATION(mca_mtl_psm2_endpoint);
static inline mca_mtl_psm_endpoint_t *ompi_mtl_psm2_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc)
{
if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL])) {
ompi_mtl_psm2_add_procs (mtl, 1, &ompi_proc);
}
return ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
}
END_C_DECLS END_C_DECLS
#endif #endif

Просмотреть файл

@ -43,7 +43,7 @@ ompi_mtl_psm2_send(struct mca_mtl_base_module_t* mtl,
int ret; int ret;
size_t length; size_t length;
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest ); ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
mca_mtl_psm2_endpoint_t* psm_endpoint = (mca_mtl_psm2_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; mca_mtl_psm2_endpoint_t* psm_endpoint = ompi_mtl_psm2_get_endpoint (mtl, ompi_proc);
assert(mtl == &ompi_mtl_psm2.super); assert(mtl == &ompi_mtl_psm2.super);
@ -95,7 +95,7 @@ ompi_mtl_psm2_isend(struct mca_mtl_base_module_t* mtl,
mca_mtl_psm2_request_t * mtl_psm2_request = (mca_mtl_psm2_request_t*) mtl_request; mca_mtl_psm2_request_t * mtl_psm2_request = (mca_mtl_psm2_request_t*) mtl_request;
size_t length; size_t length;
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest ); ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
mca_mtl_psm2_endpoint_t* psm_endpoint = (mca_mtl_psm2_endpoint_t*)ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; mca_mtl_psm2_endpoint_t* psm_endpoint = ompi_mtl_psm2_get_endpoint (mtl, ompi_proc);
assert(mtl == &ompi_mtl_psm2.super); assert(mtl == &ompi_mtl_psm2.super);

Просмотреть файл

@ -299,7 +299,7 @@ ompi_osc_portals4_get_peer(ompi_osc_portals4_module_t *module, int rank)
static inline ptl_process_t static inline ptl_process_t
ompi_osc_portals4_get_peer_group(struct ompi_group_t *group, int rank) ompi_osc_portals4_get_peer_group(struct ompi_group_t *group, int rank)
{ {
ompi_proc_t *proc = ompi_group_get_proc_ptr(group, rank); ompi_proc_t *proc = ompi_group_get_proc_ptr(group, rank, true);
return *((ptl_process_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); return *((ptl_process_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]);
} }

Просмотреть файл

@ -134,10 +134,8 @@ check_win_ok(ompi_communicator_t *comm, int flavor)
return OMPI_ERR_NOT_SUPPORTED; return OMPI_ERR_NOT_SUPPORTED;
} }
for (i = 0 ; i < ompi_comm_size(comm) ; ++i) { if (ompi_group_have_remote_peers (comm->c_local_group)) {
if (!OPAL_PROC_ON_LOCAL_NODE(ompi_comm_peer_lookup(comm, i)->super.proc_flags)) { return OMPI_ERR_RMA_SHARED;
return OMPI_ERR_RMA_SHARED;
}
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;

Просмотреть файл

@ -191,11 +191,9 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
{ {
/* allocate pml specific comm data */ /* allocate pml specific comm data */
mca_pml_ob1_comm_t* pml_comm = OBJ_NEW(mca_pml_ob1_comm_t); mca_pml_ob1_comm_t* pml_comm = OBJ_NEW(mca_pml_ob1_comm_t);
opal_list_item_t *item, *next_item; mca_pml_ob1_recv_frag_t *frag, *next_frag;
mca_pml_ob1_recv_frag_t* frag;
mca_pml_ob1_comm_proc_t* pml_proc; mca_pml_ob1_comm_proc_t* pml_proc;
mca_pml_ob1_match_hdr_t* hdr; mca_pml_ob1_match_hdr_t* hdr;
int i;
if (NULL == pml_comm) { if (NULL == pml_comm) {
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
@ -210,16 +208,8 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
mca_pml_ob1_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count); mca_pml_ob1_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count);
comm->c_pml_comm = pml_comm; comm->c_pml_comm = pml_comm;
for( i = 0; i < comm->c_remote_group->grp_proc_count; i++ ) {
pml_comm->procs[i].ompi_proc = ompi_group_peer_lookup(comm->c_remote_group,i);
OBJ_RETAIN(pml_comm->procs[i].ompi_proc);
}
/* Grab all related messages from the non_existing_communicator pending queue */ /* Grab all related messages from the non_existing_communicator pending queue */
for( item = opal_list_get_first(&mca_pml_ob1.non_existing_communicator_pending); OPAL_LIST_FOREACH_SAFE(frag, next_frag, &mca_pml_ob1.non_existing_communicator_pending, mca_pml_ob1_recv_frag_t) {
item != opal_list_get_end(&mca_pml_ob1.non_existing_communicator_pending);
item = next_item ) {
frag = (mca_pml_ob1_recv_frag_t*)item;
next_item = opal_list_get_next(item);
hdr = &frag->hdr.hdr_match; hdr = &frag->hdr.hdr_match;
/* Is this fragment for the current communicator ? */ /* Is this fragment for the current communicator ? */
@ -229,8 +219,8 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
/* As we now know we work on a fragment for this communicator /* As we now know we work on a fragment for this communicator
* we should remove it from the * we should remove it from the
* non_existing_communicator_pending list. */ * non_existing_communicator_pending list. */
opal_list_remove_item( &mca_pml_ob1.non_existing_communicator_pending, opal_list_remove_item (&mca_pml_ob1.non_existing_communicator_pending,
item ); (opal_list_item_t *) frag);
add_fragment_to_unexpected: add_fragment_to_unexpected:
@ -249,7 +239,7 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
* We just have to push the fragment into the unexpected list of the corresponding * We just have to push the fragment into the unexpected list of the corresponding
* proc, or into the out-of-order (cant_match) list. * proc, or into the out-of-order (cant_match) list.
*/ */
pml_proc = &(pml_comm->procs[hdr->hdr_src]); pml_proc = mca_pml_ob1_peer_lookup(comm, hdr->hdr_src);
if( ((uint16_t)hdr->hdr_seq) == ((uint16_t)pml_proc->expected_sequence) ) { if( ((uint16_t)hdr->hdr_seq) == ((uint16_t)pml_proc->expected_sequence) ) {
/* We're now expecting the next sequence number. */ /* We're now expecting the next sequence number. */
@ -283,12 +273,6 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
int mca_pml_ob1_del_comm(ompi_communicator_t* comm) int mca_pml_ob1_del_comm(ompi_communicator_t* comm)
{ {
mca_pml_ob1_comm_t* pml_comm = comm->c_pml_comm;
int i;
for( i = 0; i < comm->c_remote_group->grp_proc_count; i++ ) {
OBJ_RELEASE(pml_comm->procs[i].ompi_proc);
}
OBJ_RELEASE(comm->c_pml_comm); OBJ_RELEASE(comm->c_pml_comm);
comm->c_pml_comm = NULL; comm->c_pml_comm = NULL;
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -303,9 +287,9 @@ int mca_pml_ob1_del_comm(ompi_communicator_t* comm)
int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs)
{ {
mca_btl_base_selected_module_t *sm;
opal_bitmap_t reachable; opal_bitmap_t reachable;
int rc; int rc;
opal_list_item_t *item;
if(nprocs == 0) if(nprocs == 0)
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -347,11 +331,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs)
BTLs requires iterating over the procs, as the BML does not BTLs requires iterating over the procs, as the BML does not
expose all currently in use btls. */ expose all currently in use btls. */
for (item = opal_list_get_first(&mca_btl_base_modules_initialized) ; OPAL_LIST_FOREACH(sm, &mca_btl_base_modules_initialized, mca_btl_base_selected_module_t) {
item != opal_list_get_end(&mca_btl_base_modules_initialized) ;
item = opal_list_get_next(item)) {
mca_btl_base_selected_module_t *sm =
(mca_btl_base_selected_module_t*) item;
if (sm->btl_module->btl_eager_limit < sizeof(mca_pml_ob1_hdr_t)) { if (sm->btl_module->btl_eager_limit < sizeof(mca_pml_ob1_hdr_t)) {
opal_show_help("help-mpi-pml-ob1.txt", "eager_limit_too_small", opal_show_help("help-mpi-pml-ob1.txt", "eager_limit_too_small",
true, true,
@ -589,13 +569,19 @@ int mca_pml_ob1_dump(struct ompi_communicator_t* comm, int verbose)
/* iterate through all procs on communicator */ /* iterate through all procs on communicator */
for( i = 0; i < (int)pml_comm->num_procs; i++ ) { for( i = 0; i < (int)pml_comm->num_procs; i++ ) {
mca_pml_ob1_comm_proc_t* proc = &pml_comm->procs[i]; mca_pml_ob1_comm_proc_t* proc = pml_comm->procs[i];
if (NULL == proc) {
continue;
}
mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
size_t n; size_t n;
opal_output(0, "[Rank %d] expected_seq %d ompi_proc %p send_seq %d\n", opal_output(0, "[Rank %d] expected_seq %d ompi_proc %p send_seq %d\n",
i, proc->expected_sequence, (void*) proc->ompi_proc, i, proc->expected_sequence, (void*) proc->ompi_proc,
proc->send_sequence); proc->send_sequence);
/* dump all receive queues */ /* dump all receive queues */
if( opal_list_get_size(&proc->specific_receives) ) { if( opal_list_get_size(&proc->specific_receives) ) {
opal_output(0, "expected specific receives\n"); opal_output(0, "expected specific receives\n");

Просмотреть файл

@ -40,14 +40,15 @@ static void mca_pml_ob1_comm_proc_destruct(mca_pml_ob1_comm_proc_t* proc)
OBJ_DESTRUCT(&proc->frags_cant_match); OBJ_DESTRUCT(&proc->frags_cant_match);
OBJ_DESTRUCT(&proc->specific_receives); OBJ_DESTRUCT(&proc->specific_receives);
OBJ_DESTRUCT(&proc->unexpected_frags); OBJ_DESTRUCT(&proc->unexpected_frags);
if (proc->ompi_proc) {
OBJ_RELEASE(proc->ompi_proc);
}
} }
static OBJ_CLASS_INSTANCE( OBJ_CLASS_INSTANCE(mca_pml_ob1_comm_proc_t, opal_object_t,
mca_pml_ob1_comm_proc_t, mca_pml_ob1_comm_proc_construct,
opal_object_t, mca_pml_ob1_comm_proc_destruct);
mca_pml_ob1_comm_proc_construct,
mca_pml_ob1_comm_proc_destruct);
static void mca_pml_ob1_comm_construct(mca_pml_ob1_comm_t* comm) static void mca_pml_ob1_comm_construct(mca_pml_ob1_comm_t* comm)
@ -63,11 +64,16 @@ static void mca_pml_ob1_comm_construct(mca_pml_ob1_comm_t* comm)
static void mca_pml_ob1_comm_destruct(mca_pml_ob1_comm_t* comm) static void mca_pml_ob1_comm_destruct(mca_pml_ob1_comm_t* comm)
{ {
size_t i; if (NULL != comm->procs) {
for(i=0; i<comm->num_procs; i++) for (size_t i = 0; i < comm->num_procs; ++i) {
OBJ_DESTRUCT((&comm->procs[i])); if (comm->procs[i]) {
if(NULL != comm->procs) OBJ_RELEASE(comm->procs[i]);
}
}
free(comm->procs); free(comm->procs);
}
OBJ_DESTRUCT(&comm->wild_receives); OBJ_DESTRUCT(&comm->wild_receives);
OBJ_DESTRUCT(&comm->matching_lock); OBJ_DESTRUCT(&comm->matching_lock);
} }
@ -80,18 +86,13 @@ OBJ_CLASS_INSTANCE(
mca_pml_ob1_comm_destruct); mca_pml_ob1_comm_destruct);
int mca_pml_ob1_comm_init_size(mca_pml_ob1_comm_t* comm, size_t size) int mca_pml_ob1_comm_init_size (mca_pml_ob1_comm_t* comm, size_t size)
{ {
size_t i;
/* send message sequence-number support - sender side */ /* send message sequence-number support - sender side */
comm->procs = (mca_pml_ob1_comm_proc_t*)malloc(sizeof(mca_pml_ob1_comm_proc_t)*size); comm->procs = (mca_pml_ob1_comm_proc_t **) calloc(size, sizeof (mca_pml_ob1_comm_proc_t *));
if(NULL == comm->procs) { if(NULL == comm->procs) {
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
for(i=0; i<size; i++) {
OBJ_CONSTRUCT(comm->procs+i, mca_pml_ob1_comm_proc_t);
}
comm->num_procs = size; comm->num_procs = size;
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }

Просмотреть файл

@ -24,6 +24,7 @@
#include "opal/threads/mutex.h" #include "opal/threads/mutex.h"
#include "opal/class/opal_list.h" #include "opal/class/opal_list.h"
#include "ompi/proc/proc.h" #include "ompi/proc/proc.h"
#include "ompi/communicator/communicator.h"
BEGIN_C_DECLS BEGIN_C_DECLS
@ -42,6 +43,7 @@ struct mca_pml_ob1_comm_proc_t {
}; };
typedef struct mca_pml_ob1_comm_proc_t mca_pml_ob1_comm_proc_t; typedef struct mca_pml_ob1_comm_proc_t mca_pml_ob1_comm_proc_t;
OBJ_CLASS_DECLARATION(mca_pml_ob1_comm_proc_t);
/** /**
* Cached on ompi_communicator_t to hold queues/state * Cached on ompi_communicator_t to hold queues/state
@ -56,7 +58,7 @@ struct mca_pml_comm_t {
#endif #endif
opal_mutex_t matching_lock; /**< matching lock */ opal_mutex_t matching_lock; /**< matching lock */
opal_list_t wild_receives; /**< queue of unmatched wild (source process not specified) receives */ opal_list_t wild_receives; /**< queue of unmatched wild (source process not specified) receives */
mca_pml_ob1_comm_proc_t* procs; mca_pml_ob1_comm_proc_t **procs;
size_t num_procs; size_t num_procs;
size_t last_probed; size_t last_probed;
}; };
@ -64,6 +66,18 @@ typedef struct mca_pml_comm_t mca_pml_ob1_comm_t;
OBJ_CLASS_DECLARATION(mca_pml_ob1_comm_t); OBJ_CLASS_DECLARATION(mca_pml_ob1_comm_t);
static inline mca_pml_ob1_comm_proc_t *mca_pml_ob1_peer_lookup (struct ompi_communicator_t *comm, int rank)
{
mca_pml_ob1_comm_t *pml_comm = (mca_pml_ob1_comm_t *)comm->c_pml_comm;
if (OPAL_UNLIKELY(NULL == pml_comm->procs[rank])) {
pml_comm->procs[rank] = OBJ_NEW(mca_pml_ob1_comm_proc_t);
pml_comm->procs[rank]->ompi_proc = ompi_comm_peer_lookup (comm, rank);
OBJ_RETAIN(pml_comm->procs[rank]->ompi_proc);
}
return pml_comm->procs[rank];
}
/** /**
* Initialize an instance of mca_pml_ob1_comm_t based on the communicator size. * Initialize an instance of mca_pml_ob1_comm_t based on the communicator size.

Просмотреть файл

@ -144,9 +144,12 @@ static int mca_pml_ob1_get_unex_msgq_size (const struct mca_base_pvar_t *pvar, v
int i; int i;
for (i = 0 ; i < comm_size ; ++i) { for (i = 0 ; i < comm_size ; ++i) {
pml_proc = pml_comm->procs + i; pml_proc = pml_comm->procs[i];
if (pml_proc) {
values[i] = opal_list_get_size (&pml_proc->unexpected_frags); values[i] = opal_list_get_size (&pml_proc->unexpected_frags);
} else {
values[i] = 0;
}
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -162,9 +165,13 @@ static int mca_pml_ob1_get_posted_recvq_size (const struct mca_base_pvar_t *pvar
int i; int i;
for (i = 0 ; i < comm_size ; ++i) { for (i = 0 ; i < comm_size ; ++i) {
pml_proc = pml_comm->procs + i; pml_proc = pml_comm->procs[i];
values[i] = opal_list_get_size (&pml_proc->specific_receives); if (pml_proc) {
values[i] = opal_list_get_size (&pml_proc->specific_receives);
} else {
values[i] = 0;
}
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;

Просмотреть файл

@ -148,7 +148,6 @@ mca_pml_ob1_imrecv( void *buf,
int src, tag; int src, tag;
ompi_communicator_t *comm; ompi_communicator_t *comm;
mca_pml_ob1_comm_proc_t* proc; mca_pml_ob1_comm_proc_t* proc;
mca_pml_ob1_comm_t* ob1_comm;
uint64_t seq; uint64_t seq;
/* get the request from the message and the frag from the request /* get the request from the message and the frag from the request
@ -158,7 +157,6 @@ mca_pml_ob1_imrecv( void *buf,
src = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE; src = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE;
tag = recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG; tag = recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG;
comm = (*message)->comm; comm = (*message)->comm;
ob1_comm = recvreq->req_recv.req_base.req_comm->c_pml_comm;
seq = recvreq->req_recv.req_base.req_sequence; seq = recvreq->req_recv.req_base.req_sequence;
/* make the request a recv request again */ /* make the request a recv request again */
@ -196,7 +194,7 @@ mca_pml_ob1_imrecv( void *buf,
/* Note - sequence number already assigned */ /* Note - sequence number already assigned */
recvreq->req_recv.req_base.req_sequence = seq; recvreq->req_recv.req_base.req_sequence = seq;
proc = &ob1_comm->procs[recvreq->req_recv.req_base.req_peer]; proc = mca_pml_ob1_peer_lookup (comm, recvreq->req_recv.req_base.req_peer);
recvreq->req_recv.req_base.req_proc = proc->ompi_proc; recvreq->req_recv.req_base.req_proc = proc->ompi_proc;
prepare_recv_req_converter(recvreq); prepare_recv_req_converter(recvreq);
@ -243,7 +241,6 @@ mca_pml_ob1_mrecv( void *buf,
int src, tag, rc; int src, tag, rc;
ompi_communicator_t *comm; ompi_communicator_t *comm;
mca_pml_ob1_comm_proc_t* proc; mca_pml_ob1_comm_proc_t* proc;
mca_pml_ob1_comm_t* ob1_comm;
uint64_t seq; uint64_t seq;
/* get the request from the message and the frag from the request /* get the request from the message and the frag from the request
@ -254,7 +251,6 @@ mca_pml_ob1_mrecv( void *buf,
src = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE; src = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE;
tag = recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG; tag = recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG;
seq = recvreq->req_recv.req_base.req_sequence; seq = recvreq->req_recv.req_base.req_sequence;
ob1_comm = recvreq->req_recv.req_base.req_comm->c_pml_comm;
/* make the request a recv request again */ /* make the request a recv request again */
/* The old request kept pointers to comm and the char datatype. /* The old request kept pointers to comm and the char datatype.
@ -290,7 +286,7 @@ mca_pml_ob1_mrecv( void *buf,
/* Note - sequence number already assigned */ /* Note - sequence number already assigned */
recvreq->req_recv.req_base.req_sequence = seq; recvreq->req_recv.req_base.req_sequence = seq;
proc = &ob1_comm->procs[recvreq->req_recv.req_base.req_peer]; proc = mca_pml_ob1_peer_lookup (comm, recvreq->req_recv.req_base.req_peer);
recvreq->req_recv.req_base.req_proc = proc->ompi_proc; recvreq->req_recv.req_base.req_proc = proc->ompi_proc;
prepare_recv_req_converter(recvreq); prepare_recv_req_converter(recvreq);

Просмотреть файл

@ -126,15 +126,14 @@ int mca_pml_ob1_isend(const void *buf,
ompi_communicator_t * comm, ompi_communicator_t * comm,
ompi_request_t ** request) ompi_request_t ** request)
{ {
mca_pml_ob1_comm_t* ob1_comm = comm->c_pml_comm; mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup (comm, dst);
mca_pml_ob1_send_request_t *sendreq = NULL; mca_pml_ob1_send_request_t *sendreq = NULL;
ompi_proc_t *dst_proc = ompi_comm_peer_lookup (comm, dst); ompi_proc_t *dst_proc = ob1_proc->ompi_proc;
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*) mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (dst_proc);
dst_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
int16_t seqn; int16_t seqn;
int rc; int rc;
seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_comm->procs[dst].send_sequence, 1); seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1);
if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) { if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) {
rc = mca_pml_ob1_send_inline (buf, count, datatype, dst, tag, seqn, dst_proc, rc = mca_pml_ob1_send_inline (buf, count, datatype, dst, tag, seqn, dst_proc,
@ -176,10 +175,9 @@ int mca_pml_ob1_send(const void *buf,
mca_pml_base_send_mode_t sendmode, mca_pml_base_send_mode_t sendmode,
ompi_communicator_t * comm) ompi_communicator_t * comm)
{ {
mca_pml_ob1_comm_t* ob1_comm = comm->c_pml_comm; mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup (comm, dst);
ompi_proc_t *dst_proc = ompi_comm_peer_lookup (comm, dst); ompi_proc_t *dst_proc = ob1_proc->ompi_proc;
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*) mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (dst_proc);
dst_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
mca_pml_ob1_send_request_t *sendreq = NULL; mca_pml_ob1_send_request_t *sendreq = NULL;
int16_t seqn; int16_t seqn;
int rc; int rc;
@ -202,7 +200,7 @@ int mca_pml_ob1_send(const void *buf,
return OMPI_ERR_UNREACH; return OMPI_ERR_UNREACH;
} }
seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_comm->procs[dst].send_sequence, 1); seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1);
/** /**
* The immediate send will not have a request, so they are * The immediate send will not have a request, so they are

Просмотреть файл

@ -143,7 +143,7 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl,
comm = (mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm; comm = (mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm;
/* source sequence number */ /* source sequence number */
proc = &comm->procs[hdr->hdr_src]; proc = mca_pml_ob1_peer_lookup (comm_ptr, hdr->hdr_src);
/* We generate the MSG_ARRIVED event as soon as the PML is aware /* We generate the MSG_ARRIVED event as soon as the PML is aware
* of a matching fragment arrival. Independing if it is received * of a matching fragment arrival. Independing if it is received
@ -650,7 +650,7 @@ static int mca_pml_ob1_recv_frag_match( mca_btl_base_module_t *btl,
/* source sequence number */ /* source sequence number */
frag_msg_seq = hdr->hdr_seq; frag_msg_seq = hdr->hdr_seq;
proc = &comm->procs[hdr->hdr_src]; proc = mca_pml_ob1_peer_lookup (comm_ptr, hdr->hdr_src);
/** /**
* We generate the MSG_ARRIVED event as soon as the PML is aware of a matching * We generate the MSG_ARRIVED event as soon as the PML is aware of a matching

Просмотреть файл

@ -100,7 +100,8 @@ static int mca_pml_ob1_recv_request_free(struct ompi_request_t** request)
static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request, int complete) static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request, int complete)
{ {
mca_pml_ob1_recv_request_t* request = (mca_pml_ob1_recv_request_t*)ompi_request; mca_pml_ob1_recv_request_t* request = (mca_pml_ob1_recv_request_t*)ompi_request;
mca_pml_ob1_comm_t* comm = request->req_recv.req_base.req_comm->c_pml_comm; ompi_communicator_t *comm = request->req_recv.req_base.req_comm;
mca_pml_ob1_comm_t *ob1_comm = comm->c_pml_comm;
if( true == request->req_match_received ) { /* way to late to cancel this one */ if( true == request->req_match_received ) { /* way to late to cancel this one */
assert( OMPI_ANY_TAG != ompi_request->req_status.MPI_TAG ); /* not matched isn't it */ assert( OMPI_ANY_TAG != ompi_request->req_status.MPI_TAG ); /* not matched isn't it */
@ -108,11 +109,11 @@ static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request,
} }
/* The rest should be protected behind the match logic lock */ /* The rest should be protected behind the match logic lock */
OPAL_THREAD_LOCK(&comm->matching_lock); OPAL_THREAD_LOCK(&ob1_comm->matching_lock);
if( request->req_recv.req_base.req_peer == OMPI_ANY_SOURCE ) { if( request->req_recv.req_base.req_peer == OMPI_ANY_SOURCE ) {
opal_list_remove_item( &comm->wild_receives, (opal_list_item_t*)request ); opal_list_remove_item( &ob1_comm->wild_receives, (opal_list_item_t*)request );
} else { } else {
mca_pml_ob1_comm_proc_t* proc = comm->procs + request->req_recv.req_base.req_peer; mca_pml_ob1_comm_proc_t* proc = mca_pml_ob1_peer_lookup (comm, request->req_recv.req_base.req_peer);
opal_list_remove_item(&proc->specific_receives, (opal_list_item_t*)request); opal_list_remove_item(&proc->specific_receives, (opal_list_item_t*)request);
} }
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q,
@ -122,7 +123,7 @@ static int mca_pml_ob1_recv_request_cancel(struct ompi_request_t* ompi_request,
* to true. Otherwise, the request will never be freed. * to true. Otherwise, the request will never be freed.
*/ */
request->req_recv.req_base.req_pml_complete = true; request->req_recv.req_base.req_pml_complete = true;
OPAL_THREAD_UNLOCK(&comm->matching_lock); OPAL_THREAD_UNLOCK(&ob1_comm->matching_lock);
OPAL_THREAD_LOCK(&ompi_request_lock); OPAL_THREAD_LOCK(&ompi_request_lock);
ompi_request->req_status._cancelled = true; ompi_request->req_status._cancelled = true;
@ -260,7 +261,7 @@ static int mca_pml_ob1_recv_request_ack(
ompi_proc_t* proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc; ompi_proc_t* proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc;
mca_bml_base_endpoint_t* bml_endpoint = NULL; mca_bml_base_endpoint_t* bml_endpoint = NULL;
bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; bml_endpoint = mca_bml_base_get_endpoint (proc);
/* by default copy everything */ /* by default copy everything */
recvreq->req_send_offset = bytes_received; recvreq->req_send_offset = bytes_received;
@ -654,7 +655,7 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
} }
/* lookup bml datastructures */ /* lookup bml datastructures */
bml_endpoint = (mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; bml_endpoint = mca_bml_base_get_endpoint (recvreq->req_recv.req_base.req_proc);
rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl); rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
#if OPAL_CUDA_SUPPORT #if OPAL_CUDA_SUPPORT
@ -1079,8 +1080,11 @@ static mca_pml_ob1_recv_frag_t*
recv_req_match_specific_proc( const mca_pml_ob1_recv_request_t *req, recv_req_match_specific_proc( const mca_pml_ob1_recv_request_t *req,
mca_pml_ob1_comm_proc_t *proc ) mca_pml_ob1_comm_proc_t *proc )
{ {
if (NULL == proc) {
return NULL;
}
opal_list_t* unexpected_frags = &proc->unexpected_frags; opal_list_t* unexpected_frags = &proc->unexpected_frags;
opal_list_item_t *i;
mca_pml_ob1_recv_frag_t* frag; mca_pml_ob1_recv_frag_t* frag;
int tag = req->req_recv.req_base.req_tag; int tag = req->req_recv.req_base.req_tag;
@ -1088,20 +1092,12 @@ recv_req_match_specific_proc( const mca_pml_ob1_recv_request_t *req,
return NULL; return NULL;
if( OMPI_ANY_TAG == tag ) { if( OMPI_ANY_TAG == tag ) {
for (i = opal_list_get_first(unexpected_frags); OPAL_LIST_FOREACH(frag, unexpected_frags, mca_pml_ob1_recv_frag_t) {
i != opal_list_get_end(unexpected_frags);
i = opal_list_get_next(i)) {
frag = (mca_pml_ob1_recv_frag_t*)i;
if( frag->hdr.hdr_match.hdr_tag >= 0 ) if( frag->hdr.hdr_match.hdr_tag >= 0 )
return frag; return frag;
} }
} else { } else {
for (i = opal_list_get_first(unexpected_frags); OPAL_LIST_FOREACH(frag, unexpected_frags, mca_pml_ob1_recv_frag_t) {
i != opal_list_get_end(unexpected_frags);
i = opal_list_get_next(i)) {
frag = (mca_pml_ob1_recv_frag_t*)i;
if( frag->hdr.hdr_match.hdr_tag == tag ) if( frag->hdr.hdr_match.hdr_tag == tag )
return frag; return frag;
} }
@ -1118,7 +1114,7 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req,
mca_pml_ob1_comm_proc_t **p) mca_pml_ob1_comm_proc_t **p)
{ {
mca_pml_ob1_comm_t* comm = req->req_recv.req_base.req_comm->c_pml_comm; mca_pml_ob1_comm_t* comm = req->req_recv.req_base.req_comm->c_pml_comm;
mca_pml_ob1_comm_proc_t* proc = comm->procs; mca_pml_ob1_comm_proc_t **procp = comm->procs;
size_t i; size_t i;
/* /*
@ -1133,10 +1129,10 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req,
mca_pml_ob1_recv_frag_t* frag; mca_pml_ob1_recv_frag_t* frag;
/* loop over messages from the current proc */ /* loop over messages from the current proc */
if((frag = recv_req_match_specific_proc(req, &proc[i]))) { if((frag = recv_req_match_specific_proc(req, procp[i]))) {
*p = &proc[i]; *p = procp[i];
comm->last_probed = i; comm->last_probed = i;
req->req_recv.req_base.req_proc = proc[i].ompi_proc; req->req_recv.req_base.req_proc = procp[i]->ompi_proc;
prepare_recv_req_converter(req); prepare_recv_req_converter(req);
return frag; /* match found */ return frag; /* match found */
} }
@ -1145,10 +1141,10 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req,
mca_pml_ob1_recv_frag_t* frag; mca_pml_ob1_recv_frag_t* frag;
/* loop over messages from the current proc */ /* loop over messages from the current proc */
if((frag = recv_req_match_specific_proc(req, &proc[i]))) { if((frag = recv_req_match_specific_proc(req, procp[i]))) {
*p = &proc[i]; *p = procp[i];
comm->last_probed = i; comm->last_probed = i;
req->req_recv.req_base.req_proc = proc[i].ompi_proc; req->req_recv.req_base.req_proc = procp[i]->ompi_proc;
prepare_recv_req_converter(req); prepare_recv_req_converter(req);
return frag; /* match found */ return frag; /* match found */
} }
@ -1161,7 +1157,8 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req,
void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req) void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
{ {
mca_pml_ob1_comm_t* comm = req->req_recv.req_base.req_comm->c_pml_comm; ompi_communicator_t *comm = req->req_recv.req_base.req_comm;
mca_pml_ob1_comm_t *ob1_comm = comm->c_pml_comm;
mca_pml_ob1_comm_proc_t* proc; mca_pml_ob1_comm_proc_t* proc;
mca_pml_ob1_recv_frag_t* frag; mca_pml_ob1_recv_frag_t* frag;
opal_list_t *queue; opal_list_t *queue;
@ -1179,7 +1176,7 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
MCA_PML_BASE_RECV_START(&req->req_recv.req_base); MCA_PML_BASE_RECV_START(&req->req_recv.req_base);
OPAL_THREAD_LOCK(&comm->matching_lock); OPAL_THREAD_LOCK(&ob1_comm->matching_lock);
/** /**
* The laps of time between the ACTIVATE event and the SEARCH_UNEX one include * The laps of time between the ACTIVATE event and the SEARCH_UNEX one include
* the cost of the request lock. * the cost of the request lock.
@ -1188,12 +1185,12 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
&(req->req_recv.req_base), PERUSE_RECV); &(req->req_recv.req_base), PERUSE_RECV);
/* assign sequence number */ /* assign sequence number */
req->req_recv.req_base.req_sequence = comm->recv_sequence++; req->req_recv.req_base.req_sequence = ob1_comm->recv_sequence++;
/* attempt to match posted recv */ /* attempt to match posted recv */
if(req->req_recv.req_base.req_peer == OMPI_ANY_SOURCE) { if(req->req_recv.req_base.req_peer == OMPI_ANY_SOURCE) {
frag = recv_req_match_wild(req, &proc); frag = recv_req_match_wild(req, &proc);
queue = &comm->wild_receives; queue = &ob1_comm->wild_receives;
#if !OPAL_ENABLE_HETEROGENEOUS_SUPPORT #if !OPAL_ENABLE_HETEROGENEOUS_SUPPORT
/* As we are in a homogeneous environment we know that all remote /* As we are in a homogeneous environment we know that all remote
* architectures are exactly the same as the local one. Therefore, * architectures are exactly the same as the local one. Therefore,
@ -1206,7 +1203,7 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
} }
#endif /* !OPAL_ENABLE_HETEROGENEOUS_SUPPORT */ #endif /* !OPAL_ENABLE_HETEROGENEOUS_SUPPORT */
} else { } else {
proc = &comm->procs[req->req_recv.req_base.req_peer]; proc = mca_pml_ob1_peer_lookup (comm, req->req_recv.req_base.req_peer);
req->req_recv.req_base.req_proc = proc->ompi_proc; req->req_recv.req_base.req_proc = proc->ompi_proc;
frag = recv_req_match_specific_proc(req, proc); frag = recv_req_match_specific_proc(req, proc);
queue = &proc->specific_receives; queue = &proc->specific_receives;
@ -1221,7 +1218,7 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
it when the message comes in. */ it when the message comes in. */
append_recv_req_to_queue(queue, req); append_recv_req_to_queue(queue, req);
req->req_match_received = false; req->req_match_received = false;
OPAL_THREAD_UNLOCK(&comm->matching_lock); OPAL_THREAD_UNLOCK(&ob1_comm->matching_lock);
} else { } else {
if(OPAL_LIKELY(!IS_PROB_REQ(req))) { if(OPAL_LIKELY(!IS_PROB_REQ(req))) {
PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_MATCH_UNEX, PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_MATCH_UNEX,
@ -1239,7 +1236,7 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
opal_list_remove_item(&proc->unexpected_frags, opal_list_remove_item(&proc->unexpected_frags,
(opal_list_item_t*)frag); (opal_list_item_t*)frag);
OPAL_THREAD_UNLOCK(&comm->matching_lock); OPAL_THREAD_UNLOCK(&ob1_comm->matching_lock);
switch(hdr->hdr_common.hdr_type) { switch(hdr->hdr_common.hdr_type) {
case MCA_PML_OB1_HDR_TYPE_MATCH: case MCA_PML_OB1_HDR_TYPE_MATCH:
@ -1269,14 +1266,14 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req)
restarted with this request during mrecv */ restarted with this request during mrecv */
opal_list_remove_item(&proc->unexpected_frags, opal_list_remove_item(&proc->unexpected_frags,
(opal_list_item_t*)frag); (opal_list_item_t*)frag);
OPAL_THREAD_UNLOCK(&comm->matching_lock); OPAL_THREAD_UNLOCK(&ob1_comm->matching_lock);
req->req_recv.req_base.req_addr = frag; req->req_recv.req_base.req_addr = frag;
mca_pml_ob1_recv_request_matched_probe(req, frag->btl, mca_pml_ob1_recv_request_matched_probe(req, frag->btl,
frag->segments, frag->num_segments); frag->segments, frag->num_segments);
} else { } else {
OPAL_THREAD_UNLOCK(&comm->matching_lock); OPAL_THREAD_UNLOCK(&ob1_comm->matching_lock);
mca_pml_ob1_recv_request_matched_probe(req, frag->btl, mca_pml_ob1_recv_request_matched_probe(req, frag->btl,
frag->segments, frag->num_segments); frag->segments, frag->num_segments);
} }

Просмотреть файл

@ -433,8 +433,7 @@ static inline int mca_pml_ob1_recv_request_ack_send(ompi_proc_t* proc,
{ {
size_t i; size_t i;
mca_bml_base_btl_t* bml_btl; mca_bml_base_btl_t* bml_btl;
mca_bml_base_endpoint_t* endpoint = mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (proc);
(mca_bml_base_endpoint_t*)proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) { for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) {
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);

Просмотреть файл

@ -480,16 +480,16 @@ mca_pml_ob1_send_request_start_seq (mca_pml_ob1_send_request_t* sendreq, mca_bml
static inline int static inline int
mca_pml_ob1_send_request_start( mca_pml_ob1_send_request_t* sendreq ) mca_pml_ob1_send_request_start( mca_pml_ob1_send_request_t* sendreq )
{ {
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*) mca_bml_base_endpoint_t *endpoint = mca_bml_base_get_endpoint (sendreq->req_send.req_base.req_proc);
sendreq->req_send.req_base.req_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; ompi_communicator_t *comm = sendreq->req_send.req_base.req_comm;
mca_pml_ob1_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup (comm, sendreq->req_send.req_base.req_peer);
int32_t seqn; int32_t seqn;
if (OPAL_UNLIKELY(NULL == endpoint)) { if (OPAL_UNLIKELY(NULL == endpoint)) {
return OMPI_ERR_UNREACH; return OMPI_ERR_UNREACH;
} }
seqn = OPAL_THREAD_ADD32(&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence, 1); seqn = OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1);
return mca_pml_ob1_send_request_start_seq (sendreq, endpoint, seqn); return mca_pml_ob1_send_request_start_seq (sendreq, endpoint, seqn);
} }

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -10,7 +11,7 @@
* Copyright (c) 2004-2006 The Regents of the University of California. * Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2014 Research Organization for Information Science * Copyright (c) 2014 Research Organization for Information Science
@ -43,6 +44,8 @@
static opal_list_t ompi_proc_list; static opal_list_t ompi_proc_list;
static opal_mutex_t ompi_proc_lock; static opal_mutex_t ompi_proc_lock;
static opal_hash_table_t ompi_proc_hash;
ompi_proc_t* ompi_proc_local_proc = NULL; ompi_proc_t* ompi_proc_local_proc = NULL;
static void ompi_proc_construct(ompi_proc_t* proc); static void ompi_proc_construct(ompi_proc_t* proc);
@ -83,49 +86,223 @@ void ompi_proc_destruct(ompi_proc_t* proc)
} }
OPAL_THREAD_LOCK(&ompi_proc_lock); OPAL_THREAD_LOCK(&ompi_proc_lock);
opal_list_remove_item(&ompi_proc_list, (opal_list_item_t*)proc); opal_list_remove_item(&ompi_proc_list, (opal_list_item_t*)proc);
opal_hash_table_remove_value_ptr (&ompi_proc_hash, &proc->super.proc_name, sizeof (proc->super.proc_name));
OPAL_THREAD_UNLOCK(&ompi_proc_lock); OPAL_THREAD_UNLOCK(&ompi_proc_lock);
} }
/**
* Allocate a new ompi_proc_T for the given jobid/vpid
*
* @param[in] jobid Job identifier
* @param[in] vpid Process identifier
* @param[out] procp New ompi_proc_t structure
*
* This function allocates a new ompi_proc_t and inserts it into
* the process list and hash table.
*/
static int ompi_proc_allocate (ompi_jobid_t jobid, ompi_vpid_t vpid, ompi_proc_t **procp) {
ompi_proc_t *proc = OBJ_NEW(ompi_proc_t);
opal_list_append(&ompi_proc_list, (opal_list_item_t*)proc);
OMPI_CAST_RTE_NAME(&proc->super.proc_name)->jobid = jobid;
OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid = vpid;
opal_hash_table_set_value_ptr (&ompi_proc_hash, &proc->super.proc_name, sizeof (proc->super.proc_name),
proc);
*procp = proc;
return OMPI_SUCCESS;
}
/**
* Finish setting up an ompi_proc_t
*
* @param[in] proc ompi process structure
*
* This function contains the core code of ompi_proc_complete_init() and
* ompi_proc_refresh(). The tasks performed by this function include
* retrieving the hostname (if below the modex cutoff), determining the
* remote architecture, and calculating the locality of the process.
*/
static int ompi_proc_complete_init_single (ompi_proc_t *proc)
{
uint16_t u16, *u16ptr;
int ret;
u16ptr = &u16;
if (OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid == OMPI_PROC_MY_NAME->vpid) {
/* nothing else to do */
return OMPI_SUCCESS;
}
/* get the locality information - all RTEs are required
* to provide this information at startup */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc->super.proc_name, &u16ptr, OPAL_UINT16);
if (OPAL_SUCCESS != ret) {
proc->super.proc_flags = OPAL_PROC_NON_LOCAL;
} else {
proc->super.proc_flags = u16;
}
/* we can retrieve the hostname at no cost because it
* was provided at startup */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_HOSTNAME, &proc->super.proc_name,
(char**)&(proc->super.proc_hostname), OPAL_STRING);
if (OPAL_SUCCESS != ret) {
return ret;
}
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
/* get the remote architecture - this might force a modex except
* for those environments where the RM provides it */
{
uint32_t *ui32ptr;
ui32ptr = &(proc->super.proc_arch);
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_ARCH, &proc->super.proc_name,
(void**)&ui32ptr, OPAL_UINT32);
if (OPAL_SUCCESS == ret) {
/* if arch is different than mine, create a new convertor for this proc */
if (proc->super.proc_arch != opal_local_arch) {
OBJ_RELEASE(proc->super.proc_convertor);
proc->super.proc_convertor = opal_convertor_create(proc->super.proc_arch, 0);
}
} else if (OMPI_ERR_NOT_IMPLEMENTED == ret) {
proc->super.proc_arch = opal_local_arch;
} else {
return ret;
}
}
#else
/* must be same arch as my own */
proc->super.proc_arch = opal_local_arch;
#endif
return OMPI_SUCCESS;
}
opal_proc_t *ompi_proc_lookup (const opal_process_name_t proc_name)
{
ompi_proc_t *proc = NULL;
int ret;
/* try to lookup the value in the hash table */
ret = opal_hash_table_get_value_ptr (&ompi_proc_hash, &proc_name, sizeof (proc_name), (void **) &proc);
if (OPAL_SUCCESS == ret) {
return &proc->super;
}
return NULL;
}
opal_proc_t *ompi_proc_for_name (const opal_process_name_t proc_name)
{
ompi_proc_t *proc = NULL;
int ret;
/* try to lookup the value in the hash table */
ret = opal_hash_table_get_value_ptr (&ompi_proc_hash, &proc_name, sizeof (proc_name), (void **) &proc);
if (OPAL_SUCCESS == ret) {
return &proc->super;
}
OPAL_THREAD_LOCK(&ompi_proc_lock);
do {
/* double-check that another competing thread has not added this proc */
ret = opal_hash_table_get_value_ptr (&ompi_proc_hash, &proc_name, sizeof (proc_name), (void **) &proc);
if (OPAL_SUCCESS == ret) {
break;
}
/* allocate a new ompi_proc_t object for the process and insert it into the process table */
ret = ompi_proc_allocate (proc_name.jobid, proc_name.vpid, &proc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
/* allocation fail */
break;
}
/* finish filling in the important proc data fields */
ret = ompi_proc_complete_init_single (proc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
proc = NULL;
break;
}
} while (0);
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
return (opal_proc_t *) proc;
}
int ompi_proc_init(void) int ompi_proc_init(void)
{ {
ompi_vpid_t i; int opal_proc_hash_init_size = (ompi_process_info.num_procs < ompi_add_procs_cutoff) ? ompi_process_info.num_procs :
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT 1024;
ompi_proc_t *proc;
int ret; int ret;
#endif
OBJ_CONSTRUCT(&ompi_proc_list, opal_list_t); OBJ_CONSTRUCT(&ompi_proc_list, opal_list_t);
OBJ_CONSTRUCT(&ompi_proc_lock, opal_mutex_t); OBJ_CONSTRUCT(&ompi_proc_lock, opal_mutex_t);
OBJ_CONSTRUCT(&ompi_proc_hash, opal_hash_table_t);
/* create proc structures and find self */ ret = opal_hash_table_init (&ompi_proc_hash, opal_proc_hash_init_size);
for( i = 0; i < ompi_process_info.num_procs; i++ ) { if (OPAL_SUCCESS != ret) {
ompi_proc_t *proc = OBJ_NEW(ompi_proc_t); return ret;
opal_list_append(&ompi_proc_list, (opal_list_item_t*)proc); }
OMPI_CAST_RTE_NAME(&proc->super.proc_name)->jobid = OMPI_PROC_MY_NAME->jobid; /* create a proc for the local process */
OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid = i; ret = ompi_proc_allocate (OMPI_PROC_MY_NAME->jobid, OMPI_PROC_MY_NAME->vpid, &proc);
if (OMPI_SUCCESS != ret) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
if (i == OMPI_PROC_MY_NAME->vpid) { /* set local process data */
ompi_proc_local_proc = proc; ompi_proc_local_proc = proc;
proc->super.proc_flags = OPAL_PROC_ALL_LOCAL; proc->super.proc_flags = OPAL_PROC_ALL_LOCAL;
proc->super.proc_hostname = strdup(ompi_process_info.nodename); proc->super.proc_hostname = strdup(ompi_process_info.nodename);
proc->super.proc_arch = opal_local_arch; proc->super.proc_arch = opal_local_arch;
/* Register the local proc with OPAL */ /* Register the local proc with OPAL */
opal_proc_local_set(&proc->super); opal_proc_local_set(&proc->super);
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
/* add our arch to the modex */ /* add our arch to the modex */
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_MODEX_SEND_VALUE(ret, PMIX_GLOBAL,
OPAL_PMIX_ARCH, &opal_local_arch, OPAL_UINT32); OPAL_PMIX_ARCH, &opal_local_arch, OPAL_UINT32);
if (OPAL_SUCCESS != ret) { if (OPAL_SUCCESS != ret) {
return ret;
}
#endif
if (ompi_process_info.num_procs < ompi_add_procs_cutoff) {
/* create proc structures and find self */
for (ompi_vpid_t i = 0 ; i < ompi_process_info.num_procs ; ++i ) {
if (i == OMPI_PROC_MY_NAME->vpid) {
continue;
}
ret = ompi_proc_allocate (OMPI_PROC_MY_NAME->jobid, i, &proc);
if (OMPI_SUCCESS != ret) {
return ret; return ret;
} }
#endif
} }
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
static int ompi_proc_compare_vid (opal_list_item_t **a, opal_list_item_t **b)
{
ompi_proc_t *proca = (ompi_proc_t *) *a;
ompi_proc_t *procb = (ompi_proc_t *) *b;
if (proca->super.proc_name.vpid > procb->super.proc_name.vpid) {
return 1;
} else {
return -1;
}
/* they should never be equal */
}
/** /**
* The process creation is split into two steps. The second step * The process creation is split into two steps. The second step
@ -140,58 +317,47 @@ int ompi_proc_complete_init(void)
{ {
ompi_proc_t *proc; ompi_proc_t *proc;
int ret, errcode = OMPI_SUCCESS; int ret, errcode = OMPI_SUCCESS;
uint16_t u16, *u16ptr;
OPAL_THREAD_LOCK(&ompi_proc_lock); OPAL_THREAD_LOCK(&ompi_proc_lock);
u16ptr = &u16;
OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) { OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
if (OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid != OMPI_PROC_MY_NAME->vpid) { ret = ompi_proc_complete_init_single (proc);
/* get the locality information - all RTEs are required if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
* to provide this information at startup */ errcode = ret;
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc->super.proc_name, &u16ptr, OPAL_UINT16); break;
if (OPAL_SUCCESS != ret) {
proc->super.proc_flags = OPAL_PROC_NON_LOCAL;
} else {
proc->super.proc_flags = u16;
}
/* we can retrieve the hostname at no cost because it
* was provided at startup */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_HOSTNAME, &proc->super.proc_name,
(char**)&(proc->super.proc_hostname), OPAL_STRING);
if (OPAL_SUCCESS != ret) {
/* we can live without it */
proc->super.proc_hostname = NULL;
}
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
/* get the remote architecture - this might force a modex except
* for those environments where the RM provides it */
{
uint32_t *ui32ptr;
ui32ptr = &(proc->super.proc_arch);
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_ARCH, &proc->super.proc_name,
(void**)&ui32ptr, OPAL_UINT32);
if (OPAL_SUCCESS == ret) {
/* if arch is different than mine, create a new convertor for this proc */
if (proc->super.proc_arch != opal_local_arch) {
OBJ_RELEASE(proc->super.proc_convertor);
proc->super.proc_convertor = opal_convertor_create(proc->super.proc_arch, 0);
}
} else if (OMPI_ERR_NOT_IMPLEMENTED == ret) {
proc->super.proc_arch = opal_local_arch;
} else {
errcode = ret;
break;
}
}
#else
/* must be same arch as my own */
proc->super.proc_arch = opal_local_arch;
#endif
} }
} }
OPAL_THREAD_UNLOCK(&ompi_proc_lock); OPAL_THREAD_UNLOCK(&ompi_proc_lock);
if (ompi_process_info.num_procs >= ompi_add_procs_cutoff) {
uint16_t u16, *u16ptr;
u16ptr = &u16;
/* find and add all local processes */
for (ompi_vpid_t i = 0 ; i < ompi_process_info.num_procs ; ++i ) {
opal_process_name_t proc_name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid};
uint16_t locality = OPAL_PROC_NON_LOCAL;
if (OMPI_PROC_MY_NAME->vpid == i) {
continue;
}
/* the runtime is required to fill in locality for all local processes by this
* point. only local processes will have locality set */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc_name, &u16ptr, OPAL_UINT16);
if (OPAL_SUCCESS == ret) {
locality = u16;
}
if (OPAL_PROC_NON_LOCAL != locality) {
(void) ompi_proc_for_name (proc_name);
}
}
}
opal_list_sort (&ompi_proc_list, ompi_proc_compare_vid);
return errcode; return errcode;
} }
@ -227,6 +393,7 @@ int ompi_proc_finalize (void)
/* now destruct the list and thread lock */ /* now destruct the list and thread lock */
OBJ_DESTRUCT(&ompi_proc_list); OBJ_DESTRUCT(&ompi_proc_list);
OBJ_DESTRUCT(&ompi_proc_lock); OBJ_DESTRUCT(&ompi_proc_lock);
OBJ_DESTRUCT(&ompi_proc_hash);
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -248,9 +415,7 @@ ompi_proc_t** ompi_proc_world(size_t *size)
/* First count how many match this jobid */ /* First count how many match this jobid */
OPAL_THREAD_LOCK(&ompi_proc_lock); OPAL_THREAD_LOCK(&ompi_proc_lock);
for (proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, OMPI_CAST_RTE_NAME(&proc->super.proc_name), &my_name)) { if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, OMPI_CAST_RTE_NAME(&proc->super.proc_name), &my_name)) {
++count; ++count;
} }
@ -265,9 +430,7 @@ ompi_proc_t** ompi_proc_world(size_t *size)
/* now save only the procs that match this jobid */ /* now save only the procs that match this jobid */
count = 0; count = 0;
for (proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, &proc->super.proc_name, &my_name)) { if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, &proc->super.proc_name, &my_name)) {
/* DO NOT RETAIN THIS OBJECT - the reference count on this /* DO NOT RETAIN THIS OBJECT - the reference count on this
* object will be adjusted by external callers. The intent * object will be adjusted by external callers. The intent
@ -305,9 +468,7 @@ ompi_proc_t** ompi_proc_all(size_t* size)
} }
OPAL_THREAD_LOCK(&ompi_proc_lock); OPAL_THREAD_LOCK(&ompi_proc_lock);
for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
/* We know this isn't consistent with the behavior in ompi_proc_world, /* We know this isn't consistent with the behavior in ompi_proc_world,
* but we are leaving the RETAIN for now because the code using this function * but we are leaving the RETAIN for now because the code using this function
* assumes that the results need to be released when done. It will * assumes that the results need to be released when done. It will
@ -349,9 +510,7 @@ ompi_proc_t * ompi_proc_find ( const ompi_process_name_t * name )
/* return the proc-struct which matches this jobid+process id */ /* return the proc-struct which matches this jobid+process id */
mask = OMPI_RTE_CMP_JOBID | OMPI_RTE_CMP_VPID; mask = OMPI_RTE_CMP_JOBID | OMPI_RTE_CMP_VPID;
OPAL_THREAD_LOCK(&ompi_proc_lock); OPAL_THREAD_LOCK(&ompi_proc_lock);
for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, &proc->super.proc_name, name)) { if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, &proc->super.proc_name, name)) {
rproc = proc; rproc = proc;
break; break;
@ -366,21 +525,14 @@ ompi_proc_t * ompi_proc_find ( const ompi_process_name_t * name )
int ompi_proc_refresh(void) int ompi_proc_refresh(void)
{ {
ompi_proc_t *proc = NULL; ompi_proc_t *proc = NULL;
opal_list_item_t *item = NULL;
ompi_vpid_t i = 0; ompi_vpid_t i = 0;
int ret=OMPI_SUCCESS; int ret=OMPI_SUCCESS;
uint16_t u16, *u16ptr;
OPAL_THREAD_LOCK(&ompi_proc_lock); OPAL_THREAD_LOCK(&ompi_proc_lock);
for( item = opal_list_get_first(&ompi_proc_list), i = 0; OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
item != opal_list_get_end(&ompi_proc_list);
item = opal_list_get_next(item), ++i ) {
proc = (ompi_proc_t*)item;
/* Does not change: proc->super.proc_name.vpid */ /* Does not change: proc->super.proc_name.vpid */
OMPI_CAST_RTE_NAME(&proc->super.proc_name)->jobid = OMPI_PROC_MY_NAME->jobid; OMPI_CAST_RTE_NAME(&proc->super.proc_name)->jobid = OMPI_PROC_MY_NAME->jobid;
u16ptr = &u16;
/* Make sure to clear the local flag before we set it below */ /* Make sure to clear the local flag before we set it below */
proc->super.proc_flags = 0; proc->super.proc_flags = 0;
@ -392,56 +544,10 @@ int ompi_proc_refresh(void)
proc->super.proc_arch = opal_local_arch; proc->super.proc_arch = opal_local_arch;
opal_proc_local_set(&proc->super); opal_proc_local_set(&proc->super);
} else { } else {
/* get the locality information - all RTEs are required ret = ompi_proc_complete_init_single (proc);
* to provide this information at startup */ if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc->super.proc_name, &u16ptr, OPAL_UINT16); break;
if (OPAL_SUCCESS != ret) {
proc->super.proc_flags = OPAL_PROC_NON_LOCAL;
} else {
proc->super.proc_flags = u16;
} }
if (ompi_process_info.num_procs < ompi_direct_modex_cutoff) {
/* IF the number of procs falls below the specified cutoff,
* then we assume the job is small enough that retrieving
* the hostname (which will typically cause retrieval of
* ALL modex info for this proc) will have no appreciable
* impact on launch scaling
*/
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_HOSTNAME, &proc->super.proc_name,
(char**)&(proc->super.proc_hostname), OPAL_STRING);
if (OMPI_SUCCESS != ret) {
break;
}
} else {
/* just set the hostname to NULL for now - we'll fill it in
* as modex_recv's are called for procs we will talk to, thus
* avoiding retrieval of ALL modex info for this proc until
* required. Transports that delay calling modex_recv until
* first message will therefore scale better than those that
* call modex_recv on all procs during init.
*/
proc->super.proc_hostname = NULL;
}
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
{
/* get the remote architecture */
uint32_t* uiptr = &(proc->super.proc_arch);
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_ARCH, &proc->super.proc_name,
(void**)&uiptr, OPAL_UINT32);
if (OMPI_SUCCESS != ret) {
break;
}
/* if arch is different than mine, create a new convertor for this proc */
if (proc->super.proc_arch != opal_local_arch) {
OBJ_RELEASE(proc->super.proc_convertor);
proc->super.proc_convertor = opal_convertor_create(proc->super.proc_arch, 0);
}
}
#else
/* must be same arch as my own */
proc->super.proc_arch = opal_local_arch;
#endif
} }
} }
@ -454,7 +560,7 @@ int
ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, ompi_proc_pack(ompi_proc_t **proclist, int proclistsize,
opal_buffer_t* buf) opal_buffer_t* buf)
{ {
int i, rc; int rc;
OPAL_THREAD_LOCK(&ompi_proc_lock); OPAL_THREAD_LOCK(&ompi_proc_lock);
@ -470,7 +576,7 @@ ompi_proc_pack(ompi_proc_t **proclist, int proclistsize,
* reduced. For now, just go ahead and pack the info so it * reduced. For now, just go ahead and pack the info so it
* can be sent. * can be sent.
*/ */
for (i=0; i<proclistsize; i++) { for (int i = 0 ; i < proclistsize ; ++i) {
rc = opal_dss.pack(buf, &(proclist[i]->super.proc_name), 1, OMPI_NAME); rc = opal_dss.pack(buf, &(proclist[i]->super.proc_name), 1, OMPI_NAME);
if(rc != OPAL_SUCCESS) { if(rc != OPAL_SUCCESS) {
OMPI_ERROR_LOG(rc); OMPI_ERROR_LOG(rc);
@ -503,9 +609,7 @@ ompi_proc_find_and_add(const ompi_process_name_t * name, bool* isnew)
/* return the proc-struct which matches this jobid+process id */ /* return the proc-struct which matches this jobid+process id */
mask = OMPI_RTE_CMP_JOBID | OMPI_RTE_CMP_VPID; mask = OMPI_RTE_CMP_JOBID | OMPI_RTE_CMP_VPID;
OPAL_THREAD_LOCK(&ompi_proc_lock); OPAL_THREAD_LOCK(&ompi_proc_lock);
for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, &proc->super.proc_name, name)) { if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, &proc->super.proc_name, name)) {
rproc = proc; rproc = proc;
*isnew = false; *isnew = false;
@ -538,7 +642,6 @@ ompi_proc_unpack(opal_buffer_t* buf,
int proclistsize, ompi_proc_t ***proclist, int proclistsize, ompi_proc_t ***proclist,
int *newproclistsize, ompi_proc_t ***newproclist) int *newproclistsize, ompi_proc_t ***newproclist)
{ {
int i;
size_t newprocs_len = 0; size_t newprocs_len = 0;
ompi_proc_t **plist=NULL, **newprocs = NULL; ompi_proc_t **plist=NULL, **newprocs = NULL;
@ -558,7 +661,7 @@ ompi_proc_unpack(opal_buffer_t* buf,
/* cycle through the array of provided procs and unpack /* cycle through the array of provided procs and unpack
* their info - as packed by ompi_proc_pack * their info - as packed by ompi_proc_pack
*/ */
for (i=0; i<proclistsize; i++){ for (int i = 0; i < proclistsize ; ++i){
int32_t count=1; int32_t count=1;
ompi_process_name_t new_name; ompi_process_name_t new_name;
uint32_t new_arch; uint32_t new_arch;

Просмотреть файл

@ -304,6 +304,35 @@ OMPI_DECLSPEC int ompi_proc_unpack(opal_buffer_t *buf,
*/ */
OMPI_DECLSPEC int ompi_proc_refresh(void); OMPI_DECLSPEC int ompi_proc_refresh(void);
/**
* Get the ompi_proc_t for a given process name
*
* @param[in] proc_name opal process name
*
* @returns cached or new ompi_proc_t for the given process name
*
* This function looks up the given process name in the hash of existing
* ompi_proc_t structures. If no ompi_proc_t structure exists matching the
* given name a new ompi_proc_t is allocated, initialized, and returned.
*
* @note The ompi_proc_t is added to the local list of processes but is not
* added to any communicator. ompi_comm_peer_lookup is responsible for caching
* the ompi_proc_t on a communicator.
*/
OMPI_DECLSPEC opal_proc_t *ompi_proc_for_name (const opal_process_name_t proc_name);
OMPI_DECLSPEC opal_proc_t *ompi_proc_lookup (const opal_process_name_t proc_name);
static inline intptr_t ompi_proc_name_to_sentinel (opal_process_name_t name) {
return -*((intptr_t *) &name);
}
static inline opal_process_name_t ompi_proc_sentinel_to_name (intptr_t sentinel) {
sentinel = -sentinel;
return *((opal_process_name_t *) &sentinel);
}
END_C_DECLS END_C_DECLS

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -13,6 +14,8 @@
* Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science * Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -88,7 +91,7 @@ static void try_kill_peers(ompi_communicator_t *comm,
} else { } else {
assert(count <= nprocs); assert(count <= nprocs);
procs[count++] = procs[count++] =
*OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i)->super.proc_name); *OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i, true)->super.proc_name);
} }
} }
@ -96,7 +99,7 @@ static void try_kill_peers(ompi_communicator_t *comm,
for (i = 0; i < ompi_comm_remote_size(comm); ++i) { for (i = 0; i < ompi_comm_remote_size(comm); ++i) {
assert(count <= nprocs); assert(count <= nprocs);
procs[count++] = procs[count++] =
*OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i)->super.proc_name); *OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i, true)->super.proc_name);
} }
if (nprocs > 0) { if (nprocs > 0) {

Просмотреть файл

@ -400,6 +400,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
opal_compare_proc = _process_name_compare; opal_compare_proc = _process_name_compare;
opal_convert_string_to_process_name = _convert_string_to_process_name; opal_convert_string_to_process_name = _convert_string_to_process_name;
opal_convert_process_name_to_string = _convert_process_name_to_string; opal_convert_process_name_to_string = _convert_process_name_to_string;
opal_proc_for_name = ompi_proc_for_name;
/* Register MCA variables */ /* Register MCA variables */
if (OPAL_SUCCESS != (ret = ompi_register_mca_variables())) { if (OPAL_SUCCESS != (ret = ompi_register_mca_variables())) {

Просмотреть файл

@ -64,6 +64,7 @@ int ompi_mpi_event_tick_rate = -1;
char *ompi_mpi_show_mca_params_string = NULL; char *ompi_mpi_show_mca_params_string = NULL;
bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE); bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE);
bool ompi_mpi_preconnect_mpi = false; bool ompi_mpi_preconnect_mpi = false;
uint32_t ompi_add_procs_cutoff = 1024;
static bool show_default_mca_params = false; static bool show_default_mca_params = false;
static bool show_file_mca_params = false; static bool show_file_mca_params = false;
@ -288,6 +289,16 @@ int ompi_mpi_register_params(void)
ompi_rte_abort(1, NULL); ompi_rte_abort(1, NULL);
} }
ompi_add_procs_cutoff = 1024;
(void) mca_base_var_register ("ompi", "mpi", NULL, "add_procs_cutoff",
"Maximum world size for pre-allocating resources for all "
"remote processes. Increasing this limit may improve "
"communication performance at the cost of memory usage "
"(default: 1024)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL,
0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
&ompi_add_procs_cutoff);
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -9,7 +10,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved. * Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
@ -123,11 +124,16 @@ OMPI_DECLSPEC extern bool ompi_have_sparse_group_storage;
*/ */
OMPI_DECLSPEC extern bool ompi_use_sparse_group_storage; OMPI_DECLSPEC extern bool ompi_use_sparse_group_storage;
/* /**
* Cutoff point for retrieving hostnames * Cutoff point for retrieving hostnames
*/ */
OMPI_DECLSPEC extern uint32_t ompi_direct_modex_cutoff; OMPI_DECLSPEC extern uint32_t ompi_direct_modex_cutoff;
/**
* Cutoff point for calling add_procs for all processes
*/
OMPI_DECLSPEC extern uint32_t ompi_add_procs_cutoff;
/** /**
* Register MCA parameters used by the MPI layer. * Register MCA parameters used by the MPI layer.
* *

Просмотреть файл

@ -605,12 +605,15 @@ typedef int (*mca_btl_base_module_finalize_fn_t)(
* modex_recv() function. The BTL may utilize this information to * modex_recv() function. The BTL may utilize this information to
* determine reachability of each peer process. * determine reachability of each peer process.
* *
* For each process that is reachable by the BTL, the bit corresponding to the index * The caller may pass a "reachable" bitmap pointer. If it is not
* into the proc array (nprocs) should be set in the reachable bitmask. The BTL * NULL, for each process that is reachable by the BTL, the bit
* will return an array of pointers to a data structure defined * corresponding to the index into the proc array (nprocs) should be
* by the BTL that is then returned to the BTL on subsequent calls to the BTL data * set in the reachable bitmask. The BTL will return an array of
* transfer functions (e.g btl_send). This may be used by the BTL to cache any addressing * pointers to a data structure defined by the BTL that is then
* or connection information (e.g. TCP socket, IB queue pair). * returned to the BTL on subsequent calls to the BTL data transfer
* functions (e.g btl_send). This may be used by the BTL to cache any
* addressing or connection information (e.g. TCP socket, IB queue
* pair).
*/ */
typedef int (*mca_btl_base_module_add_procs_fn_t)( typedef int (*mca_btl_base_module_add_procs_fn_t)(
struct mca_btl_base_module_t* btl, struct mca_btl_base_module_t* btl,

Просмотреть файл

@ -871,6 +871,7 @@ int mca_btl_openib_add_procs(
for (i = 0, local_procs = 0 ; i < (int) nprocs; i++) { for (i = 0, local_procs = 0 ; i < (int) nprocs; i++) {
struct opal_proc_t* proc = procs[i]; struct opal_proc_t* proc = procs[i];
mca_btl_openib_proc_t* ib_proc; mca_btl_openib_proc_t* ib_proc;
bool found_existing = false;
int remote_matching_port; int remote_matching_port;
opal_output(-1, "add procs: adding proc %d", i); opal_output(-1, "add procs: adding proc %d", i);
@ -898,6 +899,24 @@ int mca_btl_openib_add_procs(
continue; continue;
} }
OPAL_THREAD_LOCK(&ib_proc->proc_lock);
for (j = 0 ; j < (int) ib_proc->proc_endpoint_count ; ++j) {
endpoint = ib_proc->proc_endpoints[j];
if (endpoint->endpoint_btl == openib_btl) {
found_existing = true;
break;
}
}
OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
if (found_existing) {
if (reachable) {
opal_bitmap_set_bit(reachable, i);
}
peers[i] = endpoint;
continue;
}
/* check if the remote proc has any ports that: /* check if the remote proc has any ports that:
- on the same subnet as the local proc, and - on the same subnet as the local proc, and
- on that subnet, has a CPC in common with the local proc - on that subnet, has a CPC in common with the local proc
@ -1048,6 +1067,37 @@ int mca_btl_openib_add_procs(
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_module_t *btl, struct opal_proc_t *proc)
{
mca_btl_openib_module_t *openib_btl = (mca_btl_openib_module_t *) btl;
mca_btl_base_endpoint_t *endpoint;
mca_btl_openib_proc_t *ib_proc;
if (NULL == (ib_proc = mca_btl_openib_proc_create(proc))) {
/* if we don't have connection info for this process, it's
* okay because some other method might be able to reach it,
* so just mark it as unreachable by us */
return NULL;
}
OPAL_THREAD_LOCK(&ib_proc->proc_lock);
for (size_t j = 0 ; j < ib_proc->proc_endpoint_count ; ++j) {
endpoint = ib_proc->proc_endpoints[j];
if (endpoint->endpoint_btl == openib_btl) {
OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
return endpoint;
}
}
OPAL_THREAD_UNLOCK(&ib_proc->proc_lock);
BTL_VERBOSE(("creating new endpoint for remote process {.jobid = 0x%x, .vpid = 0x%x}",
proc->proc_name.jobid, proc->proc_name.vpid));
endpoint = NULL;
(void) mca_btl_openib_add_procs (btl, 1, &proc, &endpoint, NULL);
return endpoint;
}
/* /*
* delete the proc as reachable from this btl module * delete the proc as reachable from this btl module
*/ */

Просмотреть файл

@ -874,6 +874,18 @@ int mca_btl_openib_post_srr(mca_btl_openib_module_t* openib_btl, const int qp);
const char* btl_openib_get_transport_name(mca_btl_openib_transport_type_t transport_type); const char* btl_openib_get_transport_name(mca_btl_openib_transport_type_t transport_type);
/**
* Get an endpoint for a process
*
* @param btl (IN) BTL module
* @param proc (IN) opal process object
*
* This function will return an existing endpoint if one exists otherwise it will allocate
* a new endpoint and return it.
*/
struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_module_t *btl,
struct opal_proc_t *proc);
/** /**
* Get a transport type of btl. * Get a transport type of btl.
*/ */

Просмотреть файл

@ -565,7 +565,8 @@ int btl_openib_register_mca_params(void)
mca_btl_openib_module.super.btl_rdma_pipeline_frag_size = 1024 * 1024; mca_btl_openib_module.super.btl_rdma_pipeline_frag_size = 1024 * 1024;
mca_btl_openib_module.super.btl_min_rdma_pipeline_size = 256 * 1024; mca_btl_openib_module.super.btl_min_rdma_pipeline_size = 256 * 1024;
mca_btl_openib_module.super.btl_flags = MCA_BTL_FLAGS_RDMA | mca_btl_openib_module.super.btl_flags = MCA_BTL_FLAGS_RDMA |
MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA; MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA |
MCA_BTL_FLAGS_SEND;
#if BTL_OPENIB_FAILOVER_ENABLED #if BTL_OPENIB_FAILOVER_ENABLED
mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_FAILOVER_SUPPORT; mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_FAILOVER_SUPPORT;
#endif #endif

Просмотреть файл

@ -218,6 +218,7 @@ typedef struct udcm_msg_hdr {
union { union {
/* UDCM_MESSAGE_CONNECT */ /* UDCM_MESSAGE_CONNECT */
struct msg_connect { struct msg_connect {
opal_process_name_t rem_name;
int32_t rem_ep_index; int32_t rem_ep_index;
uint8_t rem_port_num; uint8_t rem_port_num;
} req; } req;
@ -1473,36 +1474,26 @@ static int udcm_rc_qp_create_all (mca_btl_base_endpoint_t *lcl_ep)
/* JMS: optimization target -- can we send something in private /* JMS: optimization target -- can we send something in private
data to find the proc directly instead of having to search data to find the proc directly instead of having to search
through *all* procs? */ through *all* procs? */
static mca_btl_openib_endpoint_t *udcm_find_endpoint (opal_pointer_array_t *endpoints, static mca_btl_openib_endpoint_t *udcm_find_endpoint (struct mca_btl_openib_module_t *btl,
uint32_t qp_num, uint16_t lid, uint32_t qp_num, uint16_t lid,
udcm_msg_hdr_t *msg_hdr) udcm_msg_hdr_t *msg_hdr)
{ {
uint8_t port_num; mca_btl_base_endpoint_t *endpoint;
int i; struct opal_proc_t *opal_proc;
port_num = msg_hdr->data.req.rem_port_num; opal_proc = opal_proc_for_name (msg_hdr->data.req.rem_name);
if (NULL == opal_proc) {
for (i = 0 ; i < opal_pointer_array_get_size (endpoints) ; ++i) { BTL_ERROR(("could not get proc associated with remote peer"));
mca_btl_openib_endpoint_t *endpoint; return NULL;
modex_msg_t *msg;
endpoint = (mca_btl_openib_endpoint_t *)
opal_pointer_array_get_item (endpoints, i);
if (NULL == endpoint) {
continue;
}
msg = UDCM_ENDPOINT_REM_MODEX(endpoint);
if (msg->mm_qp_num == qp_num && msg->mm_port_num == port_num &&
msg->mm_lid == lid)
return endpoint;
} }
BTL_ERROR(("could not find endpoint with port: %d, lid: %d, msg_type: %d", endpoint = mca_btl_openib_get_ep (&btl->super, opal_proc);
port_num, lid, msg_hdr->type)); if (NULL == endpoint) {
BTL_ERROR(("could not find endpoint with port: %d, lid: %d, msg_type: %d",
msg_hdr->data.req.rem_port_num, lid, msg_hdr->type));
}
return NULL; return endpoint;
} }
static int udcm_endpoint_init_data (mca_btl_base_endpoint_t *lcl_ep) static int udcm_endpoint_init_data (mca_btl_base_endpoint_t *lcl_ep)
@ -1678,6 +1669,7 @@ static int udcm_send_request (mca_btl_base_endpoint_t *lcl_ep,
msg->data->hdr.data.req.rem_ep_index = htonl(lcl_ep->index); msg->data->hdr.data.req.rem_ep_index = htonl(lcl_ep->index);
msg->data->hdr.data.req.rem_port_num = m->modex.mm_port_num; msg->data->hdr.data.req.rem_port_num = m->modex.mm_port_num;
msg->data->hdr.data.req.rem_name = OPAL_PROC_MY_NAME;
for (i = 0 ; i < mca_btl_openib_component.num_qps ; ++i) { for (i = 0 ; i < mca_btl_openib_component.num_qps ; ++i) {
msg->data->qps[i].psn = htonl(lcl_ep->qps[i].qp->lcl_psn); msg->data->qps[i].psn = htonl(lcl_ep->qps[i].qp->lcl_psn);
@ -1981,8 +1973,7 @@ static int udcm_process_messages (struct ibv_cq *event_cq, udcm_module_t *m)
lcl_ep = message->hdr.lcl_ep; lcl_ep = message->hdr.lcl_ep;
if (NULL == lcl_ep) { if (NULL == lcl_ep) {
lcl_ep = udcm_find_endpoint (m->btl->device->endpoints, wc[i].src_qp, lcl_ep = udcm_find_endpoint (m->btl, wc[i].src_qp, wc[i].slid, &message->hdr);
wc[i].slid, &message->hdr);
} }
if (NULL == lcl_ep ) { if (NULL == lcl_ep ) {
@ -2824,6 +2815,7 @@ static int udcm_xrc_send_request (mca_btl_base_endpoint_t *lcl_ep, mca_btl_base_
msg->data->hdr.data.req.rem_ep_index = htonl(lcl_ep->index); msg->data->hdr.data.req.rem_ep_index = htonl(lcl_ep->index);
msg->data->hdr.data.req.rem_port_num = m->modex.mm_port_num; msg->data->hdr.data.req.rem_port_num = m->modex.mm_port_num;
msg->data->hdr.data.req.rem_name = OPAL_PROC_MY_NAME;
if (UDCM_MESSAGE_XCONNECT == msg_type) { if (UDCM_MESSAGE_XCONNECT == msg_type) {
BTL_VERBOSE(("Sending XConnect with qp: %d, psn: %d", lcl_ep->qps[0].qp->lcl_qp->qp_num, BTL_VERBOSE(("Sending XConnect with qp: %d, psn: %d", lcl_ep->qps[0].qp->lcl_qp->qp_num,

Просмотреть файл

@ -221,7 +221,8 @@ mca_btl_portals4_component_open(void)
mca_btl_portals4_module.super.btl_min_rdma_pipeline_size = 0; mca_btl_portals4_module.super.btl_min_rdma_pipeline_size = 0;
mca_btl_portals4_module.super.btl_flags = mca_btl_portals4_module.super.btl_flags =
MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_RDMA |
MCA_BTL_FLAGS_RDMA_MATCHED; MCA_BTL_FLAGS_RDMA_MATCHED |
MCA_BTL_FLAGS_SEND;
mca_btl_portals4_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t); mca_btl_portals4_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);

Просмотреть файл

@ -98,7 +98,7 @@ static int mca_btl_self_component_register(void)
mca_btl_self.btl_rdma_pipeline_send_length = INT_MAX; mca_btl_self.btl_rdma_pipeline_send_length = INT_MAX;
mca_btl_self.btl_rdma_pipeline_frag_size = INT_MAX; mca_btl_self.btl_rdma_pipeline_frag_size = INT_MAX;
mca_btl_self.btl_min_rdma_pipeline_size = 0; mca_btl_self.btl_min_rdma_pipeline_size = 0;
mca_btl_self.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE; mca_btl_self.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND;
mca_btl_self.btl_bandwidth = 100; mca_btl_self.btl_bandwidth = 100;
mca_btl_self.btl_latency = 0; mca_btl_self.btl_latency = 0;
mca_btl_base_param_register(&mca_btl_self_component.super.btl_version, mca_btl_base_param_register(&mca_btl_self_component.super.btl_version,

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* *
* $COPYRIGHT$ * $COPYRIGHT$
@ -72,6 +72,7 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
struct opal_proc_t* opal_proc = procs[i]; struct opal_proc_t* opal_proc = procs[i];
mca_btl_tcp_proc_t* tcp_proc; mca_btl_tcp_proc_t* tcp_proc;
mca_btl_base_endpoint_t* tcp_endpoint; mca_btl_base_endpoint_t* tcp_endpoint;
bool existing_found = false;
/* Do not create loopback TCP connections */ /* Do not create loopback TCP connections */
if( my_proc == opal_proc ) { if( my_proc == opal_proc ) {
@ -90,28 +91,43 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
OPAL_THREAD_LOCK(&tcp_proc->proc_lock); OPAL_THREAD_LOCK(&tcp_proc->proc_lock);
/* The btl_proc datastructure is shared by all TCP BTL for (int j = 0 ; j < tcp_proc->proc_endpoint_count ; ++j) {
* instances that are trying to reach this destination. tcp_endpoint = tcp_proc->proc_endpoints[j];
* Cache the peer instance on the btl_proc. if (tcp_endpoint->endpoint_btl == tcp_btl) {
*/ existing_found = true;
tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t); break;
if(NULL == tcp_endpoint) { }
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
return OPAL_ERR_OUT_OF_RESOURCE;
} }
tcp_endpoint->endpoint_btl = tcp_btl; if (!existing_found) {
rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint); /* The btl_proc datastructure is shared by all TCP BTL
if(rc != OPAL_SUCCESS) { * instances that are trying to reach this destination.
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock); * Cache the peer instance on the btl_proc.
OBJ_RELEASE(tcp_endpoint); */
continue; tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t);
if(NULL == tcp_endpoint) {
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
return OPAL_ERR_OUT_OF_RESOURCE;
}
tcp_endpoint->endpoint_btl = tcp_btl;
rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint);
if(rc != OPAL_SUCCESS) {
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
OBJ_RELEASE(tcp_endpoint);
continue;
}
opal_list_append(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint);
} }
opal_bitmap_set_bit(reachable, i);
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock); OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
if (NULL != reachable) {
opal_bitmap_set_bit(reachable, i);
}
peers[i] = tcp_endpoint; peers[i] = tcp_endpoint;
opal_list_append(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint);
/* we increase the count of MPI users of the event library /* we increase the count of MPI users of the event library
once per peer, so that we are used until we aren't once per peer, so that we are used until we aren't

Просмотреть файл

@ -269,7 +269,8 @@ static int mca_btl_tcp_component_register(void)
MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND_INPLACE |
MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_NEED_CSUM |
MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_ACK |
MCA_BTL_FLAGS_HETEROGENEOUS_RDMA; MCA_BTL_FLAGS_HETEROGENEOUS_RDMA |
MCA_BTL_FLAGS_SEND;
mca_btl_tcp_module.super.btl_bandwidth = 100; mca_btl_tcp_module.super.btl_bandwidth = 100;
mca_btl_tcp_module.super.btl_latency = 100; mca_btl_tcp_module.super.btl_latency = 100;

Просмотреть файл

@ -14,7 +14,9 @@
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2014-2015 Research Organization for Information Science * Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -738,6 +740,31 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const opal_process_name_t *name)
opal_proc_table_get_value(&mca_btl_tcp_component.tcp_procs, opal_proc_table_get_value(&mca_btl_tcp_component.tcp_procs,
*name, (void**)&proc); *name, (void**)&proc);
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock); OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
if (OPAL_UNLIKELY(NULL == proc)) {
mca_btl_base_endpoint_t *endpoint;
opal_proc_t *opal_proc;
int rc;
BTL_VERBOSE(("adding tcp proc for unknown peer {.jobid = 0x%x, .vpid = 0x%x}",
name->jobid, name->vpid));
opal_proc = opal_proc_for_name (*name);
if (NULL == opal_proc) {
return NULL;
}
/* try adding this proc to each btl until */
for (int i = 0 ; i < mca_btl_tcp_component.tcp_num_btls ; ++i) {
endpoint = NULL;
(void) mca_btl_tcp_add_procs (&mca_btl_tcp_component.tcp_btls[i]->super, 1, &opal_proc,
&endpoint, NULL);
if (NULL != endpoint && NULL == proc) {
/* get the proc and continue on (could probably just break here) */
proc = endpoint->endpoint_proc;
}
}
}
return proc; return proc;
} }

Просмотреть файл

@ -49,7 +49,7 @@
/* ompi and smsg endpoint attributes */ /* ompi and smsg endpoint attributes */
typedef struct mca_btl_ugni_endpoint_attr_t { typedef struct mca_btl_ugni_endpoint_attr_t {
uint64_t proc_id; opal_process_name_t proc_name;
uint32_t index; uint32_t index;
gni_smsg_attr_t smsg_attr; gni_smsg_attr_t smsg_attr;
gni_mem_handle_t rmt_irq_mem_hndl; gni_mem_handle_t rmt_irq_mem_hndl;
@ -67,6 +67,7 @@ typedef struct mca_btl_ugni_module_t {
opal_common_ugni_device_t *device; opal_common_ugni_device_t *device;
opal_mutex_t endpoint_lock;
size_t endpoint_count; size_t endpoint_count;
opal_pointer_array_t endpoints; opal_pointer_array_t endpoints;
opal_hash_table_t id_to_endpoint; opal_hash_table_t id_to_endpoint;
@ -229,6 +230,8 @@ mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
struct opal_proc_t **procs, struct opal_proc_t **procs,
struct mca_btl_base_endpoint_t **peers); struct mca_btl_base_endpoint_t **peers);
struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_t *module, opal_proc_t *proc);
/** /**
* Initiate an asynchronous send. * Initiate an asynchronous send.
* *

Просмотреть файл

@ -28,13 +28,11 @@ static void
mca_btl_ugni_module_set_max_reg (mca_btl_ugni_module_t *ugni_module, int nlocal_procs); mca_btl_ugni_module_set_max_reg (mca_btl_ugni_module_t *ugni_module, int nlocal_procs);
static int mca_btl_ugni_smsg_setup (int nprocs); static int mca_btl_ugni_smsg_setup (int nprocs);
int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl, int mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl, size_t nprocs,
size_t nprocs, struct opal_proc_t **procs,
struct opal_proc_t **procs, struct mca_btl_base_endpoint_t **peers,
struct mca_btl_base_endpoint_t **peers, opal_bitmap_t *reachable) {
opal_bitmap_t *reachable) {
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl; mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
size_t i;
int rc; int rc;
void *mmap_start_addr; void *mmap_start_addr;
@ -59,36 +57,45 @@ int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
} }
} }
for (i = 0 ; i < nprocs ; ++i) { for (size_t i = 0 ; i < nprocs ; ++i) {
struct opal_proc_t *opal_proc = procs[i]; struct opal_proc_t *opal_proc = procs[i];
uint64_t proc_id = mca_btl_ugni_proc_name_to_id(opal_proc->proc_name); uint64_t proc_id = mca_btl_ugni_proc_name_to_id(opal_proc->proc_name);
if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) { /* check for an existing endpoint */
ugni_module->nlocal_procs++; OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) (peers + i))) {
if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) {
ugni_module->nlocal_procs++;
/* ugni is allowed on local processes to provide support for network /* ugni is allowed on local processes to provide support for network
* atomic operations */ * atomic operations */
}
/* Create and Init endpoints */
rc = mca_btl_ugni_init_ep (ugni_module, peers + i, (mca_btl_ugni_module_t *) btl, opal_proc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
BTL_ERROR(("btl/ugni error initializing endpoint"));
return rc;
}
/* go ahead and connect the local endpoint for RDMA/CQ write */
if (opal_proc == opal_proc_local_get ()) {
ugni_module->local_ep = peers[i];
}
/* Add this endpoint to the pointer array. */
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) peers[i]));
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, peers[i]);
++ugni_module->endpoint_count;
} }
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
/* Create and Init endpoints */ /* Set the reachable bit if necessary */
rc = mca_btl_ugni_init_ep (ugni_module, peers + i, (mca_btl_ugni_module_t *) btl, opal_proc); if (reachable) {
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { rc = opal_bitmap_set_bit (reachable, i);
BTL_ERROR(("btl/ugni error initializing endpoint"));
return rc;
} }
/* go ahead and connect the local endpoint for RDMA/CQ write */
if (opal_proc == opal_proc_local_get ()) {
ugni_module->local_ep = peers[i];
}
/* Add this endpoint to the pointer array. */
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) peers[i]));
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, peers[i]);
/* Set the reachable bit */
rc = opal_bitmap_set_bit (reachable, i);
++ugni_module->endpoint_count;
} }
mca_btl_ugni_module_set_max_reg (ugni_module, ugni_module->nlocal_procs); mca_btl_ugni_module_set_max_reg (ugni_module, ugni_module->nlocal_procs);
@ -224,6 +231,41 @@ int mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_t *module, opal_proc_t *proc)
{
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) module;
uint64_t proc_id = mca_btl_ugni_proc_name_to_id(proc->proc_name);
mca_btl_base_endpoint_t *ep;
int rc;
OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
do {
rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep);
if (OPAL_SUCCESS == rc) {
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
break;
}
/* Create and Init endpoints */
rc = mca_btl_ugni_init_ep (ugni_module, &ep, ugni_module, proc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("btl/ugni error initializing endpoint"));
break;
}
/* Add this endpoint to the pointer array. */
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) ep));
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, ep);
} while (0);
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
return ep;
}
static int ugni_reg_rdma_mem (void *reg_data, void *base, size_t size, static int ugni_reg_rdma_mem (void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg) mca_mpool_base_registration_t *reg)
{ {

Просмотреть файл

@ -386,8 +386,8 @@ mca_btl_ugni_component_init (int *num_btl_modules,
static inline int static inline int
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module) mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
{ {
uint64_t datagram_id, data, proc_id;
uint32_t remote_addr, remote_id; uint32_t remote_addr, remote_id;
uint64_t datagram_id, data;
mca_btl_base_endpoint_t *ep; mca_btl_base_endpoint_t *ep;
gni_post_state_t post_state; gni_post_state_t post_state;
gni_ep_handle_t handle; gni_ep_handle_t handle;
@ -425,15 +425,24 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
/* if this is a wildcard endpoint lookup the remote peer by the proc id we received */ /* if this is a wildcard endpoint lookup the remote peer by the proc id we received */
if (handle == ugni_module->wildcard_ep) { if (handle == ugni_module->wildcard_ep) {
BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64, ugni_module->wc_remote_attr.proc_id)); proc_id = mca_btl_ugni_proc_name_to_id (ugni_module->wc_remote_attr.proc_name);
rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint,
ugni_module->wc_remote_attr.proc_id, BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64,
(void *) &ep); proc_id));
OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep);
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
/* check if the endpoint is known */ /* check if the endpoint is known */
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == ep)) { if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == ep)) {
BTL_ERROR(("received connection attempt from an unknown peer. rc: %d, ep: %p, id: 0x%" PRIx64, struct opal_proc_t *remote_proc = opal_proc_for_name (ugni_module->wc_remote_attr.proc_name);
rc, (void *) ep, ugni_module->wc_remote_attr.proc_id)); BTL_VERBOSE(("Got connection request from an unknown peer {jobid = 0x%x, vid = 0x%x}",
return OPAL_ERR_NOT_FOUND; ugni_module->wc_remote_attr.proc_name.jobid, ugni_module->wc_remote_attr.proc_name.vpid));
ep = mca_btl_ugni_get_ep (&ugni_module->super, remote_proc);
if (OPAL_UNLIKELY(NULL == ep)) {
return rc;
}
} }
} else { } else {
BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep)); BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));

Просмотреть файл

@ -91,6 +91,7 @@ mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
OBJ_CONSTRUCT(&ugni_module->pending_smsg_frags_bb, opal_pointer_array_t); OBJ_CONSTRUCT(&ugni_module->pending_smsg_frags_bb, opal_pointer_array_t);
OBJ_CONSTRUCT(&ugni_module->ep_wait_list_lock,opal_mutex_t); OBJ_CONSTRUCT(&ugni_module->ep_wait_list_lock,opal_mutex_t);
OBJ_CONSTRUCT(&ugni_module->ep_wait_list, opal_list_t); OBJ_CONSTRUCT(&ugni_module->ep_wait_list, opal_list_t);
OBJ_CONSTRUCT(&ugni_module->endpoint_lock, opal_mutex_t);
OBJ_CONSTRUCT(&ugni_module->endpoints, opal_pointer_array_t); OBJ_CONSTRUCT(&ugni_module->endpoints, opal_pointer_array_t);
OBJ_CONSTRUCT(&ugni_module->id_to_endpoint, opal_hash_table_t); OBJ_CONSTRUCT(&ugni_module->id_to_endpoint, opal_hash_table_t);
OBJ_CONSTRUCT(&ugni_module->smsg_mboxes, opal_free_list_t); OBJ_CONSTRUCT(&ugni_module->smsg_mboxes, opal_free_list_t);
@ -208,6 +209,7 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
OBJ_DESTRUCT(&ugni_module->smsg_mboxes); OBJ_DESTRUCT(&ugni_module->smsg_mboxes);
OBJ_DESTRUCT(&ugni_module->pending_smsg_frags_bb); OBJ_DESTRUCT(&ugni_module->pending_smsg_frags_bb);
OBJ_DESTRUCT(&ugni_module->id_to_endpoint); OBJ_DESTRUCT(&ugni_module->id_to_endpoint);
OBJ_DESTRUCT(&ugni_module->endpoint_lock);
OBJ_DESTRUCT(&ugni_module->endpoints); OBJ_DESTRUCT(&ugni_module->endpoints);
OBJ_DESTRUCT(&ugni_module->eager_get_pending); OBJ_DESTRUCT(&ugni_module->eager_get_pending);

Просмотреть файл

@ -27,7 +27,7 @@ static void mca_btl_ugni_smsg_mbox_construct (mca_btl_ugni_smsg_mbox_t *mbox) {
mbox->attr.smsg_attr.msg_buffer = base_reg->base; mbox->attr.smsg_attr.msg_buffer = base_reg->base;
mbox->attr.smsg_attr.buff_size = mca_btl_ugni_component.smsg_mbox_size; mbox->attr.smsg_attr.buff_size = mca_btl_ugni_component.smsg_mbox_size;
mbox->attr.smsg_attr.mem_hndl = ugni_reg->handle.gni_handle; mbox->attr.smsg_attr.mem_hndl = ugni_reg->handle.gni_handle;
mbox->attr.proc_id = mca_btl_ugni_proc_name_to_id (OPAL_PROC_MY_NAME); mbox->attr.proc_name = OPAL_PROC_MY_NAME;
mbox->attr.rmt_irq_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl; mbox->attr.rmt_irq_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;
} }

Просмотреть файл

@ -427,7 +427,7 @@ static int usnic_add_procs(struct mca_btl_base_module_t* base_module,
/* Find all the endpoints with a complete set of USD destinations /* Find all the endpoints with a complete set of USD destinations
and mark them as reachable */ and mark them as reachable */
for (size_t i = 0; i < nprocs; ++i) { for (size_t i = 0; NULL != reachable && i < nprocs; ++i) {
if (NULL != endpoints[i]) { if (NULL != endpoints[i]) {
bool happy = true; bool happy = true;
for (int channel = 0; channel < USNIC_NUM_CHANNELS; ++channel) { for (int channel = 0; channel < USNIC_NUM_CHANNELS; ++channel) {

Просмотреть файл

@ -239,8 +239,10 @@ static int mca_btl_vader_component_register (void)
mca_btl_vader.super.btl_rdma_pipeline_send_length = mca_btl_vader.super.btl_eager_limit; mca_btl_vader.super.btl_rdma_pipeline_send_length = mca_btl_vader.super.btl_eager_limit;
mca_btl_vader.super.btl_rdma_pipeline_frag_size = mca_btl_vader.super.btl_eager_limit; mca_btl_vader.super.btl_rdma_pipeline_frag_size = mca_btl_vader.super.btl_eager_limit;
mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND;
if (MCA_BTL_VADER_NONE != mca_btl_vader_component.single_copy_mechanism) { if (MCA_BTL_VADER_NONE != mca_btl_vader_component.single_copy_mechanism) {
mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE; mca_btl_vader.super.btl_flags |= MCA_BTL_FLAGS_RDMA;
/* Single copy mechanisms should provide better bandwidth */ /* Single copy mechanisms should provide better bandwidth */
mca_btl_vader.super.btl_bandwidth = 40000; /* Mbs */ mca_btl_vader.super.btl_bandwidth = 40000; /* Mbs */
@ -248,7 +250,6 @@ static int mca_btl_vader_component_register (void)
mca_btl_vader.super.btl_get = (mca_btl_base_module_get_fn_t) mca_btl_vader_dummy_rdma; mca_btl_vader.super.btl_get = (mca_btl_base_module_get_fn_t) mca_btl_vader_dummy_rdma;
mca_btl_vader.super.btl_put = (mca_btl_base_module_get_fn_t) mca_btl_vader_dummy_rdma; mca_btl_vader.super.btl_put = (mca_btl_base_module_get_fn_t) mca_btl_vader_dummy_rdma;
} else { } else {
mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE;
mca_btl_vader.super.btl_bandwidth = 10000; /* Mbs */ mca_btl_vader.super.btl_bandwidth = 10000; /* Mbs */
} }

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2013 The University of Tennessee and The University * Copyright (c) 2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
@ -6,6 +7,8 @@
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science * Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -162,6 +165,11 @@ static int opal_convert_string_to_jobid_should_never_be_called(opal_jobid_t *job
return OPAL_ERR_NOT_SUPPORTED; return OPAL_ERR_NOT_SUPPORTED;
} }
/* Placeholder installed in the opal_proc_for_name function pointer.  It is
 * expected to be replaced by the runtime (e.g. ORTE/OMPI) before any real
 * lookup happens; returning NULL signals "no such proc" to any caller that
 * reaches it prematurely. */
static struct opal_proc_t *opal_proc_for_name_should_never_be_called (opal_process_name_t name)
{
    (void) name;  /* unused -- silence -Wunused-parameter */
    return NULL;
}
char* (*opal_process_name_print)(const opal_process_name_t) = opal_process_name_print_should_never_be_called; char* (*opal_process_name_print)(const opal_process_name_t) = opal_process_name_print_should_never_be_called;
char* (*opal_vpid_print)(const opal_vpid_t) = opal_vpid_print_should_never_be_called; char* (*opal_vpid_print)(const opal_vpid_t) = opal_vpid_print_should_never_be_called;
char* (*opal_jobid_print)(const opal_jobid_t) = opal_jobid_print_should_never_be_called; char* (*opal_jobid_print)(const opal_jobid_t) = opal_jobid_print_should_never_be_called;
@ -169,6 +177,7 @@ int (*opal_convert_string_to_process_name)(opal_process_name_t *name, const char
int (*opal_convert_process_name_to_string)(char** name_string, const opal_process_name_t *name) = opal_convert_process_name_to_string_should_never_be_called; int (*opal_convert_process_name_to_string)(char** name_string, const opal_process_name_t *name) = opal_convert_process_name_to_string_should_never_be_called;
char* (*opal_convert_jobid_to_string)(opal_jobid_t jobid) = opal_convert_jobid_to_string_should_never_be_called; char* (*opal_convert_jobid_to_string)(opal_jobid_t jobid) = opal_convert_jobid_to_string_should_never_be_called;
int (*opal_convert_string_to_jobid)(opal_jobid_t *jobid, const char *jobid_string) = opal_convert_string_to_jobid_should_never_be_called; int (*opal_convert_string_to_jobid)(opal_jobid_t *jobid, const char *jobid_string) = opal_convert_string_to_jobid_should_never_be_called;
struct opal_proc_t *(*opal_proc_for_name) (const opal_process_name_t name) = opal_proc_for_name_should_never_be_called;
char* opal_get_proc_hostname(const opal_proc_t *proc) char* opal_get_proc_hostname(const opal_proc_t *proc)
{ {

Просмотреть файл

@ -136,6 +136,13 @@ OPAL_DECLSPEC extern char* (*opal_jobid_print)(const opal_jobid_t);
OPAL_DECLSPEC extern char* (*opal_convert_jobid_to_string)(opal_jobid_t jobid); OPAL_DECLSPEC extern char* (*opal_convert_jobid_to_string)(opal_jobid_t jobid);
OPAL_DECLSPEC extern int (*opal_convert_string_to_jobid)(opal_jobid_t *jobid, const char *jobid_string); OPAL_DECLSPEC extern int (*opal_convert_string_to_jobid)(opal_jobid_t *jobid, const char *jobid_string);
/**
* Lookup an opal_proc_t by name
*
* @param name (IN) name to lookup
*/
OPAL_DECLSPEC extern struct opal_proc_t *(*opal_proc_for_name) (const opal_process_name_t name);
#define OPAL_NAME_PRINT(OPAL_PN) opal_process_name_print(OPAL_PN) #define OPAL_NAME_PRINT(OPAL_PN) opal_process_name_print(OPAL_PN)
#define OPAL_JOBID_PRINT(OPAL_PN) opal_jobid_print(OPAL_PN) #define OPAL_JOBID_PRINT(OPAL_PN) opal_jobid_print(OPAL_PN)
#define OPAL_VPID_PRINT(OPAL_PN) opal_vpid_print(OPAL_PN) #define OPAL_VPID_PRINT(OPAL_PN) opal_vpid_print(OPAL_PN)

Просмотреть файл

@ -113,6 +113,8 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority)
if (NULL == oshmem_group_all) { if (NULL == oshmem_group_all) {
osh_group->ompi_comm = &(ompi_mpi_comm_world.comm); osh_group->ompi_comm = &(ompi_mpi_comm_world.comm);
} else { } else {
int my_rank = MPI_UNDEFINED;
err = ompi_comm_group(&(ompi_mpi_comm_world.comm), &parent_group); err = ompi_comm_group(&(ompi_mpi_comm_world.comm), &parent_group);
if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) { if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) {
return NULL; return NULL;
@ -132,6 +134,10 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority)
break; break;
} }
} }
/* NTH: keep track of my rank in the new group for the workaround below */
if (ranks[i] == ompi_comm_rank (&ompi_mpi_comm_world.comm)) {
my_rank = i;
}
} }
err = ompi_group_incl(parent_group, osh_group->proc_count, ranks, &new_group); err = ompi_group_incl(parent_group, osh_group->proc_count, ranks, &new_group);
@ -139,6 +145,15 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority)
free(ranks); free(ranks);
return NULL; return NULL;
} }
/* NTH: XXX -- WORKAROUND -- The oshmem code overwrites ompi_proc_local_proc with its
* own proc but does not update the proc list in comm world or comm self. This causes
* the code in ompi_group_incl that updates grp_my_rank to fail. This will cause failures
* here and when an application attempts to mix oshmem and mpi so it will really need to
* be fixed in oshmem/proc and not here. For now we need to work around a new jenkins
* failure so set my group ranking so we do not crash when running ompi_comm_create_group. */
new_group->grp_my_rank = my_rank;
err = ompi_comm_create_group(&(ompi_mpi_comm_world.comm), new_group, tag, &newcomm); err = ompi_comm_create_group(&(ompi_mpi_comm_world.comm), new_group, tag, &newcomm);
if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) { if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) {
free(ranks); free(ranks);