1
1

OSHMEM: exchange mxm(ikrit) endpoints via MPI_Allgather, code cleanup, remove unused

Refs trac:3763

This commit was SVN r30089.

The following Trac tickets were found above:
  Ticket 3763 --> https://svn.open-mpi.org/trac/ompi/ticket/3763
Этот коммит содержится в:
Mike Dubman 2013-12-26 10:53:48 +00:00
родитель e2f372ac4b
Коммит 92cf175e9e
6 изменённых файлов: 19 добавлений и 154 удалений

Просмотреть файл

@ -594,7 +594,7 @@ void mca_memheap_modex_recv_all(void)
int rc; int rc;
if (!mca_memheap_base_key_exchange) { if (!mca_memheap_base_key_exchange) {
MPI_Barrier(oshmem_comm_world); oshmem_shmem_barrier();
return; return;
} }
@ -622,9 +622,7 @@ void mca_memheap_modex_recv_all(void)
oshmem_shmem_abort(-1); oshmem_shmem_abort(-1);
} }
rc = MPI_Allgather(send_buffer, size, MPI_BYTE, rc = oshmem_shmem_allgather(send_buffer, rcv_buffer, size);
rcv_buffer, size, MPI_BYTE, oshmem_comm_world);
if (MPI_SUCCESS != rc) { if (MPI_SUCCESS != rc) {
MEMHEAP_ERROR("allgather failed"); MEMHEAP_ERROR("allgather failed");
oshmem_shmem_abort(-1); oshmem_shmem_abort(-1);

Просмотреть файл

@ -365,6 +365,7 @@ int mca_spml_ikrit_del_procs(oshmem_proc_t** procs, size_t nprocs)
int mca_spml_ikrit_add_procs(oshmem_proc_t** procs, size_t nprocs) int mca_spml_ikrit_add_procs(oshmem_proc_t** procs, size_t nprocs)
{ {
spml_ikrit_mxm_ep_conn_info_t *ep_info = NULL; spml_ikrit_mxm_ep_conn_info_t *ep_info = NULL;
spml_ikrit_mxm_ep_conn_info_t my_ep_info;
#if MXM_API < MXM_VERSION(2,0) #if MXM_API < MXM_VERSION(2,0)
mxm_conn_req_t *conn_reqs; mxm_conn_req_t *conn_reqs;
int timeout; int timeout;
@ -403,15 +404,15 @@ int mca_spml_ikrit_add_procs(oshmem_proc_t** procs, size_t nprocs)
#if MXM_API < MXM_VERSION(2,0) #if MXM_API < MXM_VERSION(2,0)
if (OSHMEM_SUCCESS if (OSHMEM_SUCCESS
!= spml_ikrit_get_ep_address(&ep_info[my_rank], MXM_PTL_SELF)) { != spml_ikrit_get_ep_address(&my_ep_info, MXM_PTL_SELF)) {
return OSHMEM_ERROR; return OSHMEM_ERROR;
} }
if (OSHMEM_SUCCESS if (OSHMEM_SUCCESS
!= spml_ikrit_get_ep_address(&ep_info[my_rank], MXM_PTL_RDMA)) { != spml_ikrit_get_ep_address(&my_ep_info, MXM_PTL_RDMA)) {
return OSHMEM_ERROR; return OSHMEM_ERROR;
} }
#else #else
err = mxm_ep_get_address(mca_spml_ikrit.mxm_ep, ep_info[my_rank].addr.ep_addr, &mxm_addr_len); err = mxm_ep_get_address(mca_spml_ikrit.mxm_ep, &my_ep_info.addr.ep_addr, &mxm_addr_len);
if (MXM_OK != err) { if (MXM_OK != err) {
orte_show_help("help-shmem-spml-ikrit.txt", "unable to get endpoint address", true, orte_show_help("help-shmem-spml-ikrit.txt", "unable to get endpoint address", true,
mxm_error_string(err)); mxm_error_string(err));
@ -421,7 +422,7 @@ int mca_spml_ikrit_add_procs(oshmem_proc_t** procs, size_t nprocs)
opal_progress_register(spml_ikrit_progress); opal_progress_register(spml_ikrit_progress);
oshmem_shmem_exchange_allgather(ep_info, oshmem_shmem_allgather(&my_ep_info, ep_info,
sizeof(spml_ikrit_mxm_ep_conn_info_t)); sizeof(spml_ikrit_mxm_ep_conn_info_t));
/* Get the EP connection requests for all the processes from modex */ /* Get the EP connection requests for all the processes from modex */

Просмотреть файл

@ -346,74 +346,6 @@ oshmem_proc_t * oshmem_proc_find(const orte_process_name_t * name)
return rproc; return rproc;
} }
int oshmem_proc_refresh(void)
{
oshmem_proc_t *proc = NULL;
opal_list_item_t *item = NULL;
orte_vpid_t i = 0;
int hostname_length = 0;
OPAL_THREAD_LOCK(&oshmem_proc_lock);
for (item = opal_list_get_first(&oshmem_proc_list), i = 0;
item != opal_list_get_end(&oshmem_proc_list);
item = opal_list_get_next(item), ++i) {
proc = (oshmem_proc_t*) item;
/* Does not change: proc->proc_name.vpid */
proc->proc_name.jobid = ORTE_PROC_MY_NAME->jobid;
/* Make sure to clear the local flag before we set it below */
proc->proc_flags = 0;
proc->proc_arch = opal_local_arch;
oshmem_shmem_exchange_bcast(&proc->proc_arch,
sizeof(uint32_t),
i);
hostname_length = strlen(orte_process_info.nodename);
oshmem_shmem_exchange_bcast(&hostname_length,
sizeof(int),
i);
if (proc->proc_hostname)
free(proc->proc_hostname);
proc->proc_hostname = (
i == ORTE_PROC_MY_NAME->vpid ?
strdup(orte_process_info.nodename) :
(char *) malloc(hostname_length));
oshmem_shmem_exchange_bcast(proc->proc_hostname,
hostname_length,
i);
if (i == ORTE_PROC_MY_NAME->vpid) {
oshmem_proc_local_proc = proc;
} else {
/* if arch is different than mine, create a new convertor for this proc */
if (proc->proc_arch != opal_local_arch) {
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
OBJ_RELEASE(proc->proc_convertor);
proc->proc_convertor = opal_convertor_create(proc->proc_arch, 0);
#else
orte_show_help("help-shmem-runtime.txt",
"heterogeneous-support-unavailable",
true,
orte_process_info.nodename,
proc->proc_hostname == NULL ?
"<hostname unavailable>" :
proc->proc_hostname);
OPAL_THREAD_UNLOCK(&oshmem_proc_lock);
return OSHMEM_ERR_NOT_SUPPORTED;
#endif
}
}
}
OPAL_THREAD_UNLOCK(&oshmem_proc_lock);
return OSHMEM_SUCCESS;
}
int oshmem_proc_pack(oshmem_proc_t **proclist, int oshmem_proc_pack(oshmem_proc_t **proclist,
int proclistsize, int proclistsize,

Просмотреть файл

@ -310,21 +310,6 @@ OSHMEM_DECLSPEC int oshmem_proc_unpack(opal_buffer_t *buf,
int *newproclistsize, int *newproclistsize,
oshmem_proc_t ***newproclist); oshmem_proc_t ***newproclist);
/**
 * Refresh the OSHMEM process subsystem
 *
 * Refresh the Open SHMEM process subsystem. This function will update
 * the list of proc instances in the current pe set with
 * data from the run-time environment.
 *
 * @note This is primarily used when restarting a process, which thus
 * needs to update the jobid and node name.
 *
 * @retval OSHMEM_SUCCESS System successfully refreshed
 * @retval OSHMEM_ERROR Refresh failed due to unspecified error
 */
OSHMEM_DECLSPEC int oshmem_proc_refresh(void);
static inline int oshmem_proc_pe(oshmem_proc_t *proc) static inline int oshmem_proc_pe(oshmem_proc_t *proc)
{ {
return (proc ? (int) proc->proc_name.vpid : -1); return (proc ? (int) proc->proc_name.vpid : -1);

Просмотреть файл

@ -10,75 +10,21 @@
#include "oshmem_config.h" #include "oshmem_config.h"
#include "orte/runtime/orte_globals.h"
#include "ompi/communicator/communicator.h" /*TODO: ompi_communicator_t */
#include "ompi/patterns/comm/coll_ops.h" /*TODO: comm_bcast_pml */
#include "oshmem/constants.h" #include "oshmem/constants.h"
#include "oshmem/runtime/runtime.h" #include "oshmem/runtime/runtime.h"
#include "oshmem/runtime/params.h" #include "oshmem/runtime/params.h"
OSHMEM_DECLSPEC int oshmem_shmem_exchange_allgather(void *buf, int oshmem_shmem_allgather(void *send_buf, void *rcv_buf, int elem_size)
int buf_size)
{ {
int rc = OSHMEM_SUCCESS; int rc;
int i = 0;
int *ranks_in_comm = NULL;
ranks_in_comm = (int *) malloc(orte_process_info.num_procs * sizeof(int)); rc = MPI_Allgather(send_buf, elem_size, MPI_BYTE,
if (NULL == ranks_in_comm) { rcv_buf, elem_size, MPI_BYTE, oshmem_comm_world);
return OSHMEM_ERR_OUT_OF_RESOURCE;
}
for (i = 0; i < (int) orte_process_info.num_procs; ++i) {
ranks_in_comm[i] = i;
}
void* buf_temp = malloc(buf_size);
memcpy(buf_temp, (char*)buf + buf_size * ORTE_PROC_MY_NAME->vpid, buf_size);
rc = comm_allgather_pml( buf_temp,
buf,
buf_size,
MPI_BYTE,
ORTE_PROC_MY_NAME->vpid,
orte_process_info.num_procs,
ranks_in_comm,
(ompi_communicator_t *) &ompi_mpi_comm_world);
if (ranks_in_comm)
free(ranks_in_comm);
if (buf_temp)
free(buf_temp);
return rc; return rc;
} }
OSHMEM_DECLSPEC int oshmem_shmem_exchange_bcast(void *buf, void oshmem_shmem_barrier(void)
int buf_size,
int peer)
{ {
int rc = OSHMEM_SUCCESS; MPI_Barrier(oshmem_comm_world);
int i = 0;
int *ranks_in_comm = NULL;
ranks_in_comm = (int *) malloc(orte_process_info.num_procs * sizeof(int));
if (NULL == ranks_in_comm) {
return OSHMEM_ERR_OUT_OF_RESOURCE;
}
for (i = 0; i < (int) orte_process_info.num_procs; ++i) {
ranks_in_comm[i] = i;
}
rc = comm_bcast_pml((void *) buf,
peer,
buf_size,
MPI_BYTE,
ORTE_PROC_MY_NAME->vpid,
orte_process_info.num_procs,
ranks_in_comm,
(ompi_communicator_t *) &ompi_mpi_comm_world);
if (ranks_in_comm)
free(ranks_in_comm);
return rc;
} }

Просмотреть файл

@ -121,11 +121,14 @@ int oshmem_shmem_finalize(void);
OSHMEM_DECLSPEC int oshmem_shmem_abort(int errcode); OSHMEM_DECLSPEC int oshmem_shmem_abort(int errcode);
/** /**
* Exchange initial info between processes * Allgather between all PEs
*/ */
OSHMEM_DECLSPEC int oshmem_shmem_exchange_allgather(void *buf, int buf_size); OSHMEM_DECLSPEC int oshmem_shmem_allgather(void *send_buf, void *rcv_buf, int elem_size);
OSHMEM_DECLSPEC int oshmem_shmem_exchange_bcast(void *buf, int buf_size, int root); /**
* Barrier between all PEs
*/
OSHMEM_DECLSPEC void oshmem_shmem_barrier(void);
/** /**
* Register OSHMEM specific runtime parameters * Register OSHMEM specific runtime parameters