1
1

MTL MXM: re-apply commit r27987, now committed under the correct user.

r27987 - MTL MXM: ver. 2.0 interface changes.

This commit was SVN r28026.

The following SVN revision numbers were found above:
  r27987 --> open-mpi/ompi@2735658d81
Этот коммит содержится в:
Vasily Filipov 2013-02-04 06:59:24 +00:00
родитель aa5e436479
Коммит 21b170b43b
5 изменённых файлов: 147 добавлений и 27 удалений

Просмотреть файл

@ -98,14 +98,14 @@ static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, mxm
return OMPI_SUCCESS;
}
#else
static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info,
static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, int dest_rank,
mxm_domain_id_t domain)
{
size_t addrlen;
mxm_error_t err;
addrlen = sizeof(ep_info->dest_addr[domain]);
err = mxm_ep_address(ompi_mtl_mxm.ep, domain,
err = mxm_ep_address(ompi_mtl_mxm.ep, domain, dest_rank,
(struct sockaddr *) &ep_info->dest_addr[domain], &addrlen);
if (MXM_OK == err) {
ep_info->domain_bitmap |= MXM_BIT(domain);
@ -123,7 +123,11 @@ static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info,
#define max(a,b) ((a)>(b)?(a):(b))
static mxm_error_t ompi_mtl_mxm_create_ep(mxm_h ctx, mxm_ep_h *ep, unsigned ptl_bitmap, int lr,
uint32_t jobid, uint64_t mxlr, int nlps) {
uint32_t jobid, uint64_t mxlr, int nlps
#if MXM_API >= MXM_VERSION(2, 0)
, int totps
#endif
) {
mxm_error_t err;
#if MXM_API < MXM_VERSION(1,5)
@ -169,10 +173,15 @@ static mxm_error_t ompi_mtl_mxm_create_ep(mxm_h ctx, mxm_ep_h *ep, unsigned ptl_
return err;
}
#if MXM_API >= MXM_VERSION(2, 0)
err = mxm_ep_create(ctx, ep_opts, ep, totps);
#else
ep_opts->job_id = jobid;
ep_opts->local_rank = lr;
ep_opts->num_local_procs = nlps;
err = mxm_ep_create(ctx, ep_opts, ep);
#endif
mxm_config_free(ep_opts);
#endif
return err;
@ -190,9 +199,92 @@ static void ompi_mtl_mxm_set_conn_req(mxm_conn_req_t *conn_req, ompi_mtl_mxm_ep_
}
#endif
#if MXM_API >= MXM_VERSION(2,0)
#define MTL_MXM_MODEX_MAX_SIZE ((size_t)0x60)
/**
 * Publish this process's MXM endpoint addresses through the modex, once per
 * destination rank.
 *
 * For every rank in [0, totps) the SELF, SHM and IB domain addresses are
 * queried into *ep_info, and the resulting record is sent in portions of at
 * most MTL_MXM_MODEX_MAX_SIZE bytes, each under its own key of the form
 * "<component>-<dest rank>-<portion index>".
 *
 * @param ep_info  Connection-info record used as the send buffer; its
 *                 domain_bitmap is reset and its addresses are refilled for
 *                 every destination rank.
 * @param totps    Number of destination ranks to publish addresses for.
 *
 * @return OMPI_SUCCESS on success, OMPI_ERR_OUT_OF_RESOURCE when a string
 *         buffer cannot be allocated, OMPI_ERROR when an address lookup or a
 *         modex send fails.
 */
static int ompi_mtl_mxm_send_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, int totps)
{
    int rc, dest, key_chars;
    int ret = OMPI_ERROR;
    size_t modex_key_len;
    char *modex_key = NULL;
    char *mxm_version;
    mca_mtl_base_component_2_0_0_t *cm = &mca_mtl_mxm_component.super;

    /* The version string is heap-allocated (it is released with free() below),
     * so guard against a failed allocation before handing it to strlen(). */
    mxm_version = mca_base_component_to_string(&cm->mtl_version);
    if (NULL == mxm_version) {
        MXM_ERROR("Cannot allocate memory.");
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* Rough upper bound of the key length: mxm_version-dest_rank-portion_num */
    modex_key_len = strlen(mxm_version) + 8 * sizeof(int) + 8 * sizeof(size_t) + 2;
    modex_key = malloc(modex_key_len);
    if (NULL == modex_key) {
        MXM_ERROR("Cannot allocate memory.");
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto cleanup;
    }

    /*
     * Send the information using the modex. Some modex back ends (for example
     * ess/pmi) limit the size of a single value, so the record is sent in
     * bounded portions.
     */
    for (dest = 0; dest < totps; ++dest) {
        /*
         * Get the address for each PTL on this endpoint, and share it with
         * the other ranks.
         */
        int modex_name_id = 0;
        size_t modex_cur_size, modex_buf_size = sizeof(*ep_info);
        unsigned char *modex_buf_ptr = (unsigned char *) ep_info;

        modex_cur_size = modex_buf_size < MTL_MXM_MODEX_MAX_SIZE ?
                         modex_buf_size : MTL_MXM_MODEX_MAX_SIZE;

        ep_info->domain_bitmap = 0;

        rc = ompi_mtl_mxm_get_ep_address(ep_info, dest, MXM_DOMAIN_SELF);
        if (OMPI_SUCCESS != rc) {
            MXM_ERROR("Failed to get endpoint address: for domain SELF dest %d.", dest);
            goto cleanup;
        }

        rc = ompi_mtl_mxm_get_ep_address(ep_info, dest, MXM_DOMAIN_SHM);
        if (OMPI_SUCCESS != rc) {
            MXM_ERROR("Failed to get endpoint address: for domain SHM dest %d.", dest);
            goto cleanup;
        }

        rc = ompi_mtl_mxm_get_ep_address(ep_info, dest, MXM_DOMAIN_IB);
        if (OMPI_SUCCESS != rc) {
            MXM_ERROR("Failed to get endpoint address: for domain IB dest %d.", dest);
            goto cleanup;
        }

        while (modex_buf_size) {
            /* The modex key looks like mtl.mxm.1.5-1-18, where mtl.mxm.1.5 is
               the component, 1 is the destination rank and 18 is the portion
               index. */
            key_chars = snprintf(modex_key, modex_key_len, "%s-%d-%d",
                                 mxm_version, dest, modex_name_id);
            if (key_chars < 0 || (size_t) key_chars >= modex_key_len) {
                /* Never publish a truncated key: the receiver could not match it. */
                MXM_ERROR("Open MPI couldn't distribute EP connection details");
                goto cleanup;
            }

            rc = ompi_modex_send_string((const char *) modex_key, modex_buf_ptr, modex_cur_size);
            if (OMPI_SUCCESS != rc) {
                MXM_ERROR("Open MPI couldn't distribute EP connection details");
                goto cleanup;
            }

            ++modex_name_id;
            modex_buf_ptr += modex_cur_size;
            modex_buf_size -= modex_cur_size;
            modex_cur_size = modex_buf_size < MTL_MXM_MODEX_MAX_SIZE ?
                             modex_buf_size : MTL_MXM_MODEX_MAX_SIZE;
        }
    }

    ret = OMPI_SUCCESS;

cleanup:
    /* Single exit path so both strings are released on every outcome. */
    free(modex_key);
    free(mxm_version);
    return ret;
}
#endif
int ompi_mtl_mxm_module_init(void)
{
ompi_mtl_mxm_ep_conn_info_t ep_info;
mxm_error_t err;
uint32_t jobid;
@ -248,7 +340,11 @@ int ompi_mtl_mxm_module_init(void)
/* Open MXM endpoint */
err = ompi_mtl_mxm_create_ep(ompi_mtl_mxm.mxm_context, &ompi_mtl_mxm.ep,
ptl_bitmap, lr, jobid, mxlr, nlps);
ptl_bitmap, lr, jobid, mxlr, nlps
#if MXM_API >= MXM_VERSION(2, 0)
, totps
#endif
);
if (MXM_OK != err) {
ompi_show_help("help-mtl-mxm.txt", "unable to create endpoint", true,
@ -272,18 +368,6 @@ int ompi_mtl_mxm_module_init(void)
OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SHM)) {
return OMPI_ERROR;
}
#else
ep_info.domain_bitmap = 0;
if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_DOMAIN_SELF)) {
return OMPI_ERROR;
}
if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_DOMAIN_SHM)) {
return OMPI_ERROR;
}
if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_DOMAIN_IB)) {
return OMPI_ERROR;
}
#endif
/*
* Send information using the modex (some back ends, for example ess/pmi, limit the data size)
@ -316,6 +400,16 @@ int ompi_mtl_mxm_module_init(void)
free(modex_component_name);
free(modex_name);
}
#else
{
int rc;
rc = ompi_mtl_mxm_send_ep_address(&ep_info, totps);
if (OMPI_SUCCESS != rc) {
MXM_ERROR("Modex session failed.");
return rc;
}
}
#endif
/* Register the MXM progress function */
opal_progress_register(ompi_mtl_mxm_progress);
@ -363,12 +457,20 @@ int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
size_t modex_buf_size = sizeof(ompi_mtl_mxm_ep_conn_info_t);
size_t modex_cur_size = 0;
char *modex_component_name = mca_base_component_to_string(&mca_mtl_mxm_component.super.mtl_version);
char *modex_name = malloc(strlen(modex_component_name) + 5);
int modex_name_id = 0;
#if MXM_API < MXM_VERSION(2,0)
char *modex_name = malloc(strlen(modex_component_name) + 5);
#else
char *modex_name = malloc(strlen(modex_component_name) + 8 * sizeof(int) + 8 * sizeof(size_t) + 2);
#endif
while (modex_buf_size > 0) {
/* The modex name looks like mtl.mxm.1.5-18, where mtl.mxm.1.5 is the component and 18 is the portion index; with MXM >= 2.0 this process's vpid is inserted between them (mtl.mxm.1.5-<vpid>-18) */
#if MXM_API < MXM_VERSION(2,0)
sprintf(modex_name, "%s-%d", modex_component_name, modex_name_id);
#else
sprintf(modex_name, "%s-%d-%d", modex_component_name, ompi_process_info.my_name.vpid, modex_name_id);
#endif
if (OMPI_SUCCESS != ompi_modex_recv_string((const char *)modex_name, procs[i], (void**)&modex_buf_ptr, &modex_cur_size)) {
MXM_ERROR("Open MPI couldn't distribute EP connection details");

Просмотреть файл

@ -17,7 +17,15 @@ int ompi_mtl_mxm_cancel(struct mca_mtl_base_module_t* mtl,
mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
mxm_error_t err;
#if MXM_API >= MXM_VERSION(2,0)
if (mtl_mxm_request->is_send) {
err = mxm_req_cancel_send(&mtl_mxm_request->mxm.send);
} else {
err = mxm_req_cancel_recv(&mtl_mxm_request->mxm.recv);
}
#else
err = mxm_req_cancel(&mtl_mxm_request->mxm.base);
#endif
if ((err != MXM_OK) && (err != MXM_ERR_NO_PROGRESS)) {
return OMPI_ERROR;
}

Просмотреть файл

@ -50,6 +50,10 @@ static inline __opal_attribute_always_inline__ int
return ret;
}
#if MXM_API >= MXM_VERSION(2,0)
mtl_mxm_request->is_send = 0;
#endif
mxm_recv_req->base.state = MXM_REQ_NEW;
mxm_recv_req->base.flags = 0;

Просмотреть файл

@ -21,6 +21,9 @@ struct mca_mtl_mxm_request_t {
mxm_send_req_t send;
mxm_recv_req_t recv;
} mxm;
#if MXM_API >= MXM_VERSION(2,0)
int is_send;
#endif
/* mxm_segment_t mxm_segment[1]; */
void *buf;
size_t length;

Просмотреть файл

@ -162,6 +162,9 @@ int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
}
mxm_send_req = &mtl_mxm_request->mxm.send;
#if MXM_API >= MXM_VERSION(2,0)
mtl_mxm_request->is_send = 1;
#endif
/* prepare a send request embedded in the MTL request */
mxm_send_req->base.state = MXM_REQ_NEW;