From 21b170b43bf9cf234ddd25acc07b0c68683d4ff9 Mon Sep 17 00:00:00 2001 From: Vasily Filipov Date: Mon, 4 Feb 2013 06:59:24 +0000 Subject: [PATCH] MTL MXM: push commit r27987 back, now with right user. r27987 - MTL MXM: ver. 2.0 interface changes. This commit was SVN r28026. The following SVN revision numbers were found above: r27987 --> open-mpi/ompi@2735658d81eb7ec171c8c4f7831df8d3425e2099 --- ompi/mca/mtl/mxm/mtl_mxm.c | 150 ++++++++++++++++++++++++----- ompi/mca/mtl/mxm/mtl_mxm_cancel.c | 8 ++ ompi/mca/mtl/mxm/mtl_mxm_recv.c | 4 + ompi/mca/mtl/mxm/mtl_mxm_request.h | 9 +- ompi/mca/mtl/mxm/mtl_mxm_send.c | 3 + 5 files changed, 147 insertions(+), 27 deletions(-) diff --git a/ompi/mca/mtl/mxm/mtl_mxm.c b/ompi/mca/mtl/mxm/mtl_mxm.c index fcdfddacfe..37b11845b0 100644 --- a/ompi/mca/mtl/mxm/mtl_mxm.c +++ b/ompi/mca/mtl/mxm/mtl_mxm.c @@ -98,14 +98,14 @@ static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, mxm return OMPI_SUCCESS; } #else -static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, +static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, int dest_rank, mxm_domain_id_t domain) { size_t addrlen; mxm_error_t err; addrlen = sizeof(ep_info->dest_addr[domain]); - err = mxm_ep_address(ompi_mtl_mxm.ep, domain, + err = mxm_ep_address(ompi_mtl_mxm.ep, domain, dest_rank, (struct sockaddr *) &ep_info->dest_addr[domain], &addrlen); if (MXM_OK == err) { ep_info->domain_bitmap |= MXM_BIT(domain); @@ -123,7 +123,11 @@ static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, #define max(a,b) ((a)>(b)?(a):(b)) static mxm_error_t ompi_mtl_mxm_create_ep(mxm_h ctx, mxm_ep_h *ep, unsigned ptl_bitmap, int lr, - uint32_t jobid, uint64_t mxlr, int nlps) { + uint32_t jobid, uint64_t mxlr, int nlps +#if MXM_API >= MXM_VERSION(2, 0) + , int totps +#endif + ) { mxm_error_t err; #if MXM_API < MXM_VERSION(1,5) @@ -169,10 +173,15 @@ static mxm_error_t ompi_mtl_mxm_create_ep(mxm_h ctx, mxm_ep_h *ep, unsigned ptl_ return err; } +#if MXM_API >= MXM_VERSION(2, 0) + err = mxm_ep_create(ctx, ep_opts, ep, totps); +#else ep_opts->job_id = jobid; ep_opts->local_rank = lr; ep_opts->num_local_procs = nlps; err = mxm_ep_create(ctx, ep_opts, ep); +#endif + mxm_config_free(ep_opts); #endif return err; @@ -190,9 +199,92 @@ static void ompi_mtl_mxm_set_conn_req(mxm_conn_req_t *conn_req, ompi_mtl_mxm_ep_ } #endif +#if MXM_API >= MXM_VERSION(2,0) +#define MTL_MXM_MODEX_MAX_SIZE ((size_t)0x60) +static int ompi_mtl_mxm_send_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, int totps) +{ + int rc, dest; + + mca_mtl_base_component_2_0_0_t *cm = &mca_mtl_mxm_component.super; + char *modex_key, *mxm_version = mca_base_component_to_string(&cm->mtl_version); + + /* Rough approximation of the next string length: mxm_version-dest_rank-portion_num */ + modex_key = malloc(strlen(mxm_version) + 8 * sizeof(int) + 8 * sizeof(size_t) + 2); + if (NULL == modex_key) { + MXM_ERROR("Cannot allocate memory."); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* + * Send information using modex (in some case there is limitation on data size for example ess/pmi) + * set size of data sent for once + */ + for (dest = 0; dest < totps; ++dest) { + /* + * Get address for each PTL on this endpoint, and share it with other ranks. + */ + int modex_name_id = 0; + + size_t modex_cur_size, modex_buf_size = sizeof(*ep_info); + unsigned char *modex_buf_ptr = (unsigned char *) ep_info; + + modex_cur_size = modex_buf_size < MTL_MXM_MODEX_MAX_SIZE ? + modex_buf_size : MTL_MXM_MODEX_MAX_SIZE; + + ep_info->domain_bitmap = 0; + + rc = ompi_mtl_mxm_get_ep_address(ep_info, dest, MXM_DOMAIN_SELF); + if (OMPI_SUCCESS != rc) { + MXM_ERROR("Failed to get endpoint address: for domain SELF dest %d.", dest); + return OMPI_ERROR; + } + + rc = ompi_mtl_mxm_get_ep_address(ep_info, dest, MXM_DOMAIN_SHM); + if (OMPI_SUCCESS != rc) { + MXM_ERROR("Failed to get endpoint address: for domain SHM dest %d.", dest); + return OMPI_ERROR; + } + + rc = ompi_mtl_mxm_get_ep_address(ep_info, dest, MXM_DOMAIN_IB); + if (OMPI_SUCCESS != rc) { + MXM_ERROR("Failed to get endpoint address: for domain IB dest %d.", dest); + return OMPI_ERROR; + } + + while (modex_buf_size) { + /* Modex key looks as mtl.mxm.1.5-1-18 where mtl.mxm.1.5 is the component, + 1 is a destination rank and 18 is a portion index */ + sprintf(modex_key, "%s-%d-%d", mxm_version, dest, modex_name_id); + + rc = ompi_modex_send_string((const char *) modex_key, modex_buf_ptr, modex_cur_size); + if (OMPI_SUCCESS != rc) { + MXM_ERROR("Open MPI couldn't distribute EP connection details"); + + free(modex_key); + free(mxm_version); + + return OMPI_ERROR; + } + + ++modex_name_id; + + modex_buf_ptr += modex_cur_size; + modex_buf_size -= modex_cur_size; + + modex_cur_size = modex_buf_size < MTL_MXM_MODEX_MAX_SIZE ? + modex_buf_size : MTL_MXM_MODEX_MAX_SIZE; + } + } + + free(modex_key); + free(mxm_version); + + return OMPI_SUCCESS; +} +#endif + int ompi_mtl_mxm_module_init(void) { - ompi_mtl_mxm_ep_conn_info_t ep_info; mxm_error_t err; uint32_t jobid; @@ -248,7 +340,11 @@ int ompi_mtl_mxm_module_init(void) /* Open MXM endpoint */ err = ompi_mtl_mxm_create_ep(ompi_mtl_mxm.mxm_context, &ompi_mtl_mxm.ep, - ptl_bitmap, lr, jobid, mxlr, nlps); + ptl_bitmap, lr, jobid, mxlr, nlps +#if MXM_API >= MXM_VERSION(2, 0) + , totps +#endif + ); if (MXM_OK != err) { ompi_show_help("help-mtl-mxm.txt", "unable to create endpoint", true, @@ -272,19 +368,7 @@ int ompi_mtl_mxm_module_init(void) OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SHM)) { return OMPI_ERROR; } -#else - ep_info.domain_bitmap = 0; - if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_DOMAIN_SELF)) { - return OMPI_ERROR; - } - if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_DOMAIN_SHM)) { - return OMPI_ERROR; - } - if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_DOMAIN_IB)) { - return OMPI_ERROR; - } -#endif - + /* * send information using modex (in some case there is limitation on data size for example ess/pmi) * set size of data sent for once @@ -303,10 +387,10 @@ int ompi_mtl_mxm_module_init(void) sprintf(modex_name, "%s-%d", modex_component_name, modex_name_id); if (OMPI_SUCCESS != ompi_modex_send_string((const char *)modex_name, modex_buf_ptr, modex_cur_size)) { - MXM_ERROR("Open MPI couldn't distribute EP connection details"); + MXM_ERROR("Open MPI couldn't distribute EP connection details"); free(modex_component_name); free(modex_name); - return OMPI_ERROR; + return OMPI_ERROR; } modex_name_id++; modex_buf_ptr += modex_cur_size; @@ -316,7 +400,17 @@ int ompi_mtl_mxm_module_init(void) free(modex_component_name); free(modex_name); } - +#else + { + int rc; + rc = ompi_mtl_mxm_send_ep_address(&ep_info, totps); + if (OMPI_SUCCESS != rc) { + MXM_ERROR("Modex session failed."); + return rc; + } + } +#endif + /* Register the MXM progress function */ opal_progress_register(ompi_mtl_mxm_progress); return OMPI_SUCCESS; @@ -363,12 +457,20 @@ int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs, size_t modex_buf_size = sizeof(ompi_mtl_mxm_ep_conn_info_t); size_t modex_cur_size = 0; char *modex_component_name = mca_base_component_to_string(&mca_mtl_mxm_component.super.mtl_version); - char *modex_name = malloc(strlen(modex_component_name) + 5); int modex_name_id = 0; - +#if MXM_API < MXM_VERSION(2,0) + char *modex_name = malloc(strlen(modex_component_name) + 5); +#else + char *modex_name = malloc(strlen(modex_component_name) + 8 * sizeof(int) + 8 * sizeof(size_t) + 2); +#endif + while (modex_buf_size > 0) { - /* modex name looks as mtl.mxm.1.5-18 where mtl.mxm.1.5 is the component and 18 is portion index */ + /* modex name looks as mtl.mxm.1.5-18 where mtl.mxm.1.5 is the component and 18 is portion index */ +#if MXM_API < MXM_VERSION(2,0) sprintf(modex_name, "%s-%d", modex_component_name, modex_name_id); +#else + sprintf(modex_name, "%s-%d-%d", modex_component_name, ompi_process_info.my_name.vpid, modex_name_id); +#endif if (OMPI_SUCCESS != ompi_modex_recv_string((const char *)modex_name, procs[i], (void**)&modex_buf_ptr, &modex_cur_size)) { MXM_ERROR("Open MPI couldn't distribute EP connection details"); diff --git a/ompi/mca/mtl/mxm/mtl_mxm_cancel.c b/ompi/mca/mtl/mxm/mtl_mxm_cancel.c index f10dabea8d..bc6d6b1064 100644 --- a/ompi/mca/mtl/mxm/mtl_mxm_cancel.c +++ b/ompi/mca/mtl/mxm/mtl_mxm_cancel.c @@ -17,7 +17,15 @@ int ompi_mtl_mxm_cancel(struct mca_mtl_base_module_t* mtl, mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request; mxm_error_t err; +#if MXM_API >= MXM_VERSION(2,0) + if (mtl_mxm_request->is_send) { + err = mxm_req_cancel_send(&mtl_mxm_request->mxm.send); + } else { + err = mxm_req_cancel_recv(&mtl_mxm_request->mxm.recv); + } +#else err = mxm_req_cancel(&mtl_mxm_request->mxm.base); +#endif if ((err != MXM_OK) && (err != MXM_ERR_NO_PROGRESS)) { return OMPI_ERROR; } diff --git a/ompi/mca/mtl/mxm/mtl_mxm_recv.c b/ompi/mca/mtl/mxm/mtl_mxm_recv.c index 8581f85fa8..ea81fafc2c 100644 --- a/ompi/mca/mtl/mxm/mtl_mxm_recv.c +++ b/ompi/mca/mtl/mxm/mtl_mxm_recv.c @@ -50,6 +50,10 @@ static inline __opal_attribute_always_inline__ int return ret; } +#if MXM_API >= MXM_VERSION(2,0) + mtl_mxm_request->is_send = 0; +#endif + mxm_recv_req->base.state = MXM_REQ_NEW; mxm_recv_req->base.flags = 0; diff --git a/ompi/mca/mtl/mxm/mtl_mxm_request.h b/ompi/mca/mtl/mxm/mtl_mxm_request.h index 9bcf5058b2..a3c103996a 100644 --- a/ompi/mca/mtl/mxm/mtl_mxm_request.h +++ b/ompi/mca/mtl/mxm/mtl_mxm_request.h @@ -17,10 +17,13 @@ struct mca_mtl_mxm_request_t { struct mca_mtl_request_t super; union { - mxm_req_base_t base; - mxm_send_req_t send; - mxm_recv_req_t recv; + mxm_req_base_t base; + mxm_send_req_t send; + mxm_recv_req_t recv; } mxm; +#if MXM_API >= MXM_VERSION(2,0) + int is_send; +#endif /* mxm_segment_t mxm_segment[1]; */ void *buf; size_t length; diff --git a/ompi/mca/mtl/mxm/mtl_mxm_send.c b/ompi/mca/mtl/mxm/mtl_mxm_send.c index cd3ddc0722..0f1fc04ea0 100644 --- a/ompi/mca/mtl/mxm/mtl_mxm_send.c +++ b/ompi/mca/mtl/mxm/mtl_mxm_send.c @@ -162,6 +162,9 @@ int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl, } mxm_send_req = &mtl_mxm_request->mxm.send; +#if MXM_API >= MXM_VERSION(2,0) + mtl_mxm_request->is_send = 1; +#endif /* prepare a send request embedded in the MTL request */ mxm_send_req->base.state = MXM_REQ_NEW;