Fixes to the PSM MTL following more comprehensive testing with the Intel tests.
This commit was SVN r13471.
Этот коммит содержится в:
родитель
f6e7016cdd
Коммит
e04c55af00
@ -24,6 +24,7 @@
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "ompi/mca/pml/base/pml_base_module_exchange.h"
|
||||
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
|
||||
#include "mtl_psm.h"
|
||||
#include "mtl_psm_types.h"
|
||||
@ -88,6 +89,7 @@ int ompi_mtl_psm_module_init() {
|
||||
char *generated_key;
|
||||
|
||||
generated_key = getenv("OMPI_MCA_orte_precondition_transports");
|
||||
memset(uu, 0, sizeof(psm_uuid_t));
|
||||
|
||||
if (!generated_key || (strlen(generated_key) != 33) ||
|
||||
sscanf(generated_key, "%016x-%016x", &uu[0], &uu[1]) != 2)
|
||||
@ -100,7 +102,6 @@ int ompi_mtl_psm_module_init() {
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* Handle our own errors for opening endpoints */
|
||||
psm_error_register_handler(ompi_mtl_psm.ep, ompi_mtl_psm_errhandler);
|
||||
|
||||
@ -237,12 +238,6 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl,
|
||||
if (rc != OMPI_SUCCESS || size != sizeof(psm_epid_t))
|
||||
return OMPI_ERROR;
|
||||
epids_in[i] = *epid;
|
||||
#if 0
|
||||
printf("... connecting to epid=%llu, lid=%d,port=%d\n",
|
||||
(unsigned long long) epids_in[i],
|
||||
(int) psm_epid_nid(epids_in[i]),
|
||||
(int) psm_epid_port(epids_in[i]));
|
||||
#endif
|
||||
}
|
||||
|
||||
timeout_in_secs = min(180, 0.5 * nprocs);
|
||||
@ -264,7 +259,6 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl,
|
||||
if (errstr == NULL) {
|
||||
opal_output(0, "PSM returned unhandled/unknown connect error: %s\n",
|
||||
psm_error_get_string(err));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
for (i = 0; i < (int) nprocs; i++) {
|
||||
psm_error_t thiserr = errs_out[i];
|
||||
@ -280,19 +274,24 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl,
|
||||
opal_output(0, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
rc = OMPI_ERROR;
|
||||
}
|
||||
else {
|
||||
/* Default error handling is enabled, errors will not be returned to
|
||||
* user. PSM prints the error and the offending endpoint's hostname
|
||||
* and exits with -1 */
|
||||
psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_DEFAULT);
|
||||
|
||||
/* Default error handling is enabled, errors will not be returned to user.
|
||||
* PSM prints the error and the offending endpoint's hostname and exits
|
||||
* with -1 */
|
||||
psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_DEFAULT);
|
||||
/* Fill in endpoint data */
|
||||
for (i = 0; i < (int) nprocs; i++) {
|
||||
mtl_peer_data[i] =
|
||||
(mca_mtl_psm_endpoint_t *) OBJ_NEW(mca_mtl_psm_endpoint_t);
|
||||
mtl_peer_data[i]->peer_epid = epids_in[i];
|
||||
mtl_peer_data[i]->peer_addr = epaddrs_out[i];
|
||||
}
|
||||
|
||||
/* Fill in endpoint data */
|
||||
for (i = 0; i < (int) nprocs; i++) {
|
||||
mtl_peer_data[i] =
|
||||
(mca_mtl_psm_endpoint_t *) OBJ_NEW(mca_mtl_psm_endpoint_t);
|
||||
mtl_peer_data[i]->peer_epid = epids_in[i];
|
||||
mtl_peer_data[i]->peer_addr = epaddrs_out[i];
|
||||
rc = OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
bail:
|
||||
|
@ -104,6 +104,11 @@ ompi_mtl_psm_component_init(bool enable_progress_threads,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Only allow for shm and ipath devices in 2.0 and earlier releases
|
||||
* (unless the user overrides the setting).
|
||||
*/
|
||||
setenv("PSM_DEVICES", "shm,ipath", 0);
|
||||
|
||||
err = psm_init(&verno_major, &verno_minor);
|
||||
if (err) {
|
||||
opal_output(0, "Error in psm_init (error %s)\n",
|
||||
@ -111,6 +116,12 @@ ompi_mtl_psm_component_init(bool enable_progress_threads,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
Enable the 'self' device only in post-2.0 releases
|
||||
*/
|
||||
if (verno_major == 0x1 && verno_minor >= 0x04)
|
||||
setenv("PSM_DEVICES", "self,shm,ipath", 0);
|
||||
|
||||
ompi_mtl_psm_module_init();
|
||||
|
||||
ompi_mtl_psm.super.mtl_request_size =
|
||||
|
@ -73,6 +73,9 @@ ompi_mtl_psm_send(struct mca_mtl_base_module_t* mtl,
|
||||
printf("send bits: 0x%016llx\n", mqtag);
|
||||
#endif
|
||||
|
||||
if (mtl_psm_request.free_after)
|
||||
free(mtl_psm_request.buf);
|
||||
|
||||
return err == PSM_OK ? OMPI_SUCCESS : OMPI_ERROR;
|
||||
}
|
||||
|
||||
|
@ -58,7 +58,7 @@ typedef struct mca_mtl_psm_module_t mca_mtl_psm_module_t;
|
||||
|
||||
extern mca_mtl_psm_module_t ompi_mtl_psm;
|
||||
|
||||
struct mca_mtl_psm_component_t{
|
||||
struct mca_mtl_psm_component_t {
|
||||
mca_mtl_base_component_1_0_0_t super; /**< base MTL component */
|
||||
};
|
||||
typedef struct mca_mtl_psm_component_t mca_mtl_psm_component_t;
|
||||
@ -74,26 +74,14 @@ extern mca_mtl_psm_component_t mca_mtl_psm_component;
|
||||
|
||||
#define PSM_MAKE_TAGSEL(user_rank, user_tag, user_ctxt, tag, tagsel) \
|
||||
do { \
|
||||
if ((user_tag) == MPI_ANY_TAG) { \
|
||||
if ((user_rank) == MPI_ANY_SOURCE) { \
|
||||
(tagsel) = PSM_MAKE_MQTAG(0xffff,0,0); \
|
||||
(tag) = PSM_MAKE_MQTAG((user_ctxt),0,0); \
|
||||
} \
|
||||
else { \
|
||||
(tagsel) = PSM_MAKE_MQTAG(0xffff,0xffff,0); \
|
||||
(tag) = PSM_MAKE_MQTAG((user_ctxt),(user_rank),0); \
|
||||
} \
|
||||
} \
|
||||
else { \
|
||||
if ((user_rank) == MPI_ANY_SOURCE) { \
|
||||
(tagsel) = PSM_MAKE_MQTAG(0xffff,0,0xffffffff); \
|
||||
(tag) = PSM_MAKE_MQTAG((user_ctxt),0,(user_tag)); \
|
||||
} \
|
||||
else { \
|
||||
(tagsel) = PSM_MAKE_MQTAG(0xffff,0xffff,0xffffffff); \
|
||||
(tag) = PSM_MAKE_MQTAG((user_ctxt),(user_rank),(user_tag)); \
|
||||
} \
|
||||
(tagsel) = 0xffffffffffffffffULL; \
|
||||
(tag) = PSM_MAKE_MQTAG((user_ctxt),(user_rank),(user_tag)); \
|
||||
if ((user_tag) == MPI_ANY_TAG) { \
|
||||
(tagsel) &= ~0x7fffffffULL; \
|
||||
(tag) &= ~0xffffffffULL; \
|
||||
} \
|
||||
if ((user_rank) == MPI_ANY_SOURCE) \
|
||||
(tagsel) &= ~0xffff00000000ULL; \
|
||||
} while (0)
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user