Fixes to the PSM MTL following more comprehensive testing with the Intel tests.
This commit was SVN r13471.
Этот коммит содержится в:
родитель
f6e7016cdd
Коммит
e04c55af00
@ -24,6 +24,7 @@
|
|||||||
#include "opal/class/opal_list.h"
|
#include "opal/class/opal_list.h"
|
||||||
#include "ompi/mca/pml/base/pml_base_module_exchange.h"
|
#include "ompi/mca/pml/base/pml_base_module_exchange.h"
|
||||||
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
|
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
|
||||||
|
#include "ompi/proc/proc.h"
|
||||||
|
|
||||||
#include "mtl_psm.h"
|
#include "mtl_psm.h"
|
||||||
#include "mtl_psm_types.h"
|
#include "mtl_psm_types.h"
|
||||||
@ -88,6 +89,7 @@ int ompi_mtl_psm_module_init() {
|
|||||||
char *generated_key;
|
char *generated_key;
|
||||||
|
|
||||||
generated_key = getenv("OMPI_MCA_orte_precondition_transports");
|
generated_key = getenv("OMPI_MCA_orte_precondition_transports");
|
||||||
|
memset(uu, 0, sizeof(psm_uuid_t));
|
||||||
|
|
||||||
if (!generated_key || (strlen(generated_key) != 33) ||
|
if (!generated_key || (strlen(generated_key) != 33) ||
|
||||||
sscanf(generated_key, "%016x-%016x", &uu[0], &uu[1]) != 2)
|
sscanf(generated_key, "%016x-%016x", &uu[0], &uu[1]) != 2)
|
||||||
@ -100,7 +102,6 @@ int ompi_mtl_psm_module_init() {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Handle our own errors for opening endpoints */
|
/* Handle our own errors for opening endpoints */
|
||||||
psm_error_register_handler(ompi_mtl_psm.ep, ompi_mtl_psm_errhandler);
|
psm_error_register_handler(ompi_mtl_psm.ep, ompi_mtl_psm_errhandler);
|
||||||
|
|
||||||
@ -237,12 +238,6 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl,
|
|||||||
if (rc != OMPI_SUCCESS || size != sizeof(psm_epid_t))
|
if (rc != OMPI_SUCCESS || size != sizeof(psm_epid_t))
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
epids_in[i] = *epid;
|
epids_in[i] = *epid;
|
||||||
#if 0
|
|
||||||
printf("... connecting to epid=%llu, lid=%d,port=%d\n",
|
|
||||||
(unsigned long long) epids_in[i],
|
|
||||||
(int) psm_epid_nid(epids_in[i]),
|
|
||||||
(int) psm_epid_port(epids_in[i]));
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
timeout_in_secs = min(180, 0.5 * nprocs);
|
timeout_in_secs = min(180, 0.5 * nprocs);
|
||||||
@ -264,7 +259,6 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl,
|
|||||||
if (errstr == NULL) {
|
if (errstr == NULL) {
|
||||||
opal_output(0, "PSM returned unhandled/unknown connect error: %s\n",
|
opal_output(0, "PSM returned unhandled/unknown connect error: %s\n",
|
||||||
psm_error_get_string(err));
|
psm_error_get_string(err));
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
}
|
||||||
for (i = 0; i < (int) nprocs; i++) {
|
for (i = 0; i < (int) nprocs; i++) {
|
||||||
psm_error_t thiserr = errs_out[i];
|
psm_error_t thiserr = errs_out[i];
|
||||||
@ -280,19 +274,24 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl,
|
|||||||
opal_output(0, "\n");
|
opal_output(0, "\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/* Default error handling is enabled, errors will not be returned to user.
|
rc = OMPI_ERROR;
|
||||||
* PSM prints the error and the offending endpoint's hostname and exits
|
}
|
||||||
* with -1 */
|
else {
|
||||||
psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_DEFAULT);
|
/* Default error handling is enabled, errors will not be returned to
|
||||||
|
* user. PSM prints the error and the offending endpoint's hostname
|
||||||
|
* and exits with -1 */
|
||||||
|
psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_DEFAULT);
|
||||||
|
|
||||||
/* Fill in endpoint data */
|
/* Fill in endpoint data */
|
||||||
for (i = 0; i < (int) nprocs; i++) {
|
for (i = 0; i < (int) nprocs; i++) {
|
||||||
mtl_peer_data[i] =
|
mtl_peer_data[i] =
|
||||||
(mca_mtl_psm_endpoint_t *) OBJ_NEW(mca_mtl_psm_endpoint_t);
|
(mca_mtl_psm_endpoint_t *) OBJ_NEW(mca_mtl_psm_endpoint_t);
|
||||||
mtl_peer_data[i]->peer_epid = epids_in[i];
|
mtl_peer_data[i]->peer_epid = epids_in[i];
|
||||||
mtl_peer_data[i]->peer_addr = epaddrs_out[i];
|
mtl_peer_data[i]->peer_addr = epaddrs_out[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
bail:
|
bail:
|
||||||
|
@ -104,12 +104,23 @@ ompi_mtl_psm_component_init(bool enable_progress_threads,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Only allow for shm and ipath devices in 2.0 and earlier releases
|
||||||
|
* (unless the user overrides the setting).
|
||||||
|
*/
|
||||||
|
setenv("PSM_DEVICES", "shm,ipath", 0);
|
||||||
|
|
||||||
err = psm_init(&verno_major, &verno_minor);
|
err = psm_init(&verno_major, &verno_minor);
|
||||||
if (err) {
|
if (err) {
|
||||||
opal_output(0, "Error in psm_init (error %s)\n",
|
opal_output(0, "Error in psm_init (error %s)\n",
|
||||||
psm_error_get_string(err));
|
psm_error_get_string(err));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Enable 'self' device only in a post-2.0 release(s)
|
||||||
|
*/
|
||||||
|
if (verno_major == 0x1 && verno_minor >= 0x04)
|
||||||
|
setenv("PSM_DEVICES", "self,shm,ipath", 0);
|
||||||
|
|
||||||
ompi_mtl_psm_module_init();
|
ompi_mtl_psm_module_init();
|
||||||
|
|
||||||
|
@ -73,6 +73,9 @@ ompi_mtl_psm_send(struct mca_mtl_base_module_t* mtl,
|
|||||||
printf("send bits: 0x%016llx\n", mqtag);
|
printf("send bits: 0x%016llx\n", mqtag);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if (mtl_psm_request.free_after)
|
||||||
|
free(mtl_psm_request.buf);
|
||||||
|
|
||||||
return err == PSM_OK ? OMPI_SUCCESS : OMPI_ERROR;
|
return err == PSM_OK ? OMPI_SUCCESS : OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,7 +58,7 @@ typedef struct mca_mtl_psm_module_t mca_mtl_psm_module_t;
|
|||||||
|
|
||||||
extern mca_mtl_psm_module_t ompi_mtl_psm;
|
extern mca_mtl_psm_module_t ompi_mtl_psm;
|
||||||
|
|
||||||
struct mca_mtl_psm_component_t{
|
struct mca_mtl_psm_component_t {
|
||||||
mca_mtl_base_component_1_0_0_t super; /**< base MTL component */
|
mca_mtl_base_component_1_0_0_t super; /**< base MTL component */
|
||||||
};
|
};
|
||||||
typedef struct mca_mtl_psm_component_t mca_mtl_psm_component_t;
|
typedef struct mca_mtl_psm_component_t mca_mtl_psm_component_t;
|
||||||
@ -74,26 +74,14 @@ extern mca_mtl_psm_component_t mca_mtl_psm_component;
|
|||||||
|
|
||||||
#define PSM_MAKE_TAGSEL(user_rank, user_tag, user_ctxt, tag, tagsel) \
|
#define PSM_MAKE_TAGSEL(user_rank, user_tag, user_ctxt, tag, tagsel) \
|
||||||
do { \
|
do { \
|
||||||
if ((user_tag) == MPI_ANY_TAG) { \
|
(tagsel) = 0xffffffffffffffffULL; \
|
||||||
if ((user_rank) == MPI_ANY_SOURCE) { \
|
(tag) = PSM_MAKE_MQTAG((user_ctxt),(user_rank),(user_tag)); \
|
||||||
(tagsel) = PSM_MAKE_MQTAG(0xffff,0,0); \
|
if ((user_tag) == MPI_ANY_TAG) { \
|
||||||
(tag) = PSM_MAKE_MQTAG((user_ctxt),0,0); \
|
(tagsel) &= ~0x7fffffffULL; \
|
||||||
} \
|
(tag) &= ~0xffffffffULL; \
|
||||||
else { \
|
|
||||||
(tagsel) = PSM_MAKE_MQTAG(0xffff,0xffff,0); \
|
|
||||||
(tag) = PSM_MAKE_MQTAG((user_ctxt),(user_rank),0); \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
else { \
|
|
||||||
if ((user_rank) == MPI_ANY_SOURCE) { \
|
|
||||||
(tagsel) = PSM_MAKE_MQTAG(0xffff,0,0xffffffff); \
|
|
||||||
(tag) = PSM_MAKE_MQTAG((user_ctxt),0,(user_tag)); \
|
|
||||||
} \
|
|
||||||
else { \
|
|
||||||
(tagsel) = PSM_MAKE_MQTAG(0xffff,0xffff,0xffffffff); \
|
|
||||||
(tag) = PSM_MAKE_MQTAG((user_ctxt),(user_rank),(user_tag)); \
|
|
||||||
} \
|
|
||||||
} \
|
} \
|
||||||
|
if ((user_rank) == MPI_ANY_SOURCE) \
|
||||||
|
(tagsel) &= ~0xffff00000000ULL; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#if defined(c_plusplus) || defined(__cplusplus)
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user