Updated psm2 mtl with new externally exposed symbols of psm2.so
Fixes open-mpi/ompi#1018 Fixes open-mpi/ompi#1021
Этот коммит содержится в:
родитель
8bf1c95cdc
Коммит
ed16d8e1cc
@ -48,7 +48,7 @@ AC_DEFUN([OMPI_CHECK_PSM2],[
|
|||||||
OPAL_CHECK_PACKAGE([$1],
|
OPAL_CHECK_PACKAGE([$1],
|
||||||
[psm2.h],
|
[psm2.h],
|
||||||
[psm2],
|
[psm2],
|
||||||
[psm_mq_irecv2],
|
[psm2_mq_irecv2],
|
||||||
[],
|
[],
|
||||||
[$ompi_check_psm2_dir],
|
[$ompi_check_psm2_dir],
|
||||||
[$ompi_check_psm2_libdir],
|
[$ompi_check_psm2_libdir],
|
||||||
|
@ -22,7 +22,7 @@ EXTRA_DIST = post_configure.sh
|
|||||||
|
|
||||||
AM_CPPFLAGS = $(mtl_psm2_CPPFLAGS)
|
AM_CPPFLAGS = $(mtl_psm2_CPPFLAGS)
|
||||||
|
|
||||||
dist_ompidata_DATA = help-mtl-psm.txt
|
dist_ompidata_DATA = help-mtl-psm2.txt
|
||||||
|
|
||||||
mtl_psm2_sources = \
|
mtl_psm2_sources = \
|
||||||
mtl_psm2.c \
|
mtl_psm2.c \
|
||||||
|
@ -1,24 +1,25 @@
|
|||||||
# -*- text -*-
|
# -*- text -*-
|
||||||
#
|
#
|
||||||
# Copyright (C) 2009. QLogic Corporation. All rights reserved.
|
# Copyright (C) 2009. QLogic Corporation. All rights reserved.
|
||||||
|
# Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||||
# $COPYRIGHT$
|
# $COPYRIGHT$
|
||||||
#
|
#
|
||||||
# Additional copyrights may follow
|
# Additional copyrights may follow
|
||||||
#
|
#
|
||||||
# $HEADER$
|
# $HEADER$
|
||||||
#
|
#
|
||||||
[psm init]
|
[psm2 init]
|
||||||
Initialization of PSM library failed.
|
Initialization of PSM2 library failed.
|
||||||
|
|
||||||
Error: %s
|
Error: %s
|
||||||
#
|
#
|
||||||
[debug level]
|
[debug level]
|
||||||
Unable to set PSM debug level.
|
Unable to set PSM2 debug level.
|
||||||
|
|
||||||
Error: %s
|
Error: %s
|
||||||
#
|
#
|
||||||
[unable to open endpoint]
|
[unable to open endpoint]
|
||||||
PSM was unable to open an endpoint. Please make sure that the network link is
|
PSM2 was unable to open an endpoint. Please make sure that the network link is
|
||||||
active on the node and the hardware is functioning.
|
active on the node and the hardware is functioning.
|
||||||
|
|
||||||
Error: %s
|
Error: %s
|
||||||
@ -30,10 +31,10 @@ the environment).
|
|||||||
Local host: %s
|
Local host: %s
|
||||||
#
|
#
|
||||||
[error polling network]
|
[error polling network]
|
||||||
Error %s occurred in attempting to make network progress (psm_mq_ipeek).
|
Error %s occurred in attempting to make network progress (psm2_mq_ipeek).
|
||||||
#
|
#
|
||||||
[error posting receive]
|
[error posting receive]
|
||||||
Unable to post application receive buffer (psm_mq_irecv or psm_mq_imrecv).
|
Unable to post application receive buffer (psm2_mq_irecv or psm2_mq_imrecv).
|
||||||
|
|
||||||
Error: %s
|
Error: %s
|
||||||
Buffer: %p
|
Buffer: %p
|
@ -36,7 +36,7 @@
|
|||||||
|
|
||||||
mca_mtl_psm2_module_t ompi_mtl_psm2 = {
|
mca_mtl_psm2_module_t ompi_mtl_psm2 = {
|
||||||
.super = {
|
.super = {
|
||||||
/* NTH: PSM supports 16 bit context ids */
|
/* NTH: PSM2 supports 16 bit context ids */
|
||||||
.mtl_max_contextid = (1UL << 16) - 1,
|
.mtl_max_contextid = (1UL << 16) - 1,
|
||||||
.mtl_max_tag = (1UL << 30), /* must allow negatives */
|
.mtl_max_tag = (1UL << 30), /* must allow negatives */
|
||||||
|
|
||||||
@ -59,27 +59,27 @@ mca_mtl_psm2_module_t ompi_mtl_psm2 = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static
|
static
|
||||||
psm_error_t
|
psm2_error_t
|
||||||
ompi_mtl_psm2_errhandler(psm_ep_t ep, const psm_error_t error,
|
ompi_mtl_psm2_errhandler(psm2_ep_t ep, const psm2_error_t error,
|
||||||
const char *error_string, psm_error_token_t token)
|
const char *error_string, psm2_error_token_t token)
|
||||||
{
|
{
|
||||||
switch (error) {
|
switch (error) {
|
||||||
/* We don't want PSM to default to exiting when the following errors occur */
|
/* We don't want PSM2 to default to exiting when the following errors occur */
|
||||||
case PSM_EP_DEVICE_FAILURE:
|
case PSM2_EP_DEVICE_FAILURE:
|
||||||
case PSM_EP_NO_DEVICE:
|
case PSM2_EP_NO_DEVICE:
|
||||||
case PSM_EP_NO_PORTS_AVAIL:
|
case PSM2_EP_NO_PORTS_AVAIL:
|
||||||
case PSM_EP_NO_NETWORK:
|
case PSM2_EP_NO_NETWORK:
|
||||||
case PSM_EP_INVALID_UUID_KEY:
|
case PSM2_EP_INVALID_UUID_KEY:
|
||||||
opal_show_help("help-mtl-psm.txt",
|
opal_show_help("help-mtl-psm2.txt",
|
||||||
"unable to open endpoint", true,
|
"unable to open endpoint", true,
|
||||||
psm_error_get_string(error));
|
psm2_error_get_string(error));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* We can't handle any other errors than the ones above */
|
/* We can't handle any other errors than the ones above */
|
||||||
default:
|
default:
|
||||||
opal_output(0, "Open MPI detected an unexpected PSM error in opening "
|
opal_output(0, "Open MPI detected an unexpected PSM2 error in opening "
|
||||||
"an endpoint: %s\n", error_string);
|
"an endpoint: %s\n", error_string);
|
||||||
return psm_error_defer(token);
|
return psm2_error_defer(token);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return error;
|
return error;
|
||||||
@ -88,24 +88,24 @@ ompi_mtl_psm2_errhandler(psm_ep_t ep, const psm_error_t error,
|
|||||||
int ompi_mtl_psm2_progress( void );
|
int ompi_mtl_psm2_progress( void );
|
||||||
|
|
||||||
int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) {
|
int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) {
|
||||||
psm_error_t err;
|
psm2_error_t err;
|
||||||
psm_ep_t ep; /* endpoint handle */
|
psm2_ep_t ep; /* endpoint handle */
|
||||||
psm_mq_t mq;
|
psm2_mq_t mq;
|
||||||
psm_epid_t epid; /* unique lid+port identifier */
|
psm2_epid_t epid; /* unique lid+port identifier */
|
||||||
psm_uuid_t unique_job_key;
|
psm2_uuid_t unique_job_key;
|
||||||
struct psm_ep_open_opts ep_opt;
|
struct psm2_ep_open_opts ep_opt;
|
||||||
unsigned long long *uu = (unsigned long long *) unique_job_key;
|
unsigned long long *uu = (unsigned long long *) unique_job_key;
|
||||||
char *generated_key;
|
char *generated_key;
|
||||||
char env_string[256];
|
char env_string[256];
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
generated_key = getenv("OMPI_MCA_orte_precondition_transports");
|
generated_key = getenv("OMPI_MCA_orte_precondition_transports");
|
||||||
memset(uu, 0, sizeof(psm_uuid_t));
|
memset(uu, 0, sizeof(psm2_uuid_t));
|
||||||
|
|
||||||
if (!generated_key || (strlen(generated_key) != 33) ||
|
if (!generated_key || (strlen(generated_key) != 33) ||
|
||||||
sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2)
|
sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2)
|
||||||
{
|
{
|
||||||
opal_show_help("help-mtl-psm.txt",
|
opal_show_help("help-mtl-psm2.txt",
|
||||||
"no uuid present", true,
|
"no uuid present", true,
|
||||||
generated_key ? "could not be parsed from" :
|
generated_key ? "could not be parsed from" :
|
||||||
"not present in", ompi_process_info.nodename);
|
"not present in", ompi_process_info.nodename);
|
||||||
@ -114,9 +114,9 @@ int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Handle our own errors for opening endpoints */
|
/* Handle our own errors for opening endpoints */
|
||||||
psm_error_register_handler(ompi_mtl_psm2.ep, ompi_mtl_psm2_errhandler);
|
psm2_error_register_handler(ompi_mtl_psm2.ep, ompi_mtl_psm2_errhandler);
|
||||||
|
|
||||||
/* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM can allocate hardware
|
/* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM2 can allocate hardware
|
||||||
* contexts correctly.
|
* contexts correctly.
|
||||||
*/
|
*/
|
||||||
snprintf(env_string, sizeof(env_string), "%d", local_rank);
|
snprintf(env_string, sizeof(env_string), "%d", local_rank);
|
||||||
@ -125,31 +125,31 @@ int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) {
|
|||||||
setenv("MPI_LOCALNRANKS", env_string, 0);
|
setenv("MPI_LOCALNRANKS", env_string, 0);
|
||||||
|
|
||||||
/* Setup the endpoint options. */
|
/* Setup the endpoint options. */
|
||||||
psm_ep_open_opts_get_defaults(&ep_opt);
|
psm2_ep_open_opts_get_defaults(&ep_opt);
|
||||||
ep_opt.timeout = ompi_mtl_psm2.connect_timeout * 1e9;
|
ep_opt.timeout = ompi_mtl_psm2.connect_timeout * 1e9;
|
||||||
ep_opt.affinity = PSM_EP_OPEN_AFFINITY_SKIP; /* do not let PSM set affinity */
|
ep_opt.affinity = PSM2_EP_OPEN_AFFINITY_SKIP; /* do not let PSM2 set affinity */
|
||||||
|
|
||||||
/* Open PSM endpoint */
|
/* Open PSM2 endpoint */
|
||||||
err = psm_ep_open(unique_job_key, &ep_opt, &ep, &epid);
|
err = psm2_ep_open(unique_job_key, &ep_opt, &ep, &epid);
|
||||||
if (err) {
|
if (err) {
|
||||||
opal_show_help("help-mtl-psm.txt",
|
opal_show_help("help-mtl-psm2.txt",
|
||||||
"unable to open endpoint", true,
|
"unable to open endpoint", true,
|
||||||
psm_error_get_string(err));
|
psm2_error_get_string(err));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Future errors are handled by the default error handler */
|
/* Future errors are handled by the default error handler */
|
||||||
psm_error_register_handler(ompi_mtl_psm2.ep, PSM_ERRHANDLER_DEFAULT);
|
psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_DEFAULT);
|
||||||
|
|
||||||
err = psm_mq_init(ep,
|
err = psm2_mq_init(ep,
|
||||||
0xffff000000000000ULL,
|
0xffff000000000000ULL,
|
||||||
NULL,
|
NULL,
|
||||||
0,
|
0,
|
||||||
&mq);
|
&mq);
|
||||||
if (err) {
|
if (err) {
|
||||||
opal_show_help("help-mtl-psm.txt",
|
opal_show_help("help-mtl-psm2.txt",
|
||||||
"psm init", true,
|
"psm2 init", true,
|
||||||
psm_error_get_string(err));
|
psm2_error_get_string(err));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -160,7 +160,7 @@ int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) {
|
|||||||
OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
|
OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
|
||||||
&mca_mtl_psm2_component.super.mtl_version,
|
&mca_mtl_psm2_component.super.mtl_version,
|
||||||
&ompi_mtl_psm2.epid,
|
&ompi_mtl_psm2.epid,
|
||||||
sizeof(psm_epid_t));
|
sizeof(psm2_epid_t));
|
||||||
|
|
||||||
if (OMPI_SUCCESS != rc) {
|
if (OMPI_SUCCESS != rc) {
|
||||||
opal_output(0, "Open MPI couldn't send PSM2 epid to head node process");
|
opal_output(0, "Open MPI couldn't send PSM2 epid to head node process");
|
||||||
@ -168,7 +168,7 @@ int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* register the psm progress function */
|
/* register the psm2 progress function */
|
||||||
opal_progress_register(ompi_mtl_psm2_progress);
|
opal_progress_register(ompi_mtl_psm2_progress);
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -176,29 +176,29 @@ int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) {
|
|||||||
|
|
||||||
int
|
int
|
||||||
ompi_mtl_psm2_finalize(struct mca_mtl_base_module_t* mtl) {
|
ompi_mtl_psm2_finalize(struct mca_mtl_base_module_t* mtl) {
|
||||||
psm_error_t err;
|
psm2_error_t err;
|
||||||
|
|
||||||
opal_progress_unregister(ompi_mtl_psm2_progress);
|
opal_progress_unregister(ompi_mtl_psm2_progress);
|
||||||
|
|
||||||
/* free resources */
|
/* free resources */
|
||||||
err = psm_mq_finalize(ompi_mtl_psm2.mq);
|
err = psm2_mq_finalize(ompi_mtl_psm2.mq);
|
||||||
if (err) {
|
if (err) {
|
||||||
opal_output(0, "Error in psm_mq_finalize (error %s)\n",
|
opal_output(0, "Error in psm2_mq_finalize (error %s)\n",
|
||||||
psm_error_get_string(err));
|
psm2_error_get_string(err));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = psm_ep_close(ompi_mtl_psm2.ep, PSM_EP_CLOSE_GRACEFUL, 1*1e9);
|
err = psm2_ep_close(ompi_mtl_psm2.ep, PSM2_EP_CLOSE_GRACEFUL, 1*1e9);
|
||||||
if (err) {
|
if (err) {
|
||||||
opal_output(0, "Error in psm_ep_close (error %s)\n",
|
opal_output(0, "Error in psm2_ep_close (error %s)\n",
|
||||||
psm_error_get_string(err));
|
psm2_error_get_string(err));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = psm_finalize();
|
err = psm2_finalize();
|
||||||
if (err) {
|
if (err) {
|
||||||
opal_output(0, "Error in psm_finalize (error %s)\n",
|
opal_output(0, "Error in psm2_finalize (error %s)\n",
|
||||||
psm_error_get_string(err));
|
psm2_error_get_string(err));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -207,18 +207,18 @@ ompi_mtl_psm2_finalize(struct mca_mtl_base_module_t* mtl) {
|
|||||||
|
|
||||||
static
|
static
|
||||||
const char *
|
const char *
|
||||||
ompi_mtl_psm2_connect_error_msg(psm_error_t err)
|
ompi_mtl_psm2_connect_error_msg(psm2_error_t err)
|
||||||
{
|
{
|
||||||
switch (err) { /* See if we expect the error */
|
switch (err) { /* See if we expect the error */
|
||||||
case PSM_EPID_UNREACHABLE:
|
case PSM2_EPID_UNREACHABLE:
|
||||||
case PSM_EPID_INVALID_NODE:
|
case PSM2_EPID_INVALID_NODE:
|
||||||
case PSM_EPID_INVALID_MTU:
|
case PSM2_EPID_INVALID_MTU:
|
||||||
case PSM_EPID_INVALID_UUID_KEY:
|
case PSM2_EPID_INVALID_UUID_KEY:
|
||||||
case PSM_EPID_INVALID_VERSION:
|
case PSM2_EPID_INVALID_VERSION:
|
||||||
case PSM_EPID_INVALID_CONNECT:
|
case PSM2_EPID_INVALID_CONNECT:
|
||||||
return psm_error_get_string(err);
|
return psm2_error_get_string(err);
|
||||||
break;
|
break;
|
||||||
case PSM_EPID_UNKNOWN:
|
case PSM2_EPID_UNKNOWN:
|
||||||
return "Connect status could not be determined "
|
return "Connect status could not be determined "
|
||||||
"because of other errors";
|
"because of other errors";
|
||||||
default:
|
default:
|
||||||
@ -241,23 +241,23 @@ ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t *mtl,
|
|||||||
{
|
{
|
||||||
int i,j;
|
int i,j;
|
||||||
int rc;
|
int rc;
|
||||||
psm_epid_t *epids_in = NULL;
|
psm2_epid_t *epids_in = NULL;
|
||||||
int *mask_in = NULL;
|
int *mask_in = NULL;
|
||||||
psm_epid_t *epid;
|
psm2_epid_t *epid;
|
||||||
psm_epaddr_t *epaddrs_out = NULL;
|
psm2_epaddr_t *epaddrs_out = NULL;
|
||||||
psm_error_t *errs_out = NULL, err;
|
psm2_error_t *errs_out = NULL, err;
|
||||||
size_t size;
|
size_t size;
|
||||||
int proc_errors[PSM_ERROR_LAST] = { 0 };
|
int proc_errors[PSM2_ERROR_LAST] = { 0 };
|
||||||
int timeout_in_secs;
|
int timeout_in_secs;
|
||||||
|
|
||||||
assert(mtl == &ompi_mtl_psm2.super);
|
assert(mtl == &ompi_mtl_psm2.super);
|
||||||
rc = OMPI_ERR_OUT_OF_RESOURCE;
|
rc = OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
|
|
||||||
errs_out = (psm_error_t *) malloc(nprocs * sizeof(psm_error_t));
|
errs_out = (psm2_error_t *) malloc(nprocs * sizeof(psm2_error_t));
|
||||||
if (errs_out == NULL) {
|
if (errs_out == NULL) {
|
||||||
goto bail;
|
goto bail;
|
||||||
}
|
}
|
||||||
epids_in = (psm_epid_t *) malloc(nprocs * sizeof(psm_epid_t));
|
epids_in = (psm2_epid_t *) malloc(nprocs * sizeof(psm2_epid_t));
|
||||||
if (epids_in == NULL) {
|
if (epids_in == NULL) {
|
||||||
goto bail;
|
goto bail;
|
||||||
}
|
}
|
||||||
@ -265,7 +265,7 @@ ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t *mtl,
|
|||||||
if (mask_in == NULL) {
|
if (mask_in == NULL) {
|
||||||
goto bail;
|
goto bail;
|
||||||
}
|
}
|
||||||
epaddrs_out = (psm_epaddr_t *) malloc(nprocs * sizeof(psm_epaddr_t));
|
epaddrs_out = (psm2_epaddr_t *) malloc(nprocs * sizeof(psm2_epaddr_t));
|
||||||
if (epaddrs_out == NULL) {
|
if (epaddrs_out == NULL) {
|
||||||
goto bail;
|
goto bail;
|
||||||
}
|
}
|
||||||
@ -281,7 +281,7 @@ ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t *mtl,
|
|||||||
|
|
||||||
OPAL_MODEX_RECV(rc, &mca_mtl_psm2_component.super.mtl_version,
|
OPAL_MODEX_RECV(rc, &mca_mtl_psm2_component.super.mtl_version,
|
||||||
&procs[i]->super.proc_name, (void**)&epid, &size);
|
&procs[i]->super.proc_name, (void**)&epid, &size);
|
||||||
if (rc != OMPI_SUCCESS || size != sizeof(psm_epid_t)) {
|
if (rc != OMPI_SUCCESS || size != sizeof(psm2_epid_t)) {
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
epids_in[i] = *epid;
|
epids_in[i] = *epid;
|
||||||
@ -290,9 +290,9 @@ ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t *mtl,
|
|||||||
|
|
||||||
timeout_in_secs = max(ompi_mtl_psm2.connect_timeout, 0.5 * nprocs);
|
timeout_in_secs = max(ompi_mtl_psm2.connect_timeout, 0.5 * nprocs);
|
||||||
|
|
||||||
psm_error_register_handler(ompi_mtl_psm2.ep, PSM_ERRHANDLER_NOP);
|
psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_NOP);
|
||||||
|
|
||||||
err = psm_ep_connect(ompi_mtl_psm2.ep,
|
err = psm2_ep_connect(ompi_mtl_psm2.ep,
|
||||||
nprocs,
|
nprocs,
|
||||||
epids_in,
|
epids_in,
|
||||||
mask_in,
|
mask_in,
|
||||||
@ -302,19 +302,19 @@ ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t *mtl,
|
|||||||
if (err) {
|
if (err) {
|
||||||
char *errstr = (char *) ompi_mtl_psm2_connect_error_msg(err);
|
char *errstr = (char *) ompi_mtl_psm2_connect_error_msg(err);
|
||||||
if (errstr == NULL) {
|
if (errstr == NULL) {
|
||||||
opal_output(0, "PSM returned unhandled/unknown connect error: %s\n",
|
opal_output(0, "PSM2 returned unhandled/unknown connect error: %s\n",
|
||||||
psm_error_get_string(err));
|
psm2_error_get_string(err));
|
||||||
}
|
}
|
||||||
for (i = 0; i < (int) nprocs; i++) {
|
for (i = 0; i < (int) nprocs; i++) {
|
||||||
if (0 == mask_in[i]) {
|
if (0 == mask_in[i]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
psm_error_t thiserr = errs_out[i];
|
psm2_error_t thiserr = errs_out[i];
|
||||||
errstr = (char *) ompi_mtl_psm2_connect_error_msg(thiserr);
|
errstr = (char *) ompi_mtl_psm2_connect_error_msg(thiserr);
|
||||||
if (proc_errors[thiserr] == 0) {
|
if (proc_errors[thiserr] == 0) {
|
||||||
proc_errors[thiserr] = 1;
|
proc_errors[thiserr] = 1;
|
||||||
opal_output(0, "PSM EP connect error (%s):",
|
opal_output(0, "PSM2 EP connect error (%s):",
|
||||||
errstr ? errstr : "unknown connect error");
|
errstr ? errstr : "unknown connect error");
|
||||||
for (j = 0; j < (int) nprocs; j++) {
|
for (j = 0; j < (int) nprocs; j++) {
|
||||||
if (errs_out[j] == thiserr) {
|
if (errs_out[j] == thiserr) {
|
||||||
@ -330,9 +330,9 @@ ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t *mtl,
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* Default error handling is enabled, errors will not be returned to
|
/* Default error handling is enabled, errors will not be returned to
|
||||||
* user. PSM prints the error and the offending endpoint's hostname
|
* user. PSM2 prints the error and the offending endpoint's hostname
|
||||||
* and exits with -1 */
|
* and exits with -1 */
|
||||||
psm_error_register_handler(ompi_mtl_psm2.ep, PSM_ERRHANDLER_DEFAULT);
|
psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_DEFAULT);
|
||||||
|
|
||||||
/* Fill in endpoint data */
|
/* Fill in endpoint data */
|
||||||
for (i = 0; i < (int) nprocs; i++) {
|
for (i = 0; i < (int) nprocs; i++) {
|
||||||
@ -393,41 +393,41 @@ ompi_mtl_psm2_del_comm(struct mca_mtl_base_module_t *mtl,
|
|||||||
|
|
||||||
|
|
||||||
int ompi_mtl_psm2_progress( void ) {
|
int ompi_mtl_psm2_progress( void ) {
|
||||||
psm_error_t err;
|
psm2_error_t err;
|
||||||
mca_mtl_psm2_request_t* mtl_psm2_request;
|
mca_mtl_psm2_request_t* mtl_psm2_request;
|
||||||
psm_mq_status2_t psm_status;
|
psm2_mq_status2_t psm2_status;
|
||||||
psm_mq_req_t req;
|
psm2_mq_req_t req;
|
||||||
int completed = 1;
|
int completed = 1;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
err = psm_mq_ipeek2(ompi_mtl_psm2.mq, &req, NULL);
|
err = psm2_mq_ipeek2(ompi_mtl_psm2.mq, &req, NULL);
|
||||||
if (err == PSM_MQ_INCOMPLETE) {
|
if (err == PSM2_MQ_INCOMPLETE) {
|
||||||
return completed;
|
return completed;
|
||||||
} else if (err != PSM_OK) {
|
} else if (err != PSM2_OK) {
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
completed++;
|
completed++;
|
||||||
|
|
||||||
err = psm_mq_test2(&req, &psm_status);
|
err = psm2_mq_test2(&req, &psm2_status);
|
||||||
if (err != PSM_OK) {
|
if (err != PSM2_OK) {
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
mtl_psm2_request = (mca_mtl_psm2_request_t*) psm_status.context;
|
mtl_psm2_request = (mca_mtl_psm2_request_t*) psm2_status.context;
|
||||||
|
|
||||||
if (mtl_psm2_request->type == OMPI_mtl_psm2_IRECV) {
|
if (mtl_psm2_request->type == OMPI_mtl_psm2_IRECV) {
|
||||||
|
|
||||||
mtl_psm2_request->super.ompi_req->req_status.MPI_SOURCE =
|
mtl_psm2_request->super.ompi_req->req_status.MPI_SOURCE =
|
||||||
psm_status.msg_tag.tag2;
|
psm2_status.msg_tag.tag2;
|
||||||
mtl_psm2_request->super.ompi_req->req_status.MPI_TAG =
|
mtl_psm2_request->super.ompi_req->req_status.MPI_TAG =
|
||||||
psm_status.msg_tag.tag1;
|
psm2_status.msg_tag.tag1;
|
||||||
mtl_psm2_request->super.ompi_req->req_status._ucount =
|
mtl_psm2_request->super.ompi_req->req_status._ucount =
|
||||||
psm_status.nbytes;
|
psm2_status.nbytes;
|
||||||
|
|
||||||
ompi_mtl_datatype_unpack(mtl_psm2_request->convertor,
|
ompi_mtl_datatype_unpack(mtl_psm2_request->convertor,
|
||||||
mtl_psm2_request->buf,
|
mtl_psm2_request->buf,
|
||||||
psm_status.msg_length);
|
psm2_status.msg_length);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(mtl_psm2_request->type == OMPI_mtl_psm2_ISEND) {
|
if(mtl_psm2_request->type == OMPI_mtl_psm2_ISEND) {
|
||||||
@ -436,12 +436,12 @@ int ompi_mtl_psm2_progress( void ) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (psm_status.error_code) {
|
switch (psm2_status.error_code) {
|
||||||
case PSM_OK:
|
case PSM2_OK:
|
||||||
mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
|
mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
|
||||||
OMPI_SUCCESS;
|
OMPI_SUCCESS;
|
||||||
break;
|
break;
|
||||||
case PSM_MQ_TRUNCATION:
|
case PSM2_MQ_TRUNCATION:
|
||||||
mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
|
mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
|
||||||
MPI_ERR_TRUNCATE;
|
MPI_ERR_TRUNCATE;
|
||||||
break;
|
break;
|
||||||
@ -456,8 +456,8 @@ int ompi_mtl_psm2_progress( void ) {
|
|||||||
while (1);
|
while (1);
|
||||||
|
|
||||||
error:
|
error:
|
||||||
opal_show_help("help-mtl-psm.txt",
|
opal_show_help("help-mtl-psm2.txt",
|
||||||
"error polling network", true,
|
"error polling network", true,
|
||||||
psm_error_get_string(err));
|
psm2_error_get_string(err));
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -26,28 +26,28 @@ int ompi_mtl_psm2_cancel(struct mca_mtl_base_module_t* mtl,
|
|||||||
struct mca_mtl_request_t *mtl_request,
|
struct mca_mtl_request_t *mtl_request,
|
||||||
int flag) {
|
int flag) {
|
||||||
|
|
||||||
psm_error_t err;
|
psm2_error_t err;
|
||||||
psm_mq_status_t status;
|
psm2_mq_status_t status;
|
||||||
|
|
||||||
mca_mtl_psm2_request_t *mtl_psm2_request =
|
mca_mtl_psm2_request_t *mtl_psm2_request =
|
||||||
(mca_mtl_psm2_request_t*) mtl_request;
|
(mca_mtl_psm2_request_t*) mtl_request;
|
||||||
|
|
||||||
/* PSM does not support canceling sends */
|
/* PSM2 does not support canceling sends */
|
||||||
if(OMPI_mtl_psm2_ISEND == mtl_psm2_request->type) {
|
if(OMPI_mtl_psm2_ISEND == mtl_psm2_request->type) {
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = psm_mq_cancel(&mtl_psm2_request->psm_request);
|
err = psm2_mq_cancel(&mtl_psm2_request->psm2_request);
|
||||||
if(PSM_OK == err) {
|
if(PSM2_OK == err) {
|
||||||
err = psm_mq_test(&mtl_psm2_request->psm_request, &status);
|
err = psm2_mq_test(&mtl_psm2_request->psm2_request, &status);
|
||||||
if(PSM_OK == err) {
|
if(PSM2_OK == err) {
|
||||||
mtl_request->ompi_req->req_status._cancelled = true;
|
mtl_request->ompi_req->req_status._cancelled = true;
|
||||||
mtl_psm2_request->super.completion_callback(&mtl_psm2_request->super);
|
mtl_psm2_request->super.completion_callback(&mtl_psm2_request->super);
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
} else {
|
} else {
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
} else if(PSM_MQ_INCOMPLETE == err) {
|
} else if(PSM2_MQ_INCOMPLETE == err) {
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -32,7 +32,7 @@
|
|||||||
#include "mtl_psm2_types.h"
|
#include "mtl_psm2_types.h"
|
||||||
#include "mtl_psm2_request.h"
|
#include "mtl_psm2_request.h"
|
||||||
|
|
||||||
#include "psm.h"
|
#include "psm2.h"
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
@ -80,7 +80,7 @@ ompi_mtl_psm2_component_register(void)
|
|||||||
ompi_mtl_psm2.connect_timeout = 180;
|
ompi_mtl_psm2.connect_timeout = 180;
|
||||||
(void) mca_base_component_var_register(&mca_mtl_psm2_component.super.mtl_version,
|
(void) mca_base_component_var_register(&mca_mtl_psm2_component.super.mtl_version,
|
||||||
"connect_timeout",
|
"connect_timeout",
|
||||||
"PSM connection timeout value in seconds",
|
"PSM2 connection timeout value in seconds",
|
||||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||||
OPAL_INFO_LVL_9,
|
OPAL_INFO_LVL_9,
|
||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||||||
@ -116,7 +116,7 @@ static int
|
|||||||
ompi_mtl_psm2_component_query(mca_base_module_t **module, int *priority)
|
ompi_mtl_psm2_component_query(mca_base_module_t **module, int *priority)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* if we get here it means that PSM is available so give high priority
|
* if we get here it means that PSM2 is available so give high priority
|
||||||
*/
|
*/
|
||||||
|
|
||||||
*priority = param_priority;
|
*priority = param_priority;
|
||||||
@ -165,14 +165,14 @@ static mca_mtl_base_module_t *
|
|||||||
ompi_mtl_psm2_component_init(bool enable_progress_threads,
|
ompi_mtl_psm2_component_init(bool enable_progress_threads,
|
||||||
bool enable_mpi_threads)
|
bool enable_mpi_threads)
|
||||||
{
|
{
|
||||||
psm_error_t err;
|
psm2_error_t err;
|
||||||
int verno_major = PSM_VERNO_MAJOR;
|
int verno_major = PSM2_VERNO_MAJOR;
|
||||||
int verno_minor = PSM_VERNO_MINOR;
|
int verno_minor = PSM2_VERNO_MINOR;
|
||||||
int local_rank = -1, num_local_procs = 0;
|
int local_rank = -1, num_local_procs = 0;
|
||||||
int num_total_procs = 0;
|
int num_total_procs = 0;
|
||||||
|
|
||||||
/* Compute the total number of processes on this host and our local rank
|
/* Compute the total number of processes on this host and our local rank
|
||||||
* on that node. We need to provide PSM with these values so it can
|
* on that node. We need to provide PSM2 with these values so it can
|
||||||
* allocate hardware contexts appropriately across processes.
|
* allocate hardware contexts appropriately across processes.
|
||||||
*/
|
*/
|
||||||
if (OMPI_SUCCESS != get_num_local_procs(&num_local_procs)) {
|
if (OMPI_SUCCESS != get_num_local_procs(&num_local_procs)) {
|
||||||
@ -190,27 +190,27 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = psm_error_register_handler(NULL /* no ep */,
|
err = psm2_error_register_handler(NULL /* no ep */,
|
||||||
PSM_ERRHANDLER_NOP);
|
PSM2_ERRHANDLER_NOP);
|
||||||
if (err) {
|
if (err) {
|
||||||
opal_output(0, "Error in psm_error_register_handler (error %s)\n",
|
opal_output(0, "Error in psm2_error_register_handler (error %s)\n",
|
||||||
psm_error_get_string(err));
|
psm2_error_get_string(err));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (num_local_procs == num_total_procs) {
|
if (num_local_procs == num_total_procs) {
|
||||||
setenv("PSM_DEVICES", "self,shm", 0);
|
setenv("PSM2_DEVICES", "self,shm", 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
err = psm_init(&verno_major, &verno_minor);
|
err = psm2_init(&verno_major, &verno_minor);
|
||||||
if (err) {
|
if (err) {
|
||||||
opal_show_help("help-mtl-psm.txt",
|
opal_show_help("help-mtl-psm2.txt",
|
||||||
"psm init", true,
|
"psm2 init", true,
|
||||||
psm_error_get_string(err));
|
psm2_error_get_string(err));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Complete PSM initialization */
|
/* Complete PSM2 initialization */
|
||||||
ompi_mtl_psm2_module_init(local_rank, num_local_procs);
|
ompi_mtl_psm2_module_init(local_rank, num_local_procs);
|
||||||
|
|
||||||
ompi_mtl_psm2.super.mtl_request_size =
|
ompi_mtl_psm2.super.mtl_request_size =
|
||||||
|
@ -45,17 +45,17 @@ struct mca_mtl_psm2_endpoint_t {
|
|||||||
struct mca_mtl_psm2_module_t* mtl_psm2_module;
|
struct mca_mtl_psm2_module_t* mtl_psm2_module;
|
||||||
/**< MTL instance that created this connection */
|
/**< MTL instance that created this connection */
|
||||||
|
|
||||||
psm_epid_t peer_epid;
|
psm2_epid_t peer_epid;
|
||||||
/**< The unique epid for the opened port */
|
/**< The unique epid for the opened port */
|
||||||
|
|
||||||
psm_epaddr_t peer_addr;
|
psm2_epaddr_t peer_addr;
|
||||||
/**< The connected endpoint handle*/
|
/**< The connected endpoint handle*/
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct mca_mtl_psm2_endpoint_t mca_mtl_psm2_endpoint_t;
|
typedef struct mca_mtl_psm2_endpoint_t mca_mtl_psm2_endpoint_t;
|
||||||
OBJ_CLASS_DECLARATION(mca_mtl_psm2_endpoint);
|
OBJ_CLASS_DECLARATION(mca_mtl_psm2_endpoint);
|
||||||
|
|
||||||
static inline mca_mtl_psm_endpoint_t *ompi_mtl_psm2_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc)
|
static inline mca_mtl_psm2_endpoint_t *ompi_mtl_psm2_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc)
|
||||||
{
|
{
|
||||||
if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL])) {
|
if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL])) {
|
||||||
ompi_mtl_psm2_add_procs (mtl, 1, &ompi_proc);
|
ompi_mtl_psm2_add_procs (mtl, 1, &ompi_proc);
|
||||||
|
@ -35,15 +35,15 @@ int ompi_mtl_psm2_iprobe(struct mca_mtl_base_module_t* mtl,
|
|||||||
int *flag,
|
int *flag,
|
||||||
struct ompi_status_public_t *status)
|
struct ompi_status_public_t *status)
|
||||||
{
|
{
|
||||||
psm_mq_tag_t mqtag, tagsel;
|
psm2_mq_tag_t mqtag, tagsel;
|
||||||
psm_mq_status2_t mqstat;
|
psm2_mq_status2_t mqstat;
|
||||||
psm_error_t err;
|
psm2_error_t err;
|
||||||
|
|
||||||
PSM_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel);
|
PSM2_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel);
|
||||||
|
|
||||||
err = psm_mq_iprobe2(ompi_mtl_psm2.mq,
|
err = psm2_mq_iprobe2(ompi_mtl_psm2.mq,
|
||||||
PSM_MQ_ANY_ADDR, &mqtag, &tagsel, &mqstat);
|
PSM2_MQ_ANY_ADDR, &mqtag, &tagsel, &mqstat);
|
||||||
if (err == PSM_OK) {
|
if (err == PSM2_OK) {
|
||||||
*flag = 1;
|
*flag = 1;
|
||||||
if(MPI_STATUS_IGNORE != status) {
|
if(MPI_STATUS_IGNORE != status) {
|
||||||
status->MPI_SOURCE = mqstat.msg_tag.tag2;
|
status->MPI_SOURCE = mqstat.msg_tag.tag2;
|
||||||
@ -51,10 +51,10 @@ int ompi_mtl_psm2_iprobe(struct mca_mtl_base_module_t* mtl,
|
|||||||
status->_ucount = mqstat.nbytes;
|
status->_ucount = mqstat.nbytes;
|
||||||
|
|
||||||
switch (mqstat.error_code) {
|
switch (mqstat.error_code) {
|
||||||
case PSM_OK:
|
case PSM2_OK:
|
||||||
status->MPI_ERROR = OMPI_SUCCESS;
|
status->MPI_ERROR = OMPI_SUCCESS;
|
||||||
break;
|
break;
|
||||||
case PSM_MQ_TRUNCATION:
|
case PSM2_MQ_TRUNCATION:
|
||||||
status->MPI_ERROR = MPI_ERR_TRUNCATE;
|
status->MPI_ERROR = MPI_ERR_TRUNCATE;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -64,7 +64,7 @@ int ompi_mtl_psm2_iprobe(struct mca_mtl_base_module_t* mtl,
|
|||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
else if (err == PSM_MQ_INCOMPLETE) {
|
else if (err == PSM2_MQ_INCOMPLETE) {
|
||||||
*flag = 0;
|
*flag = 0;
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -83,16 +83,16 @@ ompi_mtl_psm2_improbe(struct mca_mtl_base_module_t *mtl,
|
|||||||
struct ompi_status_public_t *status)
|
struct ompi_status_public_t *status)
|
||||||
{
|
{
|
||||||
struct ompi_message_t* msg;
|
struct ompi_message_t* msg;
|
||||||
psm_mq_tag_t mqtag, tagsel;
|
psm2_mq_tag_t mqtag, tagsel;
|
||||||
psm_mq_status2_t mqstat;
|
psm2_mq_status2_t mqstat;
|
||||||
psm_mq_req_t mqreq;
|
psm2_mq_req_t mqreq;
|
||||||
psm_error_t err;
|
psm2_error_t err;
|
||||||
|
|
||||||
PSM_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel);
|
PSM2_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel);
|
||||||
|
|
||||||
err = psm_mq_improbe2(ompi_mtl_psm2.mq,
|
err = psm2_mq_improbe2(ompi_mtl_psm2.mq,
|
||||||
PSM_MQ_ANY_ADDR, &mqtag, &tagsel, &mqreq, &mqstat);
|
PSM2_MQ_ANY_ADDR, &mqtag, &tagsel, &mqreq, &mqstat);
|
||||||
if (err == PSM_OK) {
|
if (err == PSM2_OK) {
|
||||||
|
|
||||||
if(MPI_STATUS_IGNORE != status) {
|
if(MPI_STATUS_IGNORE != status) {
|
||||||
status->MPI_SOURCE = mqstat.msg_tag.tag2;
|
status->MPI_SOURCE = mqstat.msg_tag.tag2;
|
||||||
@ -100,10 +100,10 @@ ompi_mtl_psm2_improbe(struct mca_mtl_base_module_t *mtl,
|
|||||||
status->_ucount = mqstat.nbytes;
|
status->_ucount = mqstat.nbytes;
|
||||||
|
|
||||||
switch (mqstat.error_code) {
|
switch (mqstat.error_code) {
|
||||||
case PSM_OK:
|
case PSM2_OK:
|
||||||
status->MPI_ERROR = OMPI_SUCCESS;
|
status->MPI_ERROR = OMPI_SUCCESS;
|
||||||
break;
|
break;
|
||||||
case PSM_MQ_TRUNCATION:
|
case PSM2_MQ_TRUNCATION:
|
||||||
status->MPI_ERROR = MPI_ERR_TRUNCATE;
|
status->MPI_ERROR = MPI_ERR_TRUNCATE;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -124,7 +124,7 @@ ompi_mtl_psm2_improbe(struct mca_mtl_base_module_t *mtl,
|
|||||||
*message = msg;
|
*message = msg;
|
||||||
*matched = 1;
|
*matched = 1;
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
} else if(err == PSM_MQ_INCOMPLETE) {
|
} else if(err == PSM2_MQ_INCOMPLETE) {
|
||||||
*matched = 0;
|
*matched = 0;
|
||||||
*message = MPI_MESSAGE_NULL;
|
*message = MPI_MESSAGE_NULL;
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
@ -39,10 +39,10 @@ ompi_mtl_psm2_irecv(struct mca_mtl_base_module_t* mtl,
|
|||||||
struct mca_mtl_request_t *mtl_request)
|
struct mca_mtl_request_t *mtl_request)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
psm_error_t err;
|
psm2_error_t err;
|
||||||
mca_mtl_psm2_request_t * mtl_psm2_request = (mca_mtl_psm2_request_t*) mtl_request;
|
mca_mtl_psm2_request_t * mtl_psm2_request = (mca_mtl_psm2_request_t*) mtl_request;
|
||||||
psm_mq_tag_t mqtag;
|
psm2_mq_tag_t mqtag;
|
||||||
psm_mq_tag_t tagsel;
|
psm2_mq_tag_t tagsel;
|
||||||
size_t length;
|
size_t length;
|
||||||
|
|
||||||
ret = ompi_mtl_datatype_recv_buf(convertor,
|
ret = ompi_mtl_datatype_recv_buf(convertor,
|
||||||
@ -56,22 +56,22 @@ ompi_mtl_psm2_irecv(struct mca_mtl_base_module_t* mtl,
|
|||||||
mtl_psm2_request->convertor = convertor;
|
mtl_psm2_request->convertor = convertor;
|
||||||
mtl_psm2_request->type = OMPI_mtl_psm2_IRECV;
|
mtl_psm2_request->type = OMPI_mtl_psm2_IRECV;
|
||||||
|
|
||||||
PSM_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel);
|
PSM2_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel);
|
||||||
|
|
||||||
err = psm_mq_irecv2(ompi_mtl_psm2.mq,
|
err = psm2_mq_irecv2(ompi_mtl_psm2.mq,
|
||||||
PSM_MQ_ANY_ADDR,
|
PSM2_MQ_ANY_ADDR,
|
||||||
&mqtag,
|
&mqtag,
|
||||||
&tagsel,
|
&tagsel,
|
||||||
0,
|
0,
|
||||||
mtl_psm2_request->buf,
|
mtl_psm2_request->buf,
|
||||||
length,
|
length,
|
||||||
mtl_psm2_request,
|
mtl_psm2_request,
|
||||||
&mtl_psm2_request->psm_request);
|
&mtl_psm2_request->psm2_request);
|
||||||
|
|
||||||
if (err) {
|
if (err) {
|
||||||
opal_show_help("help-mtl-psm.txt",
|
opal_show_help("help-mtl-psm2.txt",
|
||||||
"error posting receive", true,
|
"error posting receive", true,
|
||||||
psm_error_get_string(err),
|
psm2_error_get_string(err),
|
||||||
mtl_psm2_request->buf, length);
|
mtl_psm2_request->buf, length);
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
@ -89,11 +89,11 @@ ompi_mtl_psm2_imrecv(struct mca_mtl_base_module_t* mtl,
|
|||||||
mca_mtl_psm2_request_t *mtl_psm2_request =
|
mca_mtl_psm2_request_t *mtl_psm2_request =
|
||||||
(mca_mtl_psm2_request_t*) mtl_request;
|
(mca_mtl_psm2_request_t*) mtl_request;
|
||||||
size_t length;
|
size_t length;
|
||||||
psm_error_t err;
|
psm2_error_t err;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
mtl_psm2_request->psm_request =
|
mtl_psm2_request->psm2_request =
|
||||||
(psm_mq_req_t)(*message)->req_ptr;
|
(psm2_mq_req_t)(*message)->req_ptr;
|
||||||
|
|
||||||
ret = ompi_mtl_datatype_recv_buf(convertor,
|
ret = ompi_mtl_datatype_recv_buf(convertor,
|
||||||
&mtl_psm2_request->buf,
|
&mtl_psm2_request->buf,
|
||||||
@ -107,14 +107,14 @@ ompi_mtl_psm2_imrecv(struct mca_mtl_base_module_t* mtl,
|
|||||||
mtl_psm2_request->type = OMPI_mtl_psm2_IRECV;
|
mtl_psm2_request->type = OMPI_mtl_psm2_IRECV;
|
||||||
|
|
||||||
|
|
||||||
err = psm_mq_imrecv(ompi_mtl_psm2.mq, 0,
|
err = psm2_mq_imrecv(ompi_mtl_psm2.mq, 0,
|
||||||
mtl_psm2_request->buf, length, mtl_psm2_request,
|
mtl_psm2_request->buf, length, mtl_psm2_request,
|
||||||
&mtl_psm2_request->psm_request);
|
&mtl_psm2_request->psm2_request);
|
||||||
|
|
||||||
if(err) {
|
if(err) {
|
||||||
opal_show_help("help-mtl-psm.txt",
|
opal_show_help("help-mtl-psm2.txt",
|
||||||
"error posting receive", true,
|
"error posting receive", true,
|
||||||
psm_error_get_string(err),
|
psm2_error_get_string(err),
|
||||||
mtl_psm2_request->buf, length);
|
mtl_psm2_request->buf, length);
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
@ -32,8 +32,8 @@ typedef enum {
|
|||||||
struct mca_mtl_psm2_request_t {
|
struct mca_mtl_psm2_request_t {
|
||||||
struct mca_mtl_request_t super;
|
struct mca_mtl_request_t super;
|
||||||
mca_mtl_psm2_request_type_t type;
|
mca_mtl_psm2_request_type_t type;
|
||||||
psm_mq_req_t psm_request;
|
psm2_mq_req_t psm2_request;
|
||||||
/* psm_segment_t psm_segment[1]; */
|
/* psm2_segment_t psm2_segment[1]; */
|
||||||
void *buf;
|
void *buf;
|
||||||
size_t length;
|
size_t length;
|
||||||
struct opal_convertor_t *convertor;
|
struct opal_convertor_t *convertor;
|
||||||
|
@ -36,18 +36,18 @@ ompi_mtl_psm2_send(struct mca_mtl_base_module_t* mtl,
|
|||||||
struct opal_convertor_t *convertor,
|
struct opal_convertor_t *convertor,
|
||||||
mca_pml_base_send_mode_t mode)
|
mca_pml_base_send_mode_t mode)
|
||||||
{
|
{
|
||||||
psm_error_t err;
|
psm2_error_t err;
|
||||||
mca_mtl_psm2_request_t mtl_psm2_request;
|
mca_mtl_psm2_request_t mtl_psm2_request;
|
||||||
psm_mq_tag_t mqtag;
|
psm2_mq_tag_t mqtag;
|
||||||
uint32_t flags = 0;
|
uint32_t flags = 0;
|
||||||
int ret;
|
int ret;
|
||||||
size_t length;
|
size_t length;
|
||||||
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
|
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
|
||||||
mca_mtl_psm2_endpoint_t* psm_endpoint = ompi_mtl_psm2_get_endpoint (mtl, ompi_proc);
|
mca_mtl_psm2_endpoint_t* psm2_endpoint = ompi_mtl_psm2_get_endpoint (mtl, ompi_proc);
|
||||||
|
|
||||||
assert(mtl == &ompi_mtl_psm2.super);
|
assert(mtl == &ompi_mtl_psm2.super);
|
||||||
|
|
||||||
PSM_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag, mqtag);
|
PSM2_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag, mqtag);
|
||||||
|
|
||||||
ret = ompi_mtl_datatype_pack(convertor,
|
ret = ompi_mtl_datatype_pack(convertor,
|
||||||
&mtl_psm2_request.buf,
|
&mtl_psm2_request.buf,
|
||||||
@ -62,10 +62,10 @@ ompi_mtl_psm2_send(struct mca_mtl_base_module_t* mtl,
|
|||||||
if (OMPI_SUCCESS != ret) return ret;
|
if (OMPI_SUCCESS != ret) return ret;
|
||||||
|
|
||||||
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS)
|
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS)
|
||||||
flags |= PSM_MQ_FLAG_SENDSYNC;
|
flags |= PSM2_MQ_FLAG_SENDSYNC;
|
||||||
|
|
||||||
err = psm_mq_send2(ompi_mtl_psm2.mq,
|
err = psm2_mq_send2(ompi_mtl_psm2.mq,
|
||||||
psm_endpoint->peer_addr,
|
psm2_endpoint->peer_addr,
|
||||||
flags,
|
flags,
|
||||||
&mqtag,
|
&mqtag,
|
||||||
mtl_psm2_request.buf,
|
mtl_psm2_request.buf,
|
||||||
@ -75,7 +75,7 @@ ompi_mtl_psm2_send(struct mca_mtl_base_module_t* mtl,
|
|||||||
free(mtl_psm2_request.buf);
|
free(mtl_psm2_request.buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
return err == PSM_OK ? OMPI_SUCCESS : OMPI_ERROR;
|
return err == PSM2_OK ? OMPI_SUCCESS : OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
@ -88,18 +88,18 @@ ompi_mtl_psm2_isend(struct mca_mtl_base_module_t* mtl,
|
|||||||
bool blocking,
|
bool blocking,
|
||||||
mca_mtl_request_t * mtl_request)
|
mca_mtl_request_t * mtl_request)
|
||||||
{
|
{
|
||||||
psm_error_t psm_error;
|
psm2_error_t psm2_error;
|
||||||
psm_mq_tag_t mqtag;
|
psm2_mq_tag_t mqtag;
|
||||||
uint32_t flags = 0;
|
uint32_t flags = 0;
|
||||||
int ret;
|
int ret;
|
||||||
mca_mtl_psm2_request_t * mtl_psm2_request = (mca_mtl_psm2_request_t*) mtl_request;
|
mca_mtl_psm2_request_t * mtl_psm2_request = (mca_mtl_psm2_request_t*) mtl_request;
|
||||||
size_t length;
|
size_t length;
|
||||||
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
|
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
|
||||||
mca_mtl_psm2_endpoint_t* psm_endpoint = ompi_mtl_psm2_get_endpoint (mtl, ompi_proc);
|
mca_mtl_psm2_endpoint_t* psm2_endpoint = ompi_mtl_psm2_get_endpoint (mtl, ompi_proc);
|
||||||
|
|
||||||
assert(mtl == &ompi_mtl_psm2.super);
|
assert(mtl == &ompi_mtl_psm2.super);
|
||||||
|
|
||||||
PSM_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag, mqtag);
|
PSM2_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag, mqtag);
|
||||||
|
|
||||||
|
|
||||||
ret = ompi_mtl_datatype_pack(convertor,
|
ret = ompi_mtl_datatype_pack(convertor,
|
||||||
@ -114,16 +114,16 @@ ompi_mtl_psm2_isend(struct mca_mtl_base_module_t* mtl,
|
|||||||
if (OMPI_SUCCESS != ret) return ret;
|
if (OMPI_SUCCESS != ret) return ret;
|
||||||
|
|
||||||
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS)
|
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS)
|
||||||
flags |= PSM_MQ_FLAG_SENDSYNC;
|
flags |= PSM2_MQ_FLAG_SENDSYNC;
|
||||||
|
|
||||||
psm_error = psm_mq_isend2(ompi_mtl_psm2.mq,
|
psm2_error = psm2_mq_isend2(ompi_mtl_psm2.mq,
|
||||||
psm_endpoint->peer_addr,
|
psm2_endpoint->peer_addr,
|
||||||
flags,
|
flags,
|
||||||
&mqtag,
|
&mqtag,
|
||||||
mtl_psm2_request->buf,
|
mtl_psm2_request->buf,
|
||||||
length,
|
length,
|
||||||
mtl_psm2_request,
|
mtl_psm2_request,
|
||||||
&mtl_psm2_request->psm_request);
|
&mtl_psm2_request->psm2_request);
|
||||||
|
|
||||||
return psm_error == PSM_OK ? OMPI_SUCCESS : OMPI_ERROR;
|
return psm2_error == PSM2_OK ? OMPI_SUCCESS : OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
@ -45,10 +45,10 @@ struct mca_mtl_psm2_module_t {
|
|||||||
|
|
||||||
int32_t connect_timeout;
|
int32_t connect_timeout;
|
||||||
|
|
||||||
psm_ep_t ep;
|
psm2_ep_t ep;
|
||||||
psm_mq_t mq;
|
psm2_mq_t mq;
|
||||||
psm_epid_t epid;
|
psm2_epid_t epid;
|
||||||
psm_epaddr_t epaddr;
|
psm2_epaddr_t epaddr;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct mca_mtl_psm2_module_t mca_mtl_psm2_module_t;
|
typedef struct mca_mtl_psm2_module_t mca_mtl_psm2_module_t;
|
||||||
@ -62,14 +62,14 @@ typedef struct mca_mtl_psm2_component_t mca_mtl_psm2_component_t;
|
|||||||
|
|
||||||
OMPI_DECLSPEC extern mca_mtl_psm2_component_t mca_mtl_psm2_component;
|
OMPI_DECLSPEC extern mca_mtl_psm2_component_t mca_mtl_psm2_component;
|
||||||
|
|
||||||
#define PSM_MAKE_MQTAG(ctxt,rank,utag,tag) \
|
#define PSM2_MAKE_MQTAG(ctxt,rank,utag,tag) \
|
||||||
do { \
|
do { \
|
||||||
(tag).tag0 = ctxt; \
|
(tag).tag0 = ctxt; \
|
||||||
(tag).tag1 = utag; \
|
(tag).tag1 = utag; \
|
||||||
(tag).tag2 = rank; \
|
(tag).tag2 = rank; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define PSM_MAKE_TAGSEL(user_rank, user_tag, user_ctxt, tag, _tagsel) \
|
#define PSM2_MAKE_TAGSEL(user_rank, user_tag, user_ctxt, tag, _tagsel) \
|
||||||
do { \
|
do { \
|
||||||
(tag).tag0 = user_ctxt; \
|
(tag).tag0 = user_ctxt; \
|
||||||
(tag).tag1 = user_tag; \
|
(tag).tag1 = user_tag; \
|
||||||
|
@ -1 +1 @@
|
|||||||
DIRECT_CALL_HEADER="ompi/mca/mtl/psm2/mtl_psm.h"
|
DIRECT_CALL_HEADER="ompi/mca/mtl/psm2/mtl_psm2.h"
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user