Use the RTE framework instead of calling ORTE directly.
Brian (rightfully) hit me on the head with the don't-use-ORTE-use-the-RTE-framework clue bat; the usnic BTL now plays nicely with the RTE framework. This commit was SVN r28907.
Этот коммит содержится в:
родитель
ca9da8a554
Коммит
4b6006402d
@ -111,9 +111,9 @@ typedef struct ompi_btl_usnic_component_t {
|
||||
char *if_exclude;
|
||||
uint32_t *vendor_part_ids;
|
||||
|
||||
/* Cached hashed version of my ORTE proc name (to stuff in
|
||||
/* Cached hashed version of my RTE proc name (to stuff in
|
||||
protocol headers) */
|
||||
uint64_t my_hashed_orte_name;
|
||||
uint64_t my_hashed_rte_name;
|
||||
|
||||
/** array of available BTLs */
|
||||
struct ompi_btl_usnic_module_t* usnic_modules;
|
||||
|
@ -49,11 +49,9 @@
|
||||
#include "opal/util/if.h"
|
||||
#include "opal/mca/base/mca_base_var.h"
|
||||
#include "opal/mca/memchecker/base/base.h"
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include "ompi/mca/rte/rte.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "ompi/mca/btl/base/base.h"
|
||||
@ -275,9 +273,9 @@ static int check_reg_mem_basics(void)
|
||||
asprintf(&str_limit, "Unknown");
|
||||
}
|
||||
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "check_reg_mem_basics fail",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "check_reg_mem_basics fail",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
str_limit);
|
||||
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
@ -352,8 +350,8 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
||||
************************************************************************/
|
||||
|
||||
/* initialization */
|
||||
mca_btl_usnic_component.my_hashed_orte_name =
|
||||
orte_util_hash_name(&(ompi_proc_local()->proc_name));
|
||||
mca_btl_usnic_component.my_hashed_rte_name =
|
||||
ompi_rte_hash_name(&(ompi_proc_local()->proc_name));
|
||||
|
||||
seed_prng();
|
||||
|
||||
@ -375,7 +373,6 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
||||
malloc(mca_btl_usnic_component.num_modules *
|
||||
sizeof(ompi_btl_usnic_module_t*));
|
||||
if (NULL == btls) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
btls = NULL;
|
||||
goto free_include_list;
|
||||
}
|
||||
@ -386,7 +383,6 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
||||
sizeof(ompi_btl_usnic_module_t));
|
||||
if (NULL == mca_btl_usnic_component.usnic_modules) {
|
||||
free(btls);
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
btls = NULL;
|
||||
goto free_include_list;
|
||||
}
|
||||
@ -415,7 +411,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
||||
filter = NULL;
|
||||
}
|
||||
|
||||
num_local_procs = orte_process_info.num_local_peers;
|
||||
num_local_procs = ompi_process_info.num_local_peers;
|
||||
|
||||
/* Go through the list of ports and determine if we want it or
|
||||
not. Create and (mostly) fill a module struct for each port
|
||||
@ -467,9 +463,9 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
||||
|
||||
/* Query this device */
|
||||
if (0 != ibv_query_device(module->device_context, &device_attr)) {
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(module->device),
|
||||
module->port_num,
|
||||
"ibv_query_device", __FILE__, __LINE__,
|
||||
@ -487,10 +483,10 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
||||
char *str;
|
||||
asprintf(&str, "Not enough usNIC QPs (found %d, need %d)",
|
||||
device_attr.max_qp, num_local_procs * 2);
|
||||
orte_show_help("help-mpi-btl-usnic.txt",
|
||||
opal_show_help("help-mpi-btl-usnic.txt",
|
||||
"not enough usnic resources",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(module->device),
|
||||
str);
|
||||
free(str);
|
||||
@ -501,10 +497,10 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
||||
char *str;
|
||||
asprintf(&str, "Not enough usNIC CQs (found %d, need %d)",
|
||||
device_attr.max_cq, num_local_procs * 2);
|
||||
orte_show_help("help-mpi-btl-usnic.txt",
|
||||
opal_show_help("help-mpi-btl-usnic.txt",
|
||||
"not enough usnic resources",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(module->device),
|
||||
str);
|
||||
free(str);
|
||||
@ -538,7 +534,8 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
||||
* override.
|
||||
*/
|
||||
if (-1 == mca_btl_usnic_component.prio_sd_num) {
|
||||
module->prio_sd_num = max(128, 32*orte_process_info.num_procs) - 1;
|
||||
module->prio_sd_num =
|
||||
max(128, 32 * ompi_process_info.num_procs) - 1;
|
||||
} else {
|
||||
module->prio_sd_num = mca_btl_usnic_component.prio_sd_num;
|
||||
}
|
||||
@ -546,7 +543,8 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
||||
module->prio_sd_num = device_attr.max_qp_wr;
|
||||
}
|
||||
if (-1 == mca_btl_usnic_component.prio_rd_num) {
|
||||
module->prio_rd_num = max(128, 32*orte_process_info.num_procs) - 1;
|
||||
module->prio_rd_num =
|
||||
max(128, 32 * ompi_process_info.num_procs) - 1;
|
||||
} else {
|
||||
module->prio_rd_num = mca_btl_usnic_component.prio_rd_num;
|
||||
}
|
||||
@ -908,7 +906,7 @@ static int usnic_component_progress(void)
|
||||
static void seed_prng(void)
|
||||
{
|
||||
unsigned short seedv[3];
|
||||
seedv[0] = ORTE_PROC_MY_NAME->vpid;
|
||||
seedv[0] = OMPI_PROC_MY_NAME->vpid;
|
||||
seedv[1] = opal_timer_base_get_cycles();
|
||||
usleep(1);
|
||||
seedv[2] = opal_timer_base_get_cycles();
|
||||
@ -963,9 +961,9 @@ static int init_module_from_port(ompi_btl_usnic_module_t *module,
|
||||
module->port_num,
|
||||
mca_btl_usnic_component.gid_index, &gid)) {
|
||||
opal_memchecker_base_mem_defined(&gid, sizeof(gid));
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(module->device),
|
||||
module->port_num,
|
||||
"ibv_query_gid", __FILE__, __LINE__,
|
||||
@ -1011,9 +1009,9 @@ static int init_module_from_port(ompi_btl_usnic_module_t *module,
|
||||
/* If we don't get OMPI_SUCCESS, then we weren't able
|
||||
to figure out what the bandwidth was of this port.
|
||||
That's a bad sign. Let's ignore this port. */
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "verbs_port_bw failed",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "verbs_port_bw failed",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(module->device),
|
||||
module->port_num);
|
||||
return OMPI_ERROR;
|
||||
@ -1077,7 +1075,6 @@ static usnic_if_filter_t *parse_ifex_str(const char *orig_str,
|
||||
/* Get a wrapper for the filter */
|
||||
filter = calloc(sizeof(*filter), 1);
|
||||
if (NULL == filter) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -1090,7 +1087,6 @@ static usnic_if_filter_t *parse_ifex_str(const char *orig_str,
|
||||
/* upper bound: each entry could be a mask */
|
||||
filter->elts = malloc(sizeof(*filter->elts) * n_argv);
|
||||
if (NULL == filter->elts) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
free(filter);
|
||||
opal_argv_free(argv);
|
||||
return NULL;
|
||||
@ -1120,8 +1116,8 @@ static usnic_if_filter_t *parse_ifex_str(const char *orig_str,
|
||||
tmp = strdup(argv[i]);
|
||||
str = strchr(argv[i], '/');
|
||||
if (NULL == str) {
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "invalid if_inexclude",
|
||||
true, name, orte_process_info.nodename,
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "invalid if_inexclude",
|
||||
true, name, ompi_process_info.nodename,
|
||||
tmp, "Invalid specification (missing \"/\")");
|
||||
free(tmp);
|
||||
continue;
|
||||
@ -1129,8 +1125,8 @@ static usnic_if_filter_t *parse_ifex_str(const char *orig_str,
|
||||
*str = '\0';
|
||||
argv_prefix = atoi(str + 1);
|
||||
if (argv_prefix < 1 || argv_prefix > 32) {
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "invalid if_inexclude",
|
||||
true, name, orte_process_info.nodename,
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "invalid if_inexclude",
|
||||
true, name, ompi_process_info.nodename,
|
||||
tmp, "Invalid specification (prefix < 1 or prefix >32)");
|
||||
free(tmp);
|
||||
continue;
|
||||
@ -1141,8 +1137,8 @@ static usnic_if_filter_t *parse_ifex_str(const char *orig_str,
|
||||
ret = inet_pton(AF_INET, argv[i],
|
||||
&((struct sockaddr_in*) &argv_inaddr)->sin_addr);
|
||||
if (1 != ret) {
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "invalid if_inexclude",
|
||||
true, name, orte_process_info.nodename, tmp,
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "invalid if_inexclude",
|
||||
true, name, ompi_process_info.nodename, tmp,
|
||||
"Invalid specification (inet_pton() failed)");
|
||||
free(tmp);
|
||||
continue;
|
||||
|
@ -30,8 +30,7 @@
|
||||
#include <unistd.h>
|
||||
|
||||
#include "opal/prefetch.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "ompi/types.h"
|
||||
|
||||
|
@ -40,7 +40,7 @@ common_send_seg_helper(
|
||||
bseg = &seg->ss_base;
|
||||
|
||||
bseg->us_btl_header = (ompi_btl_usnic_btl_header_t *)bseg->us_list.ptr;
|
||||
bseg->us_btl_header->sender = mca_btl_usnic_component.my_hashed_orte_name;
|
||||
bseg->us_btl_header->sender = mca_btl_usnic_component.my_hashed_rte_name;
|
||||
|
||||
/* build verbs work request descriptor */
|
||||
seg->ss_send_desc.wr_id = (unsigned long) seg;
|
||||
|
@ -94,7 +94,7 @@ typedef enum {
|
||||
* holes.
|
||||
*/
|
||||
typedef struct {
|
||||
/* Hashed ORTE process name of the sender */
|
||||
/* Hashed RTE process name of the sender */
|
||||
uint64_t sender;
|
||||
|
||||
/* Sliding window sequence number (echoed back in an ACK). This
|
||||
|
@ -30,10 +30,9 @@
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "opal/include/opal_stdint.h"
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "ompi/mca/rte/rte.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
@ -1314,9 +1313,9 @@ static void module_async_event_callback(int fd, short flags, void *arg)
|
||||
case IBV_EVENT_GID_CHANGE:
|
||||
#endif
|
||||
default:
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "async event",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "async event",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(module->device),
|
||||
module->port_num,
|
||||
ibv_event_type_str(event.event_type),
|
||||
@ -1376,9 +1375,9 @@ init_qp(
|
||||
job is consuming QPs. */
|
||||
channel->qp = ibv_create_qp(module->pd, &qp_init_attr);
|
||||
if (NULL == channel->qp) {
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "create ibv resource failed",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "create ibv resource failed",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(module->device),
|
||||
"ibv_create_qp()", __FILE__, __LINE__,
|
||||
"Failed to create a usNIC queue pair");
|
||||
@ -1395,9 +1394,9 @@ init_qp(
|
||||
|
||||
if (ibv_modify_qp(channel->qp, &qp_attr,
|
||||
IBV_QP_STATE | IBV_QP_PORT)) {
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(module->device),
|
||||
module->port_num,
|
||||
"ibv_modify_qp()", __FILE__, __LINE__,
|
||||
@ -1410,9 +1409,9 @@ init_qp(
|
||||
memset(&qp_init_attr, 0, sizeof(qp_init_attr));
|
||||
if (ibv_query_qp(channel->qp, &qp_attr, IBV_QP_CAP,
|
||||
&qp_init_attr) != 0) {
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(module->device),
|
||||
module->port_num,
|
||||
"ibv_query_qp()", __FILE__, __LINE__,
|
||||
@ -1438,9 +1437,9 @@ static int move_qp_to_rtr(ompi_btl_usnic_module_t *module,
|
||||
|
||||
qp_attr.qp_state = IBV_QPS_RTR;
|
||||
if (ibv_modify_qp(channel->qp, &qp_attr, IBV_QP_STATE)) {
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(module->device),
|
||||
module->port_num,
|
||||
"ibv_modify_qp", __FILE__, __LINE__,
|
||||
@ -1461,9 +1460,9 @@ static int move_qp_to_rts(ompi_btl_usnic_module_t *module,
|
||||
|
||||
qp_attr.qp_state = IBV_QPS_RTS;
|
||||
if (ibv_modify_qp(channel->qp, &qp_attr, IBV_QP_STATE)) {
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(module->device),
|
||||
module->port_num,
|
||||
"ibv_modify_qp", __FILE__, __LINE__,
|
||||
@ -1534,9 +1533,9 @@ ompi_btl_usnic_channel_init(
|
||||
job is consuming CQs. */
|
||||
channel->cq = ibv_create_cq(ctx, module->cq_num, NULL, NULL, 0);
|
||||
if (NULL == channel->cq) {
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "create ibv resource failed",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "create ibv resource failed",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(module->device),
|
||||
"ibv_create_cq()", __FILE__, __LINE__,
|
||||
"Failed to create a usNIC completion queue");
|
||||
@ -1570,10 +1569,10 @@ ompi_btl_usnic_channel_init(
|
||||
rseg = (ompi_btl_usnic_recv_segment_t*)item;
|
||||
|
||||
if (NULL == rseg) {
|
||||
orte_show_help("help-mpi-btl-usnic.txt",
|
||||
opal_show_help("help-mpi-btl-usnic.txt",
|
||||
"internal error during init",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(module->device),
|
||||
module->port_num,
|
||||
"get freelist buffer()", __FILE__, __LINE__,
|
||||
@ -1586,9 +1585,9 @@ ompi_btl_usnic_channel_init(
|
||||
rseg->rs_recv_desc.next = NULL;
|
||||
|
||||
if (ibv_post_recv(channel->qp, &rseg->rs_recv_desc, &bad_wr)) {
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(module->device),
|
||||
module->port_num,
|
||||
"ibv_post_recv", __FILE__, __LINE__,
|
||||
@ -1641,15 +1640,15 @@ int ompi_btl_usnic_module_init(ompi_btl_usnic_module_t *module)
|
||||
/* Setup the pointer array for the procs that will be used by this
|
||||
module */
|
||||
OBJ_CONSTRUCT(&module->all_procs, opal_pointer_array_t);
|
||||
opal_pointer_array_init(&module->all_procs, orte_process_info.num_procs,
|
||||
opal_pointer_array_init(&module->all_procs, ompi_process_info.num_procs,
|
||||
INT_MAX, 32);
|
||||
|
||||
/* Get a PD */
|
||||
module->pd = ibv_alloc_pd(ctx);
|
||||
if (NULL == module->pd) {
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(module->device),
|
||||
module->port_num,
|
||||
"ibv_alloc_pd()", __FILE__, __LINE__,
|
||||
@ -1668,9 +1667,9 @@ int ompi_btl_usnic_module_init(ompi_btl_usnic_module_t *module)
|
||||
mca_mpool_base_module_create(mca_btl_usnic_component.usnic_mpool_name,
|
||||
&module->super, &mpool_resources);
|
||||
if (NULL == module->super.btl_mpool) {
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(module->device),
|
||||
module->port_num,
|
||||
"create mpool", __FILE__, __LINE__,
|
||||
|
@ -25,10 +25,9 @@
|
||||
|
||||
#include "opal_stdint.h"
|
||||
#include "opal/util/arch.h"
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include "ompi/mca/rte/rte.h"
|
||||
#include "ompi/runtime/ompi_module_exchange.h"
|
||||
#include "ompi/constants.h"
|
||||
|
||||
@ -107,12 +106,12 @@ ompi_btl_usnic_proc_lookup_ompi(ompi_proc_t* ompi_proc)
|
||||
|
||||
|
||||
/*
|
||||
* Look for an existing usnic proc based on a hashed ORTE process
|
||||
* Look for an existing usnic proc based on a hashed RTE process
|
||||
* name.
|
||||
*/
|
||||
ompi_btl_usnic_endpoint_t *
|
||||
ompi_btl_usnic_proc_lookup_endpoint(ompi_btl_usnic_module_t *receiver,
|
||||
uint64_t sender_hashed_orte_name)
|
||||
uint64_t sender_hashed_rte_name)
|
||||
{
|
||||
size_t i;
|
||||
uint32_t mynet, peernet;
|
||||
@ -125,8 +124,8 @@ ompi_btl_usnic_proc_lookup_endpoint(ompi_btl_usnic_module_t *receiver,
|
||||
opal_list_get_end(&mca_btl_usnic_component.usnic_procs);
|
||||
proc = (ompi_btl_usnic_proc_t*)
|
||||
opal_list_get_next(proc)) {
|
||||
if (orte_util_hash_name(&proc->proc_ompi->proc_name) ==
|
||||
sender_hashed_orte_name) {
|
||||
if (ompi_rte_hash_name(&proc->proc_ompi->proc_name) ==
|
||||
sender_hashed_rte_name) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -186,9 +185,9 @@ static ompi_btl_usnic_proc_t *create_proc(ompi_proc_t *ompi_proc)
|
||||
&size);
|
||||
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "internal error during init",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "internal error during init",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
"<none>", 0,
|
||||
"ompi_modex_recv() failed", __FILE__, __LINE__,
|
||||
opal_strerror(rc));
|
||||
@ -200,11 +199,11 @@ static ompi_btl_usnic_proc_t *create_proc(ompi_proc_t *ompi_proc)
|
||||
|
||||
snprintf(msg, sizeof(msg),
|
||||
"sizeof(modex for peer %s data) == %d, expected multiple of %d",
|
||||
ORTE_NAME_PRINT(&ompi_proc->proc_name),
|
||||
OMPI_NAME_PRINT(&ompi_proc->proc_name),
|
||||
(int) size, (int) sizeof(ompi_btl_usnic_addr_t));
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "internal error during init",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "internal error during init",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
"<none>", 0,
|
||||
"invalid modex data", __FILE__, __LINE__,
|
||||
msg);
|
||||
@ -223,7 +222,7 @@ static ompi_btl_usnic_proc_t *create_proc(ompi_proc_t *ompi_proc)
|
||||
proc->proc_modex_claimed = (bool*)
|
||||
calloc(proc->proc_modex_count, sizeof(bool));
|
||||
if (NULL == proc->proc_modex_claimed) {
|
||||
ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
|
||||
OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
|
||||
OBJ_RELEASE(proc);
|
||||
return NULL;
|
||||
}
|
||||
@ -231,7 +230,7 @@ static ompi_btl_usnic_proc_t *create_proc(ompi_proc_t *ompi_proc)
|
||||
proc->proc_endpoints = (mca_btl_base_endpoint_t**)
|
||||
calloc(proc->proc_modex_count, sizeof(mca_btl_base_endpoint_t*));
|
||||
if (NULL == proc->proc_endpoints) {
|
||||
ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
|
||||
OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
|
||||
OBJ_RELEASE(proc);
|
||||
return NULL;
|
||||
}
|
||||
@ -303,9 +302,9 @@ static int match_modex(ompi_btl_usnic_module_t *module,
|
||||
peer_hostname =
|
||||
"<unknown -- please run with mpi_keep_peer_hostnames=1>";
|
||||
}
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "MTU mismatch",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "MTU mismatch",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(module->device),
|
||||
module->port_num,
|
||||
module->if_mtu,
|
||||
@ -333,7 +332,7 @@ ompi_btl_usnic_create_endpoint(ompi_btl_usnic_module_t *module,
|
||||
if (modex_index < 0) {
|
||||
opal_output_verbose(5, USNIC_OUT,
|
||||
"btl:usnic:create_endpoint: did not find usnic modex info for peer %s",
|
||||
ORTE_NAME_PRINT(&proc->proc_ompi->proc_name));
|
||||
OMPI_NAME_PRINT(&proc->proc_ompi->proc_name));
|
||||
return OMPI_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
@ -358,9 +357,9 @@ ompi_btl_usnic_create_endpoint(ompi_btl_usnic_module_t *module,
|
||||
|
||||
endpoint->endpoint_remote_ah = ibv_create_ah(module->pd, &ah_attr);
|
||||
if (NULL == endpoint->endpoint_remote_ah) {
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "ibv API failed",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
ibv_get_device_name(module->device),
|
||||
module->port_num,
|
||||
"ibv_create_ah()", __FILE__, __LINE__,
|
||||
|
@ -67,7 +67,7 @@ struct ompi_btl_usnic_module_t;
|
||||
|
||||
ompi_btl_usnic_endpoint_t *
|
||||
ompi_btl_usnic_proc_lookup_endpoint(struct ompi_btl_usnic_module_t *receiver,
|
||||
uint64_t sender_hashed_orte_name);
|
||||
uint64_t sender_hashed_rte_name);
|
||||
|
||||
int ompi_btl_usnic_proc_match(ompi_proc_t* ompi_proc,
|
||||
struct ompi_btl_usnic_module_t *module,
|
||||
|
@ -53,9 +53,9 @@ lookup_sender(ompi_btl_usnic_module_t *module, ompi_btl_usnic_segment_t *seg)
|
||||
int ret;
|
||||
ompi_btl_usnic_endpoint_t *sender;
|
||||
|
||||
/* Use the hashed ORTE process name in the BTL header to uniquely
|
||||
/* Use the hashed RTE process name in the BTL header to uniquely
|
||||
identify the sending process (using the MAC/hardware address
|
||||
only identifies the sending server -- not the sending ORTE
|
||||
only identifies the sending server -- not the sending RTE
|
||||
process). */
|
||||
/* JMS Cesare suggests using a handshake before sending any data
|
||||
so that instead of looking up a hash on the btl_header->sender,
|
||||
|
@ -13,9 +13,9 @@
|
||||
#include <unistd.h>
|
||||
#include <infiniband/verbs.h>
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "ompi/mca/rte/rte.h"
|
||||
#include "ompi/constants.h"
|
||||
|
||||
#include "btl_usnic_util.h"
|
||||
@ -24,7 +24,7 @@
|
||||
|
||||
void ompi_btl_usnic_exit(void)
|
||||
{
|
||||
orte_errmgr.abort(1, NULL);
|
||||
ompi_rte_abort(1, NULL);
|
||||
|
||||
/* If the error manager returns, wait to be killed */
|
||||
while (1) {
|
||||
@ -181,16 +181,16 @@ uint32_t ompi_btl_usnic_get_ipv4_subnet(uint32_t addrn, uint32_t cidr_len)
|
||||
|
||||
/*
|
||||
* Simple utility in a .c file, mainly so that inline functions in .h
|
||||
* files don't need to include ORTE header files.
|
||||
* files don't need to include RTE header files.
|
||||
*/
|
||||
void ompi_btl_usnic_util_abort(const char *msg, const char *file, int line,
|
||||
int ret)
|
||||
{
|
||||
orte_show_help("help-mpi-btl-usnic.txt", "internal error after init",
|
||||
opal_show_help("help-mpi-btl-usnic.txt", "internal error after init",
|
||||
true,
|
||||
orte_process_info.nodename,
|
||||
ompi_process_info.nodename,
|
||||
msg, file, line, strerror(ret));
|
||||
|
||||
orte_errmgr.abort(ret, NULL);
|
||||
ompi_rte_abort(ret, NULL);
|
||||
/* Never returns */
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user