1
1

btl/usnic: update for libfabric v1.4

With libfabric v1.4, the usnic provider changed the values of its
fabric and domain name strings (compared to libfabric <v1.4).  Update
the Open MPI usNIC BTL to handle both pre-v1.4 and v1.4 fabric/domain
names.

Signed-off-by: Jeff Squyres <jsquyres@cisco.com>
Этот коммит содержится в:
Jeff Squyres 2016-08-19 19:07:14 -07:00
родитель 6de64ddbc1
Коммит 6f5e377fe0
10 изменённых файлов: 130 добавлений и 85 удалений

Просмотреть файл

@ -197,7 +197,7 @@ int opal_btl_usnic_connectivity_listen(opal_btl_usnic_module_t *module)
/* Ensure to NULL-terminate the passed strings */
strncpy(cmd.nodename, opal_process_info.nodename,
CONNECTIVITY_NODENAME_LEN - 1);
strncpy(cmd.usnic_name, module->fabric_info->fabric_attr->name,
strncpy(cmd.usnic_name, module->linux_device_name,
CONNECTIVITY_IFNAME_LEN - 1);
if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(cmd), &cmd)) {

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
@ -536,7 +536,7 @@ opal_btl_usnic_prepare_src(
#if MSGDEBUG2
opal_output(0, "prep_src: %s %s frag %p, size=%d+%u (was %u), conv=%p\n",
module->fabric_info->fabric_attr->name,
module->linux_device_name,
(reserve + *size) <= module->max_frag_payload?"small":"large",
(void *)frag, (int)reserve, (unsigned)*size, (unsigned)osize,
(void *)convertor);
@ -723,7 +723,7 @@ opal_btl_usnic_prepare_src(struct mca_btl_base_module_t *base_module,
#if MSGDEBUG2
opal_output(0, "prep_src: %s %s frag %p, size=%d+%u (was %u), conv=%p\n",
module->fabric_info->fabric_attr->name,
module->linux_device_name,
(reserve + *size) <= module->max_frag_payload?"small":"large",
(void *)frag, (int)reserve, (unsigned)*size, (unsigned)osize,
(void *)convertor);

Просмотреть файл

@ -378,7 +378,7 @@ static int check_usnic_config(opal_btl_usnic_module_t *module,
"not enough usnic resources",
true,
opal_process_info.nodename,
info->fabric_attr->name,
module->linux_device_name,
str);
return OPAL_ERROR;
}
@ -543,10 +543,12 @@ static bool filter_module(opal_btl_usnic_module_t *module,
struct fi_usnic_info *uip;
struct fi_info *info;
bool match;
const char *linux_device_name;
info = module->fabric_info;
uip = &module->usnic_info;
src = info->src_addr;
linux_device_name = module->linux_device_name;
module_mask = src->sin_addr.s_addr & uip->ui.v1.ui_netmask_be;
match = false;
for (i = 0; i < filter->n_elt; ++i) {
@ -559,7 +561,7 @@ static bool filter_module(opal_btl_usnic_module_t *module,
}
}
else {
if (strcmp(filter->elts[i].if_name, info->fabric_attr->name) == 0) {
if (strcmp(filter->elts[i].if_name, linux_device_name) == 0) {
match = true;
break;
}
@ -590,6 +592,25 @@ static void free_filter(usnic_if_filter_t *filter)
free(filter);
}
static int do_fi_getinfo(uint32_t version, struct fi_info **info_list)
{
struct fi_info hints = {0};
struct fi_ep_attr ep_attr = {0};
struct fi_fabric_attr fabric_attr = {0};
/* We only want providers named "usnic" that are of type EP_DGRAM */
fabric_attr.prov_name = "usnic";
ep_attr.type = FI_EP_DGRAM;
hints.caps = FI_MSG;
hints.mode = FI_LOCAL_MR | FI_MSG_PREFIX;
hints.addr_format = FI_SOCKADDR;
hints.ep_attr = &ep_attr;
hints.fabric_attr = &fabric_attr;
return fi_getinfo(version, NULL, 0, 0, &hints, info_list);
}
/*
* UD component initialization:
* (1) read interface list from kernel and compare against component
@ -611,9 +632,6 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
int min_distance, num_local_procs;
struct fi_info *info_list;
struct fi_info *info;
struct fi_info hints = {0};
struct fi_ep_attr ep_attr = {0};
struct fi_fabric_attr fabric_attr = {0};
struct fid_fabric *fabric;
struct fid_domain *domain;
int ret;
@ -636,19 +654,9 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
OBJ_CONSTRUCT(&btl_usnic_lock, opal_recursive_mutex_t);
/* We only want providers named "usnic that are of type EP_DGRAM */
fabric_attr.prov_name = "usnic";
ep_attr.type = FI_EP_DGRAM;
hints.caps = FI_MSG;
hints.mode = FI_LOCAL_MR | FI_MSG_PREFIX;
hints.addr_format = FI_SOCKADDR;
hints.ep_attr = &ep_attr;
hints.fabric_attr = &fabric_attr;
/* This code understands libfabric API v1.0 and v1.1. Even if we
were compiled with libfabric API v1.0, we still want to request
v1.1 -- here's why:
/* This code understands libfabric API versions v1.0, v1.1, and
v1.4. Even if we were compiled with libfabric API v1.0, we
still want to request v1.1 -- here's why:
- In libfabric v1.0.0 (i.e., API v1.0), the usnic provider did
not check the value of the "version" parameter passed into
@ -664,6 +672,17 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
So never request API v1.0 -- always request a minimum of
v1.1.
The usnic provider changed the strings in the fabric and domain
names in API v1.4. With API <= v1.3:
- fabric name is "usnic_X" (device name)
- domain name is NULL
With libfabric API >= v1.4:
- fabric name is "a.b.c.d/e" (CIDR notation of network)
- domain name is "usnic_X" (device name)
NOTE: The configure.m4 in this component will require libfabric
>= v1.1.0 (i.e., it won't accept v1.0.0) because of a critical
bug in the usnic provider in libfabric v1.0.0. However, the
@ -677,9 +696,17 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
Someday, #2 may no longer be true, and we may therefore rip out
the libfabric v1.0.0 compatibility code. */
/* First try API version 1.4. If that doesn't work, try API
version 1.1. */
uint32_t libfabric_api;
libfabric_api = FI_VERSION(1, 4);
ret = do_fi_getinfo(libfabric_api, &info_list);
// Libfabric core will return -FI_ENOSYS if it is too old
if (-FI_ENOSYS == ret) {
libfabric_api = FI_VERSION(1, 1);
ret = fi_getinfo(libfabric_api, NULL, 0, 0, &hints, &info_list);
ret = do_fi_getinfo(libfabric_api, &info_list);
}
if (0 != ret) {
opal_output_verbose(5, USNIC_OUT,
"btl:usnic: disqualifiying myself due to fi_getinfo failure: %s (%d)", strerror(-ret), ret);
@ -800,13 +827,21 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
i < mca_btl_usnic_component.max_modules);
++i, info = info->next) {
// The fabric/domain names changed at libfabric API v1.4 (see above).
char *linux_device_name;
if (libfabric_api <= FI_VERSION(1, 3)) {
linux_device_name = info->fabric_attr->name;
} else {
linux_device_name = info->domain_attr->name;
}
ret = fi_fabric(info->fabric_attr, &fabric, NULL);
if (0 != ret) {
opal_show_help("help-mpi-btl-usnic.txt",
"libfabric API failed",
true,
opal_process_info.nodename,
info->fabric_attr->name,
linux_device_name,
"fi_fabric()", __FILE__, __LINE__,
ret,
strerror(-ret));
@ -820,7 +855,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
"libfabric API failed",
true,
opal_process_info.nodename,
info->fabric_attr->name,
linux_device_name,
"fi_domain()", __FILE__, __LINE__,
ret,
strerror(-ret));
@ -829,8 +864,8 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
opal_memchecker_base_mem_defined(&domain, sizeof(domain));
opal_output_verbose(5, USNIC_OUT,
"btl:usnic: found: usNIC direct device %s",
info->fabric_attr->name);
"btl:usnic: found: usNIC device %s",
linux_device_name);
/* Save a little info on the module that we have already
gathered. The rest of the module will be filled in
@ -841,6 +876,12 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
module->fabric = fabric;
module->domain = domain;
module->fabric_info = info;
module->libfabric_api = libfabric_api;
module->linux_device_name = strdup(linux_device_name);
if (NULL == module->linux_device_name) {
OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
goto error;
}
/* Obtain usnic-specific device info (e.g., netmask) that
doesn't come in the normal fi_getinfo(). This allows us to
@ -850,7 +891,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
if (ret != 0) {
opal_output_verbose(5, USNIC_OUT,
"btl:usnic: device %s fabric_open_ops failed %d (%s)",
info->fabric_attr->name, ret, fi_strerror(-ret));
module->linux_device_name, ret, fi_strerror(-ret));
fi_close(&domain->fid);
fi_close(&fabric->fid);
continue;
@ -863,14 +904,14 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
if (ret != 0) {
opal_output_verbose(5, USNIC_OUT,
"btl:usnic: device %s usnic_getinfo failed %d (%s)",
info->fabric_attr->name, ret, fi_strerror(-ret));
module->linux_device_name, ret, fi_strerror(-ret));
fi_close(&domain->fid);
fi_close(&fabric->fid);
continue;
}
opal_output_verbose(5, USNIC_OUT,
"btl:usnic: device %s usnic_info: link speed=%d, netmask=0x%x, ifname=%s, num_vf=%d, qp/vf=%d, cq/vf=%d",
info->fabric_attr->name,
module->linux_device_name,
(unsigned int) module->usnic_info.ui.v1.ui_link_speed,
(unsigned int) module->usnic_info.ui.v1.ui_netmask_be,
module->usnic_info.ui.v1.ui_ifname,
@ -884,7 +925,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
opal_output_verbose(5, USNIC_OUT,
"btl:usnic: %s %s due to %s",
(keep_module ? "keeping" : "skipping"),
info->fabric_attr->name,
module->linux_device_name,
(filter_incl ? "if_include" : "if_exclude"));
if (!keep_module) {
fi_close(&domain->fid);
@ -902,7 +943,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
check_usnic_config(module, num_local_procs) != OPAL_SUCCESS) {
opal_output_verbose(5, USNIC_OUT,
"btl:usnic: device %s is not provisioned with enough resources -- skipping",
info->fabric_attr->name);
module->linux_device_name);
fi_close(&domain->fid);
fi_close(&fabric->fid);
@ -916,7 +957,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
opal_output_verbose(5, USNIC_OUT,
"btl:usnic: device %s looks good!",
info->fabric_attr->name);
module->linux_device_name);
/* Let this module advance to the next round! */
btls[j++] = &(module->super);
@ -966,7 +1007,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
btls[num_final_modules++] = &(module->super);
/* Output all of this module's values. */
const char *devname = module->fabric_info->fabric_attr->name;
const char *devname = module->linux_device_name;
opal_output_verbose(5, USNIC_OUT,
"btl:usnic: %s num sqe=%d, num rqe=%d, num cqe=%d, num aveqe=%d",
devname,
@ -1212,7 +1253,7 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module,
if (cq_ret != -FI_EAVAIL) {
BTL_ERROR(("%s: cq_read ret = %d (%s)",
module->fabric_info->fabric_attr->name, cq_ret,
module->linux_device_name, cq_ret,
fi_strerror(-cq_ret)));
channel->chan_error = true;
}
@ -1222,7 +1263,7 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module,
return;
} else if (rc != mca_btl_usnic_component.cq_readerr_success_value) {
BTL_ERROR(("%s: cq_readerr ret = %d (expected %d)",
module->fabric_info->fabric_attr->name, rc,
module->linux_device_name, rc,
(int) mca_btl_usnic_component.cq_readerr_success_value));
channel->chan_error = true;
}
@ -1235,7 +1276,7 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module,
static int once = 0;
if (once++ == 0) {
BTL_ERROR(("%s: Channel %d, %s",
module->fabric_info->fabric_attr->name,
module->linux_device_name,
channel->chan_index,
FI_ECRC == err_entry.prov_errno ?
"CRC error" : "message truncation"));
@ -1256,7 +1297,7 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module,
}
} else {
BTL_ERROR(("%s: CQ[%d] prov_err = %d",
module->fabric_info->fabric_attr->name, channel->chan_index,
module->linux_device_name, channel->chan_index,
err_entry.prov_errno));
channel->chan_error = true;
}
@ -1469,7 +1510,7 @@ void opal_btl_usnic_component_debug(void)
module = mca_btl_usnic_component.usnic_active_modules[i];
opal_output(0, "active_modules[%d]=%p %s max{frag,chunk,tiny}=%llu,%llu,%llu\n",
i, (void *)module, module->fabric_info->fabric_attr->name,
i, (void *)module, module->linux_device_name,
(unsigned long long)module->max_frag_payload,
(unsigned long long)module->max_chunk_payload,
(unsigned long long)module->max_tiny_payload);

Просмотреть файл

@ -162,7 +162,7 @@ static hwloc_obj_t find_device_numa(opal_btl_usnic_module_t *module)
if (obj->type != HWLOC_OBJ_NODE) {
opal_output_verbose(5, USNIC_OUT,
"btl:usnic:filter_numa: could not find NUMA node for %s; filtering by NUMA distance not possible",
module->fabric_info->fabric_attr->name);
module->linux_device_name);
return NULL;
}
@ -218,7 +218,7 @@ int opal_btl_usnic_hwloc_distance(opal_btl_usnic_module_t *module)
opal_output_verbose(5, USNIC_OUT,
"btl:usnic:filter_numa: %s is distance %d from me",
module->fabric_info->fabric_attr->name,
module->linux_device_name,
module->numa_distance);
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
@ -30,8 +30,8 @@ static int map_compare_modules(const void *aa, const void *bb)
opal_btl_usnic_module_t *a = *((opal_btl_usnic_module_t**) aa);
opal_btl_usnic_module_t *b = *((opal_btl_usnic_module_t**) bb);
return strcmp(a->fabric_info->fabric_attr->name,
b->fabric_info->fabric_attr->name);
return strcmp(a->linux_device_name,
b->linux_device_name);
}
/*
@ -74,7 +74,7 @@ static int map_output_modules(FILE *fp)
prefix_len);
fprintf(fp, "device=%s,ip=%s,mss=%" PRIsize_t "\n",
modules[i]->fabric_info->fabric_attr->name,
modules[i]->linux_device_name,
ipv4, modules[i]->fabric_info->ep_attr->max_msg_size);
}
@ -102,8 +102,8 @@ static int map_compare_endpoints(const void *aa, const void *bb)
return -1;
}
return strcmp(a->endpoint_module->fabric_info->fabric_attr->name,
b->endpoint_module->fabric_info->fabric_attr->name);
return strcmp(a->endpoint_module->linux_device_name,
b->endpoint_module->linux_device_name);
}
/*
@ -148,7 +148,7 @@ static int map_output_endpoints(FILE *fp, opal_btl_usnic_proc_t *proc)
eps[i]->endpoint_remote_modex.netmask);
fprintf(fp, "device=%s@peer_ip=%s",
eps[i]->endpoint_module->fabric_info->fabric_attr->name,
eps[i]->endpoint_module->linux_device_name,
ipv4);
++num_output;
}

Просмотреть файл

@ -102,7 +102,7 @@ static int add_procs_block_create_endpoints(opal_btl_usnic_module_t *module,
if (opal_proc == my_proc) {
opal_output_verbose(75, USNIC_OUT,
"btl:usnic:add_procs:%s: not connecting to self",
module->fabric_info->fabric_attr->name);
module->linux_device_name);
continue;
}
@ -110,7 +110,7 @@ static int add_procs_block_create_endpoints(opal_btl_usnic_module_t *module,
if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) {
opal_output_verbose(75, USNIC_OUT,
"btl:usnic:add_procs:%s: not connecting to %s on same server",
module->fabric_info->fabric_attr->name,
module->linux_device_name,
usnic_compat_proc_name_print(&opal_proc->proc_name));
continue;
}
@ -126,7 +126,7 @@ static int add_procs_block_create_endpoints(opal_btl_usnic_module_t *module,
skip it */
opal_output_verbose(75, USNIC_OUT,
"btl:usnic:add_procs:%s: peer %s on %s does not have usnic modex info; skipping",
module->fabric_info->fabric_attr->name,
module->linux_device_name,
usnic_compat_proc_name_print(&opal_proc->proc_name),
opal_get_proc_hostname(opal_proc));
continue;
@ -142,7 +142,7 @@ static int add_procs_block_create_endpoints(opal_btl_usnic_module_t *module,
if (OPAL_SUCCESS != rc) {
opal_output_verbose(5, USNIC_OUT,
"btl:usnic:add_procs:%s: unable to create endpoint to peer %s on %s",
module->fabric_info->fabric_attr->name,
module->linux_device_name,
usnic_compat_proc_name_print(&opal_proc->proc_name),
opal_get_proc_hostname(opal_proc));
OBJ_RELEASE(usnic_proc);
@ -161,7 +161,7 @@ static int add_procs_block_create_endpoints(opal_btl_usnic_module_t *module,
opal_output_verbose(5, USNIC_OUT,
"btl:usnic:add_procs:%s: new usnic peer endpoint: %s, proirity port %d, data port %d",
module->fabric_info->fabric_attr->name,
module->linux_device_name,
str,
modex->ports[USNIC_PRIORITY_CHANNEL],
modex->ports[USNIC_DATA_CHANNEL]);
@ -197,14 +197,14 @@ static void add_procs_warn_unreachable(opal_btl_usnic_module_t *module,
opal_output_verbose(15, USNIC_OUT,
"btl:usnic: %s (which is %s) couldn't reach peer %s",
module->fabric_info->fabric_attr->name,
module->linux_device_name,
module->if_ipv4_addr_str,
remote);
opal_show_help("help-mpi-btl-usnic.txt", "unreachable peer IP",
true,
opal_process_info.nodename,
module->if_ipv4_addr_str,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
remote);
}
@ -303,7 +303,7 @@ add_procs_block_reap_fi_av_inserts(opal_btl_usnic_module_t *module,
"libfabric API failed",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"async insertion result", __FILE__, __LINE__,
err_entry.err,
"Failed to insert address to AV");
@ -327,7 +327,7 @@ add_procs_block_reap_fi_av_inserts(opal_btl_usnic_module_t *module,
"internal error during init",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"fi_eq_readerr()", __FILE__, __LINE__,
ret,
"Returned != sizeof(err_entry)");
@ -348,7 +348,7 @@ add_procs_block_reap_fi_av_inserts(opal_btl_usnic_module_t *module,
"internal error during init",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"fi_eq_sread()", __FILE__, __LINE__,
ret,
"Returned != (sizeof(entry) or -FI_EAVAIL)");
@ -904,6 +904,8 @@ static int usnic_finalize(struct mca_btl_base_module_t* btl)
fi_close(&module->domain->fid);
fi_close(&module->fabric->fid);
free(module->linux_device_name);
return OPAL_SUCCESS;
}
@ -1423,7 +1425,7 @@ static void module_async_event_callback(int fd, short flags, void *arg)
opal_show_help("help-mpi-btl-usnic.txt", "libfabric API failed",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"fi_eq_read()", __FILE__, __LINE__,
ret,
"Failed to get domain event");
@ -1442,7 +1444,7 @@ static void module_async_event_callback(int fd, short flags, void *arg)
ignore it. */
opal_output_verbose(10, USNIC_OUT,
"btl:usnic: got LINK_UP on %s",
module->fabric_info->fabric_attr->name);
module->linux_device_name);
break;
case 1: // USD_EVENT_LINK_DOWN:
@ -1461,7 +1463,7 @@ static void module_async_event_callback(int fd, short flags, void *arg)
opal_show_help("help-mpi-btl-usnic.txt", "async event",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
str, entry.data);
fatal = true;
}
@ -1492,7 +1494,7 @@ static int create_ep(opal_btl_usnic_module_t* module,
"internal error during init",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"fi_dupinfo() failed", __FILE__, __LINE__,
-1, "Unknown");
return OPAL_ERR_OUT_OF_RESOURCE;
@ -1510,14 +1512,14 @@ static int create_ep(opal_btl_usnic_module_t* module,
opal_process_info.my_local_rank);
}
rc = fi_getinfo(FI_VERSION(1, 1), NULL, 0, 0, hint, &channel->info);
rc = fi_getinfo(module->libfabric_api, NULL, 0, 0, hint, &channel->info);
fi_freeinfo(hint);
if (0 != rc) {
opal_show_help("help-mpi-btl-usnic.txt",
"internal error during init",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"fi_getinfo() failed", __FILE__, __LINE__,
rc, fi_strerror(-rc));
return OPAL_ERR_OUT_OF_RESOURCE;
@ -1553,7 +1555,7 @@ static int create_ep(opal_btl_usnic_module_t* module,
"internal error during init",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"fi_endpoint() failed", __FILE__, __LINE__,
rc, fi_strerror(-rc));
return OPAL_ERR_OUT_OF_RESOURCE;
@ -1566,7 +1568,7 @@ static int create_ep(opal_btl_usnic_module_t* module,
"internal error during init",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"fi_ep_bind() SCQ to EP failed", __FILE__, __LINE__,
rc, fi_strerror(-rc));
return OPAL_ERR_OUT_OF_RESOURCE;
@ -1577,7 +1579,7 @@ static int create_ep(opal_btl_usnic_module_t* module,
"internal error during init",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"fi_ep_bind() RCQ to EP failed", __FILE__, __LINE__,
rc, fi_strerror(-rc));
return OPAL_ERR_OUT_OF_RESOURCE;
@ -1588,7 +1590,7 @@ static int create_ep(opal_btl_usnic_module_t* module,
"internal error during init",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"fi_ep_bind() AV to EP failed", __FILE__, __LINE__,
rc, fi_strerror(-rc));
return OPAL_ERR_OUT_OF_RESOURCE;
@ -1601,7 +1603,7 @@ static int create_ep(opal_btl_usnic_module_t* module,
"internal error during init",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"fi_enable() failed", __FILE__, __LINE__,
rc, fi_strerror(-rc));
return OPAL_ERR_OUT_OF_RESOURCE;
@ -1623,7 +1625,7 @@ static int create_ep(opal_btl_usnic_module_t* module,
"internal error during init",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"fi_getname() failed", __FILE__, __LINE__,
rc, fi_strerror(-rc));
return OPAL_ERR_OUT_OF_RESOURCE;
@ -1714,7 +1716,7 @@ static int init_one_channel(opal_btl_usnic_module_t *module,
"internal error during init",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"failed to create CQ", __FILE__, __LINE__);
goto error;
}
@ -1770,7 +1772,7 @@ static int init_one_channel(opal_btl_usnic_module_t *module,
"internal error during init",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"Failed to get receive buffer from freelist",
__FILE__, __LINE__);
goto error;
@ -1786,7 +1788,7 @@ static int init_one_channel(opal_btl_usnic_module_t *module,
"internal error during init",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"Failed to post receive buffer",
__FILE__, __LINE__);
goto error;
@ -1853,7 +1855,7 @@ static void init_local_modex_part1(opal_btl_usnic_module_t *module)
opal_output_verbose(5, USNIC_OUT,
"btl:usnic: %s IP charactertics: %s, %u Mbps",
module->fabric_info->fabric_attr->name,
module->linux_device_name,
module->if_ipv4_addr_str,
modex->link_speed_mbps);
}
@ -2074,7 +2076,7 @@ static int init_mpool(opal_btl_usnic_module_t *module)
"internal error during init",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"create rcache", __FILE__, __LINE__);
return OPAL_ERROR;
}
@ -2082,7 +2084,7 @@ static int init_mpool(opal_btl_usnic_module_t *module)
mca_mpool_base_module_lookup (mca_btl_usnic_component.usnic_mpool_hints);
#else
asprintf(&mpool_resources.pool_name, "%s",
module->fabric_info->fabric_attr->name);
module->linux_device_name);
module->super.btl_mpool =
mca_mpool_base_module_create(mca_btl_usnic_component.usnic_mpool_name,
&module->super, &mpool_resources);
@ -2092,7 +2094,7 @@ static int init_mpool(opal_btl_usnic_module_t *module)
"internal error during init",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"create mpool", __FILE__, __LINE__);
return OPAL_ERROR;
}
@ -2205,7 +2207,7 @@ static void init_async_event(opal_btl_usnic_module_t *module)
"libfabric API failed",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"fi_control(eq, FI_GETWAIT)", __FILE__, __LINE__,
ret,
fi_strerror(-ret));

Просмотреть файл

@ -103,8 +103,10 @@ typedef struct opal_btl_usnic_module_t {
/* Cache for use during component_init to associate a module with
the libfabric device that it came from. */
uint32_t libfabric_api;
struct fid_fabric *fabric;
struct fid_domain *domain;
char *linux_device_name;
struct fi_info *fabric_info;
struct fi_usnic_ops_fabric *usnic_fabric_ops;
struct fi_usnic_ops_av *usnic_av_ops;

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
@ -643,7 +643,7 @@ static int match_modex(opal_btl_usnic_module_t *module,
opal_show_help("help-mpi-btl-usnic.txt", "MTU mismatch",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
module->fabric_info->ep_attr->max_msg_size,
(NULL == proc->proc_opal->proc_hostname) ?
"unknown" : proc->proc_opal->proc_hostname,
@ -700,7 +700,7 @@ static int start_av_insert(opal_btl_usnic_module_t *module,
opal_show_help("help-mpi-btl-usnic.txt", "libfabric API failed",
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
"fi_av_insert()", __FILE__, __LINE__,
ret,
"Failed to initiate AV insert");

Просмотреть файл

@ -216,7 +216,7 @@ opal_btl_usnic_endpoint_send_segment(
"CHUNK" : "FRAG",
sseg->ss_base.us_btl_header->pkt_seq,
sseg->ss_base.us_btl_header->sender,
endpoint->endpoint_module->fabric_info->fabric_attr->name,
endpoint->endpoint_module->linux_device_name,
local_ip,
module->local_modex.ports[sseg->ss_channel],
(void*)sseg,

Просмотреть файл

@ -86,7 +86,7 @@ void opal_btl_usnic_print_stats(
prefix,
opal_proc_local_get()->proc_name.vpid,
module->fabric_info->fabric_attr->name,
module->linux_device_name,
module->stats.num_total_sends,
module->mod_channels[USNIC_PRIORITY_CHANNEL].num_channel_sends,
@ -394,7 +394,7 @@ static void setup_mpit_pvars_enum(void)
devices[i].value = i;
rc = asprintf(&str, "%s,%hhu.%hhu.%hhu.%hhu/%" PRIu32,
m->fabric_info->fabric_attr->name,
m->linux_device_name,
c[0], c[1], c[2], c[3],
usnic_netmask_to_cidrlen(sin->sin_addr.s_addr));
assert(rc > 0);