1
1
openmpi/ompi/mca/btl/usnic/btl_usnic_util.c
Dave Goodell 73a943492c usnic: pack via convertor on the fly
If we need to use a convertor, go back to stashing that convertor in the
frag and populating segments "on the fly" (in
ompi_btl_usnic_module_progress_sends).  Previously we would pack into a
chain of chunk segments at prepare_src time, unnecessarily consuming
additional memory.

Reviewed-by: Jeff Squyres <jsquyres@cisco.com>
Reviewed-by: Reese Faucette <rfaucett@cisco.com>

This commit was SVN r29592.
2013-11-04 22:52:03 +00:00

254 строки
7.1 KiB
C

/*
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdio.h>
#include <unistd.h>
#include <infiniband/verbs.h>
#include "opal/util/show_help.h"
#include "ompi/mca/rte/rte.h"
#include "ompi/constants.h"
#include "btl_usnic_util.h"
#include "opal/util/if.h"
void ompi_btl_usnic_exit(void)
{
ompi_rte_abort(1, NULL);
/* If the error manager returns, wait to be killed */
while (1) {
sleep(99999);
}
}
void
ompi_btl_usnic_dump_hex(uint8_t *addr, int len)
{
char buf[128];
size_t bufspace;
int i, ret;
char *p;
uint32_t sum=0;
p = buf;
memset(buf, 0, sizeof(buf));
bufspace = sizeof(buf) - 1;
for (i=0; i<len; ++i) {
ret = snprintf(p, bufspace, "%02x ", addr[i]);
p += ret;
bufspace -= ret;
sum += addr[i];
if ((i&15) == 15) {
opal_output(0, "%4x: %s\n", i&~15, buf);
p = buf;
memset(buf, 0, sizeof(buf));
bufspace = sizeof(buf) - 1;
}
}
if ((i&15) != 0) {
opal_output(0, "%4x: %s\n", i&~15, buf);
}
/*opal_output(0, "buffer sum = %x\n", sum); */
}
void ompi_btl_usnic_sprintf_mac(char *out, const uint8_t mac[6])
{
snprintf(out, 32, "%02x:%02x:%02x:%02x:%02x:%02x",
mac[0],
mac[1],
mac[2],
mac[3],
mac[4],
mac[5]);
}
void ompi_btl_usnic_sprintf_gid_mac(char *out, union ibv_gid *gid)
{
uint8_t mac[6];
ompi_btl_usnic_gid_to_mac(gid, mac);
ompi_btl_usnic_sprintf_mac(out, mac);
}
/* Pretty-print the given boolean array as a hexadecimal string. slen should
* include space for any null terminator. */
void ompi_btl_usnic_snprintf_bool_array(char *s, size_t slen, bool a[], size_t alen)
{
size_t i = 0;
size_t j = 0;
/* could accommodate other cases, but not needed right now */
assert(slen % 4 == 0);
/* compute one nybble at a time */
while (i < alen && (j < slen - 1)) {
unsigned char tmp = 0;
/* first bool is the leftmost (most significant) bit of the nybble */
tmp |= !!a[i+0] << 3;
tmp |= !!a[i+1] << 2;
tmp |= !!a[i+2] << 1;
tmp |= !!a[i+3] << 0;
tmp += '0';
s[j] = tmp;
++j;
i += 4;
}
s[j++] = '\0';
assert(i <= alen);
assert(j <= slen);
}
int ompi_btl_usnic_find_ip(ompi_btl_usnic_module_t *module, uint8_t mac[6])
{
int i;
uint8_t localmac[6];
char addr_string[32], mac_string[32];
struct sockaddr sa;
struct sockaddr_in *sai;
/* Loop through all IP interfaces looking for the one with the
right MAC */
for (i = opal_ifbegin(); i != -1; i = opal_ifnext(i)) {
if (OPAL_SUCCESS == opal_ifindextomac(i, localmac)) {
/* Is this the MAC I'm looking for? */
if (0 != memcmp(mac, localmac, 6)) {
continue;
}
/* Yes, it is! */
if (OPAL_SUCCESS != opal_ifindextoname(i, module->if_name,
sizeof(module->if_name)) ||
OPAL_SUCCESS != opal_ifindextoaddr(i, &sa, sizeof(sa)) ||
OPAL_SUCCESS != opal_ifindextomask(i, &module->if_cidrmask,
sizeof(module->if_cidrmask)) ||
OPAL_SUCCESS != opal_ifindextomac(i, module->if_mac) ||
OPAL_SUCCESS != opal_ifindextomtu(i, &module->if_mtu)) {
continue;
}
sai = (struct sockaddr_in *) &sa;
memcpy(&module->if_ipv4_addr, &sai->sin_addr, 4);
/* Save this information to my local address field on the
module so that it gets sent in the modex */
module->local_addr.ipv4_addr = module->if_ipv4_addr;
module->local_addr.cidrmask = module->if_cidrmask;
/* Since verbs doesn't offer a way to get standard
Ethernet MTUs (as of libibverbs 1.1.5, the MTUs are
enums, and don't inlcude values for 1500 or 9000), look
up the MTU in the corresponding enic interface. */
module->local_addr.mtu = module->if_mtu;
inet_ntop(AF_INET, &(module->if_ipv4_addr),
addr_string, sizeof(addr_string));
ompi_btl_usnic_sprintf_mac(mac_string, mac);
opal_output_verbose(5, mca_btl_base_verbose,
"btl:usnic: found usNIC device corresponds to IP device %s, %s/%d, MAC %s",
module->if_name, addr_string, module->if_cidrmask,
mac_string);
return OMPI_SUCCESS;
}
}
return OMPI_ERR_NOT_FOUND;
}
/*
* Reverses the encoding done in usnic_main.c:usnic_mac_to_gid() in
* the usnic.ko kernel code.
*
* Got this scheme from Mellanox RoCE; Emulex did the same thing. So
* we followed convention.
* http://www.mellanox.com/related-docs/prod_software/RoCE_with_Priority_Flow_Control_Application_Guide.pdf
*/
void ompi_btl_usnic_gid_to_mac(union ibv_gid *gid, uint8_t mac[6])
{
mac[0] = gid->raw[8] ^ 2;
mac[1] = gid->raw[9];
mac[2] = gid->raw[10];
mac[3] = gid->raw[13];
mac[4] = gid->raw[14];
mac[5] = gid->raw[15];
}
/* takes an IPv4 address in network byte order and a CIDR prefix length (the
* "X" in "a.b.c.d/X") and returns the subnet in network byte order. */
uint32_t ompi_btl_usnic_get_ipv4_subnet(uint32_t addrn, uint32_t cidr_len)
{
uint32_t mask;
assert(cidr_len <= 32);
/* perform arithmetic in host byte order for shift correctness */
mask = (~0) << (32 - cidr_len);
return htonl(ntohl(addrn) & mask);
}
/*
* Simple utility in a .c file, mainly so that inline functions in .h
* files don't need to include RTE header files.
*/
void ompi_btl_usnic_util_abort(const char *msg, const char *file, int line,
int ret)
{
opal_show_help("help-mpi-btl-usnic.txt", "internal error after init",
true,
ompi_process_info.nodename,
msg, file, line, strerror(ret));
ompi_rte_abort(ret, NULL);
/* Never returns */
}
/* Return the largest size data size that can be packed into max_len using the
* given convertor. For example, a 1000 byte max_len buffer may only be able
* to hold 998 bytes if an indivisible convertor element straddles the 1000
* byte boundary.
*
* This routine internally clones the convertor and does not mutate it!
*/
size_t ompi_btl_usnic_convertor_pack_peek(
const opal_convertor_t *conv,
size_t max_len)
{
int rc;
size_t packable_len, position;
opal_convertor_t temp;
OBJ_CONSTRUCT(&temp, opal_convertor_t);
position = conv->bConverted + max_len;
rc = opal_convertor_clone_with_position(conv, &temp, 1, &position);
if (OPAL_UNLIKELY(rc < 0)) {
BTL_ERROR(("unexpected convertor error"));
abort(); /* XXX */
}
assert(position >= conv->bConverted);
packable_len = position - conv->bConverted;
OBJ_DESTRUCT(&temp);
return packable_len;
}