Коммит
9c170a8c00
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -9,7 +10,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
@ -430,6 +431,7 @@ int opal_dss_unpack_float(opal_buffer_t *buffer, void *dest,
|
||||
tmp = strtof(convert, NULL);
|
||||
memcpy(&desttmp[i], &tmp, sizeof(tmp));
|
||||
free(convert);
|
||||
convert = NULL;
|
||||
}
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
@ -460,6 +462,7 @@ int opal_dss_unpack_double(opal_buffer_t *buffer, void *dest,
|
||||
tmp = strtod(convert, NULL);
|
||||
memcpy(&desttmp[i], &tmp, sizeof(tmp));
|
||||
free(convert);
|
||||
convert = NULL;
|
||||
}
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
@ -641,7 +641,9 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device)
|
||||
}
|
||||
|
||||
if(mca_btl_openib_component.use_async_event_thread) {
|
||||
mca_btl_openib_async_cmd_t async_command;
|
||||
mca_btl_openib_async_cmd_t async_command = {.a_cmd = OPENIB_ASYNC_CMD_FD_ADD,
|
||||
.fd = device->ib_dev_context->async_fd,
|
||||
.qp = NULL};
|
||||
|
||||
/* start the async even thread if it is not already started */
|
||||
if (start_async_event_thread() != OPAL_SUCCESS)
|
||||
@ -649,8 +651,6 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device)
|
||||
|
||||
device->got_fatal_event = false;
|
||||
device->got_port_event = false;
|
||||
async_command.a_cmd = OPENIB_ASYNC_CMD_FD_ADD;
|
||||
async_command.fd = device->ib_dev_context->async_fd;
|
||||
if (write(mca_btl_openib_component.async_pipe[1],
|
||||
&async_command, sizeof(mca_btl_openib_async_cmd_t))<0){
|
||||
BTL_ERROR(("Failed to write to pipe [%d]",errno));
|
||||
@ -699,7 +699,7 @@ static int prepare_device_for_use (mca_btl_openib_device_t *device)
|
||||
if (mca_btl_openib_component.max_eager_rdma > 0 &&
|
||||
device->use_eager_rdma) {
|
||||
device->eager_rdma_buffers =
|
||||
(mca_btl_base_endpoint_t **) calloc(mca_btl_openib_component.max_eager_rdma * device->btls,
|
||||
(mca_btl_base_endpoint_t **) calloc((size_t) mca_btl_openib_component.max_eager_rdma * device->btls,
|
||||
sizeof(mca_btl_openib_endpoint_t*));
|
||||
if(NULL == device->eager_rdma_buffers) {
|
||||
BTL_ERROR(("Memory allocation fails"));
|
||||
|
@ -4,7 +4,7 @@
|
||||
* Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2006-2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved
|
||||
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Bull SAS. All rights reserved.
|
||||
@ -170,9 +170,10 @@ static int btl_openib_async_commandh(struct mca_btl_openib_async_poll *devices_p
|
||||
{
|
||||
struct pollfd *async_pollfd_tmp;
|
||||
mca_btl_openib_async_cmd_t cmd;
|
||||
int fd,flags,j;
|
||||
int fd,flags,j,ret;
|
||||
/* Got command from main thread */
|
||||
if (read(devices_poll->async_pollfd[0].fd, &cmd, sizeof(mca_btl_openib_async_cmd_t)) < 0) {
|
||||
ret = read(devices_poll->async_pollfd[0].fd, &cmd, sizeof(mca_btl_openib_async_cmd_t));
|
||||
if (sizeof(mca_btl_openib_async_cmd_t) != ret) {
|
||||
BTL_ERROR(("Read failed [%d]",errno));
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
@ -328,7 +329,6 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
|
||||
int j;
|
||||
mca_btl_openib_device_t *device = NULL;
|
||||
struct ibv_async_event event;
|
||||
bool xrc_event = false;
|
||||
int event_type;
|
||||
|
||||
/* We need to find correct device and process this event */
|
||||
@ -356,6 +356,7 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
|
||||
/* is it XRC event ?*/
|
||||
#if OPAL_HAVE_CONNECTX_XRC_DOMAINS
|
||||
#else
|
||||
bool xrc_event = false;
|
||||
if (IBV_XRC_QP_EVENT_FLAG & event.event_type) {
|
||||
xrc_event = true;
|
||||
/* Clean the bitnd handel as usual */
|
||||
@ -368,14 +369,14 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
|
||||
BTL_ERROR(("Alternative path migration event reported"));
|
||||
if (APM_ENABLED) {
|
||||
BTL_ERROR(("Trying to find additional path..."));
|
||||
if (!xrc_event)
|
||||
mca_btl_openib_load_apm(event.element.qp,
|
||||
qp2endpoint(event.element.qp, device));
|
||||
#if HAVE_XRC && !OPAL_HAVE_CONNECTX_XRC_DOMAINS
|
||||
else
|
||||
if (xrc_event)
|
||||
mca_btl_openib_load_apm_xrc_rcv(event.element.xrc_qp_num,
|
||||
xrc_qp2endpoint(event.element.xrc_qp_num, device));
|
||||
else
|
||||
#endif
|
||||
mca_btl_openib_load_apm(event.element.qp,
|
||||
qp2endpoint(event.element.qp, device));
|
||||
}
|
||||
break;
|
||||
case IBV_EVENT_DEVICE_FATAL:
|
||||
@ -383,6 +384,7 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
|
||||
device->got_fatal_event = true;
|
||||
/* It is not critical to protect the counter */
|
||||
OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1);
|
||||
/* fall through */
|
||||
case IBV_EVENT_CQ_ERR:
|
||||
case IBV_EVENT_QP_FATAL:
|
||||
if (event_type == IBV_EVENT_QP_FATAL) {
|
||||
@ -415,15 +417,13 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
|
||||
opal_show_help("help-mpi-btl-openib.txt", "of error event",
|
||||
true,opal_process_info.nodename, (int)getpid(),
|
||||
event_type,
|
||||
openib_event_to_str((enum ibv_event_type)event_type),
|
||||
xrc_event ? "true" : "false");
|
||||
openib_event_to_str((enum ibv_event_type)event_type));
|
||||
break;
|
||||
case IBV_EVENT_PORT_ERR:
|
||||
opal_show_help("help-mpi-btl-openib.txt", "of error event",
|
||||
true,opal_process_info.nodename, (int)getpid(),
|
||||
event_type,
|
||||
openib_event_to_str((enum ibv_event_type)event_type),
|
||||
xrc_event ? "true" : "false");
|
||||
openib_event_to_str((enum ibv_event_type)event_type));
|
||||
/* Set the flag to indicate port error */
|
||||
device->got_port_event = true;
|
||||
OPAL_THREAD_ADD32(&mca_btl_openib_component.error_counter, 1);
|
||||
@ -451,7 +451,7 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
|
||||
default:
|
||||
opal_show_help("help-mpi-btl-openib.txt", "of unknown event",
|
||||
true,opal_process_info.nodename, (int)getpid(),
|
||||
event_type, xrc_event ? "true" : "false");
|
||||
event_type);
|
||||
}
|
||||
ibv_ack_async_event(&event);
|
||||
} else {
|
||||
@ -545,7 +545,7 @@ int btl_openib_async_command_done(int exp)
|
||||
{
|
||||
int comp;
|
||||
if (read(mca_btl_openib_component.async_comp_pipe[0], &comp,
|
||||
sizeof(int)) < 0){
|
||||
sizeof(int)) < (int) sizeof (int)){
|
||||
BTL_ERROR(("Failed to read from pipe"));
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
@ -240,8 +240,8 @@ static int btl_openib_component_close(void)
|
||||
/* Tell the async thread to shutdown */
|
||||
if (mca_btl_openib_component.use_async_event_thread &&
|
||||
0 != mca_btl_openib_component.async_thread) {
|
||||
mca_btl_openib_async_cmd_t async_command;
|
||||
async_command.a_cmd = OPENIB_ASYNC_THREAD_EXIT;
|
||||
mca_btl_openib_async_cmd_t async_command = {.a_cmd = OPENIB_ASYNC_THREAD_EXIT,
|
||||
.fd = -1, .qp = NULL};
|
||||
if (write(mca_btl_openib_component.async_pipe[1], &async_command,
|
||||
sizeof(mca_btl_openib_async_cmd_t)) < 0) {
|
||||
BTL_ERROR(("Failed to communicate with async event thread"));
|
||||
@ -948,9 +948,9 @@ static void device_destruct(mca_btl_openib_device_t *device)
|
||||
/* signaling to async_tread to stop poll for this device */
|
||||
if (mca_btl_openib_component.use_async_event_thread &&
|
||||
-1 != mca_btl_openib_component.async_pipe[1]) {
|
||||
mca_btl_openib_async_cmd_t async_command;
|
||||
async_command.a_cmd = OPENIB_ASYNC_CMD_FD_REMOVE;
|
||||
async_command.fd = device->ib_dev_context->async_fd;
|
||||
mca_btl_openib_async_cmd_t async_command = {.a_cmd = OPENIB_ASYNC_CMD_FD_REMOVE,
|
||||
.fd = device->ib_dev_context->async_fd,
|
||||
.qp = NULL};
|
||||
if (write(mca_btl_openib_component.async_pipe[1], &async_command,
|
||||
sizeof(mca_btl_openib_async_cmd_t)) < 0){
|
||||
BTL_ERROR(("Failed to write to pipe"));
|
||||
@ -1223,7 +1223,7 @@ static int setup_qps(void)
|
||||
int num_xrc_qps = 0, num_pp_qps = 0, num_srq_qps = 0, qp = 0;
|
||||
uint32_t max_qp_size, max_size_needed;
|
||||
int32_t min_freelist_size = 0;
|
||||
int smallest_pp_qp = 0, ret = OPAL_ERROR;
|
||||
int smallest_pp_qp = INT_MAX, ret = OPAL_ERROR;
|
||||
|
||||
queues = opal_argv_split(mca_btl_openib_component.receive_queues, ':');
|
||||
if (0 == opal_argv_count(queues)) {
|
||||
@ -1264,6 +1264,8 @@ static int setup_qps(void)
|
||||
}
|
||||
qp++;
|
||||
}
|
||||
|
||||
#if HAVE_XRC
|
||||
/* Current XRC implementation can't used with other QP types - PP
|
||||
and SRQ */
|
||||
if (num_xrc_qps > 0 && (num_pp_qps > 0 || num_srq_qps > 0)) {
|
||||
@ -1282,6 +1284,8 @@ static int setup_qps(void)
|
||||
ret = OPAL_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
#endif
|
||||
|
||||
mca_btl_openib_component.num_pp_qps = num_pp_qps;
|
||||
mca_btl_openib_component.num_srq_qps = num_srq_qps;
|
||||
mca_btl_openib_component.num_xrc_qps = num_xrc_qps;
|
||||
@ -1318,6 +1322,15 @@ static int setup_qps(void)
|
||||
/* by default set rd_low to be 3/4 of rd_num */
|
||||
rd_low = atoi_param(P(3), rd_num - (rd_num / 4));
|
||||
rd_win = atoi_param(P(4), (rd_num - rd_low) * 2);
|
||||
|
||||
if (0 >= rd_win) {
|
||||
opal_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid pp qp specification", true,
|
||||
opal_process_info.nodename, queues[qp]);
|
||||
ret = OPAL_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
|
||||
rd_rsv = atoi_param(P(5), (rd_num * 2) / rd_win);
|
||||
|
||||
BTL_VERBOSE(("pp: rd_num is %d rd_low is %d rd_win %d rd_rsv %d",
|
||||
@ -1437,7 +1450,11 @@ static int setup_qps(void)
|
||||
}
|
||||
|
||||
mca_btl_openib_component.rdma_qp = mca_btl_openib_component.num_qps - 1;
|
||||
if (mca_btl_openib_component.num_qps > smallest_pp_qp) {
|
||||
mca_btl_openib_component.credits_qp = smallest_pp_qp;
|
||||
} else {
|
||||
mca_btl_openib_component.credits_qp = mca_btl_openib_component.num_qps - 1;
|
||||
}
|
||||
|
||||
ret = OPAL_SUCCESS;
|
||||
error:
|
||||
@ -1453,23 +1470,33 @@ error:
|
||||
}
|
||||
|
||||
/* read a single integer from a linux module parameters file */
|
||||
static uint64_t read_module_param(char *file, uint64_t value)
|
||||
static uint64_t read_module_param(char *file, uint64_t value, uint64_t max)
|
||||
{
|
||||
int fd = open(file, O_RDONLY);
|
||||
char buffer[64];
|
||||
uint64_t ret;
|
||||
int rc;
|
||||
|
||||
if (0 > fd) {
|
||||
return value;
|
||||
}
|
||||
|
||||
read (fd, buffer, 64);
|
||||
rc = read (fd, buffer, 64);
|
||||
|
||||
close (fd);
|
||||
|
||||
if (0 == rc) {
|
||||
return value;
|
||||
}
|
||||
|
||||
errno = 0;
|
||||
ret = strtoull(buffer, NULL, 10);
|
||||
|
||||
if (ret > max) {
|
||||
/* NTH: probably should report a bogus value */
|
||||
ret = max;
|
||||
}
|
||||
|
||||
return (0 == errno) ? ret : value;
|
||||
}
|
||||
|
||||
@ -1517,8 +1544,8 @@ static uint64_t calculate_max_reg (const char *device_name)
|
||||
|
||||
} else if (!strncmp(device_name, "mlx4", 4)) {
|
||||
if (0 == stat("/sys/module/mlx4_core/parameters/log_num_mtt", &statinfo)) {
|
||||
mtts_per_seg = 1 << read_module_param("/sys/module/mlx4_core/parameters/log_mtts_per_seg", 1);
|
||||
num_mtt = 1 << read_module_param("/sys/module/mlx4_core/parameters/log_num_mtt", 20);
|
||||
mtts_per_seg = 1 << read_module_param("/sys/module/mlx4_core/parameters/log_mtts_per_seg", 1, 63);
|
||||
num_mtt = 1 << read_module_param("/sys/module/mlx4_core/parameters/log_mtts_per_seg", 1, 63);
|
||||
if (1 == num_mtt) {
|
||||
/* NTH: is 19 a minimum? when log_num_mtt is set to 0 use 19 */
|
||||
num_mtt = 1 << 19;
|
||||
@ -1530,9 +1557,9 @@ static uint64_t calculate_max_reg (const char *device_name)
|
||||
|
||||
} else if (!strncmp(device_name, "mthca", 5)) {
|
||||
if (0 == stat("/sys/module/ib_mthca/parameters/num_mtt", &statinfo)) {
|
||||
mtts_per_seg = 1 << read_module_param("/sys/module/ib_mthca/parameters/log_mtts_per_seg", 1);
|
||||
num_mtt = read_module_param("/sys/module/ib_mthca/parameters/num_mtt", 1 << 20);
|
||||
reserved_mtt = read_module_param("/sys/module/ib_mthca/parameters/fmr_reserved_mtts", 0);
|
||||
mtts_per_seg = 1 << read_module_param("/sys/module/ib_mthca/parameters/log_mtts_per_seg", 1, 63);
|
||||
num_mtt = read_module_param("/sys/module/ib_mthca/parameters/num_mtt", 1 << 20, (uint64_t) -1);
|
||||
reserved_mtt = read_module_param("/sys/module/ib_mthca/parameters/fmr_reserved_mtts", 0, (uint64_t) -1);
|
||||
|
||||
max_reg = (num_mtt - reserved_mtt) * opal_getpagesize () * mtts_per_seg;
|
||||
} else {
|
||||
@ -2477,7 +2504,6 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
mca_btl_openib_module_t * openib_btl;
|
||||
mca_btl_base_selected_module_t* ib_selected;
|
||||
opal_list_item_t* item;
|
||||
unsigned short seedv[3];
|
||||
mca_btl_openib_frag_init_data_t *init_data;
|
||||
struct dev_distance *dev_sorted;
|
||||
float distance;
|
||||
@ -2517,11 +2543,6 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
goto no_btls;
|
||||
}
|
||||
|
||||
seedv[0] = OPAL_PROC_MY_NAME.vpid;
|
||||
seedv[1] = opal_timer_base_get_cycles();
|
||||
seedv[2] = opal_timer_base_get_cycles();
|
||||
seed48(seedv);
|
||||
|
||||
/* Read in INI files with device-specific parameters */
|
||||
if (OPAL_SUCCESS != (ret = opal_btl_openib_ini_init())) {
|
||||
goto no_btls;
|
||||
@ -2970,6 +2991,7 @@ static int progress_no_credits_pending_frags(mca_btl_base_endpoint_t *ep)
|
||||
ep->qps[qp].u.pp_qp.sd_credits > 0 ||
|
||||
!BTL_OPENIB_QP_TYPE_PP(qp)); --len) {
|
||||
frag = opal_list_remove_first(&ep->qps[qp].no_credits_pending_frags[pri]);
|
||||
assert (NULL != frag);
|
||||
|
||||
/* If _endpoint_post_send() fails because of
|
||||
RESOURCE_BUSY, then the frag was re-added to the
|
||||
@ -3326,19 +3348,25 @@ static char* btl_openib_component_status_to_string(enum ibv_wc_status status)
|
||||
static void
|
||||
progress_pending_frags_wqe(mca_btl_base_endpoint_t *ep, const int qpn)
|
||||
{
|
||||
int i;
|
||||
int ret;
|
||||
opal_list_item_t *frag;
|
||||
mca_btl_openib_qp_t *qp = ep->qps[qpn].qp;
|
||||
|
||||
OPAL_THREAD_LOCK(&ep->endpoint_lock);
|
||||
for(i = 0; i < 2; i++) {
|
||||
for(int i = 0; i < 2; i++) {
|
||||
while(qp->sd_wqe > 0) {
|
||||
mca_btl_base_endpoint_t *tmp_ep;
|
||||
frag = opal_list_remove_first(&ep->qps[qpn].no_wqe_pending_frags[i]);
|
||||
if(NULL == frag)
|
||||
break;
|
||||
tmp_ep = to_com_frag(frag)->endpoint;
|
||||
mca_btl_openib_endpoint_post_send(tmp_ep, to_send_frag(frag));
|
||||
ret = mca_btl_openib_endpoint_post_send(tmp_ep, to_send_frag(frag));
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
/* NTH: this handles retrying if we are out of credits but other errors are not
|
||||
* handled (maybe abort?). */
|
||||
opal_list_prepend (&ep->qps[qpn].no_wqe_pending_frags[i], (opal_list_item_t *) frag);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
|
||||
@ -3388,9 +3416,19 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
|
||||
* to. For send fragments "order" contains QP idx the fragment was send
|
||||
* through */
|
||||
qp = des->order;
|
||||
|
||||
if (IBV_WC_RECV == wc->opcode && (wc->wc_flags & IBV_WC_WITH_IMM)) {
|
||||
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
wc->imm_data = ntohl(wc->imm_data);
|
||||
#endif
|
||||
frag->endpoint = (mca_btl_openib_endpoint_t*)
|
||||
opal_pointer_array_get_item(device->endpoints, wc->imm_data);
|
||||
}
|
||||
|
||||
endpoint = frag->endpoint;
|
||||
|
||||
if(endpoint)
|
||||
assert (NULL != endpoint);
|
||||
|
||||
openib_btl = endpoint->endpoint_btl;
|
||||
|
||||
if(wc->status != IBV_WC_SUCCESS) {
|
||||
@ -3412,6 +3450,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
|
||||
get_frag->cb.func (&openib_btl->super, endpoint, (void *)(intptr_t) frag->sg_entry.addr,
|
||||
get_frag->cb.local_handle, get_frag->cb.context, get_frag->cb.data,
|
||||
OPAL_SUCCESS);
|
||||
/* fall through */
|
||||
case IBV_WC_RDMA_WRITE:
|
||||
if (MCA_BTL_OPENIB_FRAG_SEND_USER == openib_frag_type(des)) {
|
||||
mca_btl_openib_put_frag_t *put_frag = to_put_frag(des);
|
||||
@ -3476,16 +3515,6 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
|
||||
OPAL_OUTPUT((-1, "Got WC: RDMA_RECV, qp %d, src qp %d, WR ID %" PRIx64,
|
||||
wc->qp_num, wc->src_qp, wc->wr_id));
|
||||
|
||||
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
wc->imm_data = ntohl(wc->imm_data);
|
||||
#endif
|
||||
if(wc->wc_flags & IBV_WC_WITH_IMM) {
|
||||
endpoint = (mca_btl_openib_endpoint_t*)
|
||||
opal_pointer_array_get_item(device->endpoints, wc->imm_data);
|
||||
frag->endpoint = endpoint;
|
||||
openib_btl = endpoint->endpoint_btl;
|
||||
}
|
||||
|
||||
/* Process a RECV */
|
||||
if(btl_openib_handle_incoming(openib_btl, endpoint, to_recv_frag(frag),
|
||||
wc->byte_len) != OPAL_SUCCESS) {
|
||||
|
@ -689,7 +689,7 @@ static void mca_btl_openib_endpoint_credits(
|
||||
|
||||
/* we don't acquire a WQE for credit message - so decrement.
|
||||
* Note: doing it for QP used for credit management */
|
||||
qp_get_wqe(ep, des->order);
|
||||
(void) qp_get_wqe(ep, des->order);
|
||||
|
||||
if(check_send_credits(ep, qp) || check_eager_rdma_credits(ep))
|
||||
mca_btl_openib_endpoint_send_credits(ep, qp);
|
||||
|
@ -427,14 +427,15 @@ static inline mca_btl_openib_coalesced_frag_t *alloc_coalesced_frag(void)
|
||||
do { \
|
||||
opal_free_list_return (to_base_frag(frag)->list, \
|
||||
(opal_free_list_item_t*)(frag)); \
|
||||
} while(0);
|
||||
} while(0)
|
||||
|
||||
#define MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(list) \
|
||||
while(!opal_list_is_empty(list)){ \
|
||||
opal_list_item_t *frag_item; \
|
||||
frag_item = opal_list_remove_first(list); \
|
||||
MCA_BTL_IB_FRAG_RETURN(frag_item); \
|
||||
do { \
|
||||
opal_list_item_t *_frag_item; \
|
||||
while (NULL != (_frag_item = opal_list_remove_first(list))) { \
|
||||
MCA_BTL_IB_FRAG_RETURN(_frag_item); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
struct mca_btl_openib_module_t;
|
||||
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2006-2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
|
||||
@ -105,6 +105,8 @@ static int reg_string(const char* param_name,
|
||||
{
|
||||
int index;
|
||||
|
||||
assert (NULL != storage);
|
||||
|
||||
/* The MCA variable system will not change this pointer */
|
||||
*storage = (char *) default_value;
|
||||
index = mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
|
||||
@ -117,7 +119,7 @@ static int reg_string(const char* param_name,
|
||||
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
|
||||
}
|
||||
|
||||
if (0 != (flags & REGSTR_EMPTY_OK) && (NULL == storage || 0 == strlen(*storage))) {
|
||||
if (0 != (flags & REGSTR_EMPTY_OK) && (NULL == *storage || 0 == strlen(*storage))) {
|
||||
opal_output(0, "Bad parameter value for parameter \"%s\"",
|
||||
param_name);
|
||||
return OPAL_ERR_BAD_PARAM;
|
||||
|
@ -365,7 +365,8 @@ Per-peer receive queues require between 2 and 5 parameters:
|
||||
1. Buffer size in bytes (mandatory)
|
||||
2. Number of buffers (mandatory)
|
||||
3. Low buffer count watermark (optional; defaults to (num_buffers / 2))
|
||||
4. Credit window size (optional; defaults to (low_watermark / 2))
|
||||
4. Credit window size (optional; defaults to (low_watermark / 2),
|
||||
must be > 0)
|
||||
5. Number of buffers reserved for credit messages (optional;
|
||||
defaults to (num_buffers*2-1)/credit_window)
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -11,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2006-2012 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2006-2007 Voltaire All rights reserved.
|
||||
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
@ -226,21 +227,34 @@ opal_list_t *opal_common_verbs_find_ports(const char *if_include,
|
||||
opal_common_verbs_device_item_t *di;
|
||||
opal_common_verbs_port_item_t *pi;
|
||||
int rc;
|
||||
uint32_t i, j;
|
||||
uint32_t j;
|
||||
opal_list_t *port_list = NULL;
|
||||
opal_list_item_t *item;
|
||||
bool want;
|
||||
|
||||
/* Sanity check the include/exclude params */
|
||||
if (NULL != if_include && NULL != if_exclude) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Query all the IBV devices on the machine. Use an ompi
|
||||
compatibility function, because how to get this list changed
|
||||
over the history of the IBV API. */
|
||||
devices = opal_ibv_get_device_list(&num_devs);
|
||||
if (0 == num_devs) {
|
||||
opal_output_verbose(5, stream, "no verbs interfaces found");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
opal_output_verbose(5, stream, "found %d verbs interface%s",
|
||||
num_devs, (num_devs != 1) ? "s" : "");
|
||||
|
||||
/* Allocate a list to fill */
|
||||
port_list = OBJ_NEW(opal_list_t);
|
||||
if (NULL == port_list) {
|
||||
goto err_free_argv;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Sanity check the include/exclude params */
|
||||
if (NULL != if_include && NULL != if_exclude) {
|
||||
return port_list;
|
||||
} else if (NULL != if_include) {
|
||||
if (NULL != if_include) {
|
||||
opal_output_verbose(5, stream, "finding verbs interfaces, including %s",
|
||||
if_include);
|
||||
if_include_list = opal_argv_split(if_include, ',');
|
||||
@ -252,22 +266,10 @@ opal_list_t *opal_common_verbs_find_ports(const char *if_include,
|
||||
if_sanity_list = opal_argv_copy(if_exclude_list);
|
||||
}
|
||||
|
||||
/* Query all the IBV devices on the machine. Use an ompi
|
||||
compatibility function, because how to get this list changed
|
||||
over the history of the IBV API. */
|
||||
devices = opal_ibv_get_device_list(&num_devs);
|
||||
if (0 == num_devs) {
|
||||
opal_output_verbose(5, stream, "no verbs interfaces found");
|
||||
goto err_free_argv;
|
||||
} else {
|
||||
opal_output_verbose(5, stream, "found %d verbs interface%s",
|
||||
num_devs, (num_devs != 1) ? "s" : "");
|
||||
}
|
||||
|
||||
/* Now loop through all the devices. Get the attributes for each
|
||||
port on each device to see if they match our selection
|
||||
criteria. */
|
||||
for (i = 0; (int32_t) i < num_devs; ++i) {
|
||||
for (int32_t i = 0; (int32_t) i < num_devs; ++i) {
|
||||
/* See if this device is on the include/exclude sanity check
|
||||
list. If it is, remove it from the sanity check list
|
||||
(i.e., we should end up with an empty list at the end if
|
||||
@ -481,27 +483,23 @@ opal_list_t *opal_common_verbs_find_ports(const char *if_include,
|
||||
opal_argv_free(if_sanity_list);
|
||||
}
|
||||
|
||||
opal_argv_free(if_include_list);
|
||||
opal_argv_free(if_exclude_list);
|
||||
|
||||
/* All done! */
|
||||
opal_ibv_free_device_list(devices);
|
||||
return port_list;
|
||||
|
||||
err_free_port_list:
|
||||
for (item = opal_list_remove_first(port_list);
|
||||
item != NULL;
|
||||
item = opal_list_remove_first(port_list)) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OPAL_LIST_RELEASE(port_list);
|
||||
opal_ibv_free_device_list(devices);
|
||||
|
||||
err_free_argv:
|
||||
if (NULL != if_sanity_list) {
|
||||
opal_argv_free(if_sanity_list);
|
||||
if_sanity_list = NULL;
|
||||
}
|
||||
opal_argv_free(if_include_list);
|
||||
if_include_list = NULL;
|
||||
opal_argv_free(if_exclude_list);
|
||||
if_exclude_list = NULL;
|
||||
|
||||
return port_list;
|
||||
opal_argv_free(if_include_list);
|
||||
opal_argv_free(if_exclude_list);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
@ -115,19 +115,11 @@ OBJ_CLASS_INSTANCE(opal_crs_base_ckpt_options_t,
|
||||
* Utility functions
|
||||
*/
|
||||
int opal_crs_base_metadata_read_token(FILE *metadata, char * token, char ***value) {
|
||||
int exit_status = OPAL_SUCCESS;
|
||||
char * loc_token = NULL;
|
||||
char * loc_value = NULL;
|
||||
int argc = 0;
|
||||
|
||||
/* Dummy check */
|
||||
if( NULL == token ) {
|
||||
exit_status = OPAL_ERROR;
|
||||
goto cleanup;
|
||||
}
|
||||
if( NULL == metadata ) {
|
||||
exit_status = OPAL_ERROR;
|
||||
goto cleanup;
|
||||
if (NULL == token || NULL == metadata) {
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -135,6 +127,8 @@ int opal_crs_base_metadata_read_token(FILE *metadata, char * token, char ***valu
|
||||
*/
|
||||
rewind(metadata);
|
||||
do {
|
||||
char *loc_token = NULL, *loc_value = NULL;
|
||||
|
||||
/* Get next token */
|
||||
if( OPAL_SUCCESS != metadata_extract_next_token(metadata, &loc_token, &loc_value) ) {
|
||||
break;
|
||||
@ -144,13 +138,12 @@ int opal_crs_base_metadata_read_token(FILE *metadata, char * token, char ***valu
|
||||
if(0 == strncmp(token, loc_token, strlen(loc_token)) ) {
|
||||
opal_argv_append(&argc, value, loc_value);
|
||||
}
|
||||
|
||||
free (loc_token);
|
||||
free (loc_value);
|
||||
} while (0 == feof(metadata));
|
||||
|
||||
cleanup:
|
||||
if (NULL != metadata) {
|
||||
rewind(metadata);
|
||||
}
|
||||
return exit_status;
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
int opal_crs_base_extract_expected_component(FILE *metadata, char ** component_name, int *prev_pid)
|
||||
|
@ -1,5 +1,8 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -26,9 +29,11 @@ static void cleanup_cred(opal_sec_cred_t *cred)
|
||||
}
|
||||
if (NULL != cred->method) {
|
||||
free(cred->method);
|
||||
cred->method = NULL;
|
||||
}
|
||||
if (NULL != cred->credential) {
|
||||
free(cred->credential);
|
||||
cred->credential = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@ -99,7 +104,7 @@ int opal_sec_base_validate(char *payload, size_t size, char **method)
|
||||
opal_sec_handle_t *hdl;
|
||||
opal_buffer_t buf;
|
||||
int cnt, rc;
|
||||
opal_sec_cred_t cred;
|
||||
opal_sec_cred_t cred = {.method = NULL, .credential = NULL};
|
||||
|
||||
opal_output_verbose(5, opal_sec_base_framework.framework_output,
|
||||
"opal_sec: Received credential of size %lu",
|
||||
|
@ -54,7 +54,7 @@ extern int opal_util_keyval_yylineno;
|
||||
#define YY_NO_UNPUT 1
|
||||
#define YY_SKIP_YYWRAP 1
|
||||
|
||||
enum {
|
||||
enum opal_keyval_parse_state_t {
|
||||
OPAL_UTIL_KEYVAL_PARSE_DONE,
|
||||
OPAL_UTIL_KEYVAL_PARSE_ERROR,
|
||||
|
||||
@ -68,5 +68,6 @@ enum {
|
||||
|
||||
OPAL_UTIL_KEYVAL_PARSE_MAX
|
||||
};
|
||||
typedef enum opal_keyval_parse_state_t opal_keyval_parse_state_t;
|
||||
|
||||
#endif
|
||||
|
@ -1,3 +1,23 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "opal_config.h"
|
||||
|
||||
@ -19,7 +39,7 @@ static size_t key_buffer_len = 0;
|
||||
static opal_mutex_t keyval_mutex;
|
||||
|
||||
static int parse_line(void);
|
||||
static int parse_line_new(int first_val);
|
||||
static int parse_line_new(opal_keyval_parse_state_t first_val);
|
||||
static void parse_error(int num);
|
||||
|
||||
static char *env_str = NULL;
|
||||
@ -249,31 +269,45 @@ static int save_param_name(const char* prefix, const char* suffix)
|
||||
static int add_to_env_str(char *var, char *val)
|
||||
{
|
||||
int sz, varsz, valsz;
|
||||
void *tmp;
|
||||
|
||||
if (NULL == var) {
|
||||
return OPAL_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
if (NULL != env_str) {
|
||||
varsz = (NULL != var) ? strlen(var) : 0;
|
||||
varsz = strlen(var);
|
||||
valsz = (NULL != val) ? strlen(val) : 0;
|
||||
sz = strlen(env_str)+varsz+valsz+2;
|
||||
if (envsize <= sz) {
|
||||
envsize *=2;
|
||||
env_str = realloc(env_str, envsize);
|
||||
memset(env_str + strlen(env_str), 0, envsize/2);
|
||||
|
||||
tmp = realloc(env_str, envsize);
|
||||
if (NULL == tmp) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
env_str = tmp;
|
||||
}
|
||||
strcat(env_str, ";");
|
||||
} else {
|
||||
env_str = malloc(envsize);
|
||||
memset(env_str, 0, envsize);
|
||||
env_str = calloc(1, envsize);
|
||||
if (NULL == env_str) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
|
||||
strcat(env_str, var);
|
||||
if (NULL != val) {
|
||||
strcat(env_str, "=");
|
||||
strcat(env_str, val);
|
||||
}
|
||||
return 0;
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static int parse_line_new(int first_val)
|
||||
static int parse_line_new(opal_keyval_parse_state_t first_val)
|
||||
{
|
||||
int val;
|
||||
opal_keyval_parse_state_t val;
|
||||
char *tmp;
|
||||
|
||||
val = first_val;
|
||||
@ -307,16 +341,14 @@ static int parse_line_new(int first_val)
|
||||
} else if (OPAL_UTIL_KEYVAL_PARSE_ENVVAR == val) {
|
||||
save_param_name("-x", "=");
|
||||
add_to_env_str(key_buffer, NULL);
|
||||
}
|
||||
val = opal_util_keyval_yylex();
|
||||
}
|
||||
|
||||
if (OPAL_UTIL_KEYVAL_PARSE_DONE == val ||
|
||||
OPAL_UTIL_KEYVAL_PARSE_NEWLINE == val) {
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
/* Nope -- we got something unexpected. Bonk! */
|
||||
} else {
|
||||
/* we got something unexpected. Bonk! */
|
||||
parse_error(6);
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
val = opal_util_keyval_yylex();
|
||||
}
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -10,8 +11,8 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
@ -119,6 +120,9 @@ int opal_util_init_sys_limits(char **errmsg)
|
||||
|
||||
/* parse the requested limits to set */
|
||||
lims = opal_argv_split(opal_set_max_sys_limits, ',');
|
||||
if (NULL == lims) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* each limit is expressed as a "param:value" pair */
|
||||
for (i=0; NULL != lims[i]; i++) {
|
||||
@ -223,9 +227,7 @@ int opal_util_init_sys_limits(char **errmsg)
|
||||
rc = OPAL_SUCCESS;
|
||||
|
||||
out:
|
||||
if (NULL != lims) {
|
||||
opal_argv_free(lims);
|
||||
}
|
||||
if (NULL != lim) {
|
||||
opal_argv_free(lim);
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user