1
1

btl/openib: fix coverity issues

CID 1269933 Uninitialized scalar variable (UNINIT)

This CID isn't really an error but it is best for both valgrind and
coverity cleanness to not write uninitialized data. Added an
initializer for async_command in btl_openib_component_close.

CID 1269930 Uninitialized scalar variable (UNINIT)

Same as above. Best not to write uninitialized data. Added an
initializer for async_command.

CID 1269701 Logically dead code (DEADCODE)

Coverity is correct. The smallest_pp_qp will always be 0. Changed the
initial value so that the smallest_pp_qp is set as intended. If no
per-per queue pair exists then use the last shared queue pair. This
queue pair should have the smallest message size. This will reduce
buffer waste.

CID 1269713 Logically dead code (DEADCODE)

False positive but easy to silence. The two check are meaningless if
HAVE_XRC is 0 so protect them with #if HAVE_XRC.

CID 1269726 Division or modulo by zero (DIVIDE_BY_ZERO)

Indeed an issue. If we get an invalid value for rd_win then this will
cause a divide-by-zero exception. Added a check to ensure rd_win is >
0. Also updated the help message to reflect this requirement.

CID 1269672 Ignoring number of bytes read (CHECKED_RETURN)

This error was somewhat intentional. Linux parameter files are
probably not empty but it is safer to check the return code of read to
make sure we got something. If 0 bytes are read this code could SEGV
whe running strtoull.

CID 1269836 Unintentional integer overflow (OVERFLOW_BEFORE_WIDEN)

Add a range check to read_module_param to ensure we do not
overflow. In the future it might be worthwhile to report an error
because these parameters should never cause overflow in this
calculation.

CID 1269692 Calling risky function (DC.WEAK_CRYPTO)

??? This call was added in 2006 but I see no calls to the rest of the
rand48 family of functions. Anyway, we SHOULD NEVER be calling seed48,
srand, etc because it messes with user code. Removed the call to
seed48.

CID 1269823 Dereference null return value (NULL_RETURNS)

This is likely a false positive. The endpoint lock is being held so no
other thread should be able to remove fragments from the list. Also,
mca_btl_openib_endpoint_post_send should not be removing items from
the list. If a NULL fragment is ever returned it will likely be a
coding error on the part of an Open MPI developer. Added an assert()
to catch this and quiet the coverity error.

CID 1269671 Unchecked return value (CHECKED_RETURN)

Added a check for the return code of mca_btl_openib_endpoint_post_send
to quiet the coverity error. It is unlikely this error path will be
traversed.

CID 1270229 Missing break in switch (MISSING_BREAK)

Add a comment to indicate that the fall-through is intentional.

CID 1269735 Dereference after null check (FORWARD_NULL)

There should always be an endpoint when handling a work
completion. The endpoint is either stored on the fragment or can be
looked up using the immediate data. Move the immediate data code up
and add an assert for a NULL endpoint.

CID 1269740 Dereference after null check (FORWARD_NULL)
CID 1269741 Explicit null dereferenced (FORWARD_NULL)

Similar to CID 1269735 fix.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
Nathan Hjelm 2015-05-27 09:43:42 -06:00
родитель 13e0a9da3a
Коммит 6b86e74218
2 изменённых файлов: 66 добавлений и 36 удалений

Просмотреть файл

@ -240,8 +240,8 @@ static int btl_openib_component_close(void)
/* Tell the async thread to shutdown */
if (mca_btl_openib_component.use_async_event_thread &&
0 != mca_btl_openib_component.async_thread) {
mca_btl_openib_async_cmd_t async_command;
async_command.a_cmd = OPENIB_ASYNC_THREAD_EXIT;
mca_btl_openib_async_cmd_t async_command = {.a_cmd = OPENIB_ASYNC_THREAD_EXIT,
.fd = -1, .qp = NULL};
if (write(mca_btl_openib_component.async_pipe[1], &async_command,
sizeof(mca_btl_openib_async_cmd_t)) < 0) {
BTL_ERROR(("Failed to communicate with async event thread"));
@ -948,9 +948,9 @@ static void device_destruct(mca_btl_openib_device_t *device)
/* signaling to async_tread to stop poll for this device */
if (mca_btl_openib_component.use_async_event_thread &&
-1 != mca_btl_openib_component.async_pipe[1]) {
mca_btl_openib_async_cmd_t async_command;
async_command.a_cmd = OPENIB_ASYNC_CMD_FD_REMOVE;
async_command.fd = device->ib_dev_context->async_fd;
mca_btl_openib_async_cmd_t async_command = {.a_cmd = OPENIB_ASYNC_CMD_FD_REMOVE,
.fd = device->ib_dev_context->async_fd,
.qp = NULL};
if (write(mca_btl_openib_component.async_pipe[1], &async_command,
sizeof(mca_btl_openib_async_cmd_t)) < 0){
BTL_ERROR(("Failed to write to pipe"));
@ -1223,7 +1223,7 @@ static int setup_qps(void)
int num_xrc_qps = 0, num_pp_qps = 0, num_srq_qps = 0, qp = 0;
uint32_t max_qp_size, max_size_needed;
int32_t min_freelist_size = 0;
int smallest_pp_qp = 0, ret = OPAL_ERROR;
int smallest_pp_qp = INT_MAX, ret = OPAL_ERROR;
queues = opal_argv_split(mca_btl_openib_component.receive_queues, ':');
if (0 == opal_argv_count(queues)) {
@ -1264,6 +1264,8 @@ static int setup_qps(void)
}
qp++;
}
#if HAVE_XRC
/* Current XRC implementation can't used with other QP types - PP
and SRQ */
if (num_xrc_qps > 0 && (num_pp_qps > 0 || num_srq_qps > 0)) {
@ -1282,6 +1284,8 @@ static int setup_qps(void)
ret = OPAL_ERR_BAD_PARAM;
goto error;
}
#endif
mca_btl_openib_component.num_pp_qps = num_pp_qps;
mca_btl_openib_component.num_srq_qps = num_srq_qps;
mca_btl_openib_component.num_xrc_qps = num_xrc_qps;
@ -1318,6 +1322,15 @@ static int setup_qps(void)
/* by default set rd_low to be 3/4 of rd_num */
rd_low = atoi_param(P(3), rd_num - (rd_num / 4));
rd_win = atoi_param(P(4), (rd_num - rd_low) * 2);
if (0 >= rd_win) {
opal_show_help("help-mpi-btl-openib.txt",
"invalid pp qp specification", true,
opal_process_info.nodename, queues[qp]);
ret = OPAL_ERR_BAD_PARAM;
goto error;
}
rd_rsv = atoi_param(P(5), (rd_num * 2) / rd_win);
BTL_VERBOSE(("pp: rd_num is %d rd_low is %d rd_win %d rd_rsv %d",
@ -1437,7 +1450,11 @@ static int setup_qps(void)
}
mca_btl_openib_component.rdma_qp = mca_btl_openib_component.num_qps - 1;
mca_btl_openib_component.credits_qp = smallest_pp_qp;
if (mca_btl_openib_component.num_qps > smallest_pp_qp) {
mca_btl_openib_component.credits_qp = smallest_pp_qp;
} else {
mca_btl_openib_component.credits_qp = mca_btl_openib_component.num_qps - 1;
}
ret = OPAL_SUCCESS;
error:
@ -1453,23 +1470,33 @@ error:
}
/* read a single integer from a linux module parameters file */
static uint64_t read_module_param(char *file, uint64_t value)
static uint64_t read_module_param(char *file, uint64_t value, uint64_t max)
{
int fd = open(file, O_RDONLY);
char buffer[64];
uint64_t ret;
int rc;
if (0 > fd) {
return value;
}
read (fd, buffer, 64);
rc = read (fd, buffer, 64);
close (fd);
if (0 == rc) {
return value;
}
errno = 0;
ret = strtoull(buffer, NULL, 10);
if (ret > max) {
/* NTH: probably should report a bogus value */
ret = max;
}
return (0 == errno) ? ret : value;
}
@ -1517,8 +1544,8 @@ static uint64_t calculate_max_reg (const char *device_name)
} else if (!strncmp(device_name, "mlx4", 4)) {
if (0 == stat("/sys/module/mlx4_core/parameters/log_num_mtt", &statinfo)) {
mtts_per_seg = 1 << read_module_param("/sys/module/mlx4_core/parameters/log_mtts_per_seg", 1);
num_mtt = 1 << read_module_param("/sys/module/mlx4_core/parameters/log_num_mtt", 20);
mtts_per_seg = 1 << read_module_param("/sys/module/mlx4_core/parameters/log_mtts_per_seg", 1, 63);
num_mtt = 1 << read_module_param("/sys/module/mlx4_core/parameters/log_mtts_per_seg", 1, 63);
if (1 == num_mtt) {
/* NTH: is 19 a minimum? when log_num_mtt is set to 0 use 19 */
num_mtt = 1 << 19;
@ -1530,9 +1557,9 @@ static uint64_t calculate_max_reg (const char *device_name)
} else if (!strncmp(device_name, "mthca", 5)) {
if (0 == stat("/sys/module/ib_mthca/parameters/num_mtt", &statinfo)) {
mtts_per_seg = 1 << read_module_param("/sys/module/ib_mthca/parameters/log_mtts_per_seg", 1);
num_mtt = read_module_param("/sys/module/ib_mthca/parameters/num_mtt", 1 << 20);
reserved_mtt = read_module_param("/sys/module/ib_mthca/parameters/fmr_reserved_mtts", 0);
mtts_per_seg = 1 << read_module_param("/sys/module/ib_mthca/parameters/log_mtts_per_seg", 1, 63);
num_mtt = read_module_param("/sys/module/ib_mthca/parameters/num_mtt", 1 << 20, (uint64_t) -1);
reserved_mtt = read_module_param("/sys/module/ib_mthca/parameters/fmr_reserved_mtts", 0, (uint64_t) -1);
max_reg = (num_mtt - reserved_mtt) * opal_getpagesize () * mtts_per_seg;
} else {
@ -2477,7 +2504,6 @@ btl_openib_component_init(int *num_btl_modules,
mca_btl_openib_module_t * openib_btl;
mca_btl_base_selected_module_t* ib_selected;
opal_list_item_t* item;
unsigned short seedv[3];
mca_btl_openib_frag_init_data_t *init_data;
struct dev_distance *dev_sorted;
float distance;
@ -2517,11 +2543,6 @@ btl_openib_component_init(int *num_btl_modules,
goto no_btls;
}
seedv[0] = OPAL_PROC_MY_NAME.vpid;
seedv[1] = opal_timer_base_get_cycles();
seedv[2] = opal_timer_base_get_cycles();
seed48(seedv);
/* Read in INI files with device-specific parameters */
if (OPAL_SUCCESS != (ret = opal_btl_openib_ini_init())) {
goto no_btls;
@ -2970,6 +2991,7 @@ static int progress_no_credits_pending_frags(mca_btl_base_endpoint_t *ep)
ep->qps[qp].u.pp_qp.sd_credits > 0 ||
!BTL_OPENIB_QP_TYPE_PP(qp)); --len) {
frag = opal_list_remove_first(&ep->qps[qp].no_credits_pending_frags[pri]);
assert (NULL != frag);
/* If _endpoint_post_send() fails because of
RESOURCE_BUSY, then the frag was re-added to the
@ -3326,19 +3348,25 @@ static char* btl_openib_component_status_to_string(enum ibv_wc_status status)
static void
progress_pending_frags_wqe(mca_btl_base_endpoint_t *ep, const int qpn)
{
int i;
int ret;
opal_list_item_t *frag;
mca_btl_openib_qp_t *qp = ep->qps[qpn].qp;
OPAL_THREAD_LOCK(&ep->endpoint_lock);
for(i = 0; i < 2; i++) {
for(int i = 0; i < 2; i++) {
while(qp->sd_wqe > 0) {
mca_btl_base_endpoint_t *tmp_ep;
frag = opal_list_remove_first(&ep->qps[qpn].no_wqe_pending_frags[i]);
if(NULL == frag)
break;
tmp_ep = to_com_frag(frag)->endpoint;
mca_btl_openib_endpoint_post_send(tmp_ep, to_send_frag(frag));
ret = mca_btl_openib_endpoint_post_send(tmp_ep, to_send_frag(frag));
if (OPAL_SUCCESS != ret) {
/* NTH: this handles retrying if we are out of credits but other errors are not
* handled (maybe abort?). */
opal_list_prepend (&ep->qps[qpn].no_wqe_pending_frags[i], (opal_list_item_t *) frag);
break;
}
}
}
OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
@ -3388,10 +3416,20 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
* to. For send fragments "order" contains QP idx the fragment was send
* through */
qp = des->order;
if (IBV_WC_RECV == wc->opcode && (wc->wc_flags & IBV_WC_WITH_IMM)) {
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
wc->imm_data = ntohl(wc->imm_data);
#endif
frag->endpoint = (mca_btl_openib_endpoint_t*)
opal_pointer_array_get_item(device->endpoints, wc->imm_data);
}
endpoint = frag->endpoint;
if(endpoint)
openib_btl = endpoint->endpoint_btl;
assert (NULL != endpoint);
openib_btl = endpoint->endpoint_btl;
if(wc->status != IBV_WC_SUCCESS) {
OPAL_OUTPUT((-1, "Got WC: ERROR"));
@ -3412,6 +3450,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
get_frag->cb.func (&openib_btl->super, endpoint, (void *)(intptr_t) frag->sg_entry.addr,
get_frag->cb.local_handle, get_frag->cb.context, get_frag->cb.data,
OPAL_SUCCESS);
/* fall through */
case IBV_WC_RDMA_WRITE:
if (MCA_BTL_OPENIB_FRAG_SEND_USER == openib_frag_type(des)) {
mca_btl_openib_put_frag_t *put_frag = to_put_frag(des);
@ -3476,16 +3515,6 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
OPAL_OUTPUT((-1, "Got WC: RDMA_RECV, qp %d, src qp %d, WR ID %" PRIx64,
wc->qp_num, wc->src_qp, wc->wr_id));
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
wc->imm_data = ntohl(wc->imm_data);
#endif
if(wc->wc_flags & IBV_WC_WITH_IMM) {
endpoint = (mca_btl_openib_endpoint_t*)
opal_pointer_array_get_item(device->endpoints, wc->imm_data);
frag->endpoint = endpoint;
openib_btl = endpoint->endpoint_btl;
}
/* Process a RECV */
if(btl_openib_handle_incoming(openib_btl, endpoint, to_recv_frag(frag),
wc->byte_len) != OPAL_SUCCESS) {

Просмотреть файл

@ -365,7 +365,8 @@ Per-peer receive queues require between 2 and 5 parameters:
1. Buffer size in bytes (mandatory)
2. Number of buffers (mandatory)
3. Low buffer count watermark (optional; defaults to (num_buffers / 2))
4. Credit window size (optional; defaults to (low_watermark / 2))
4. Credit window size (optional; defaults to (low_watermark / 2),
must be > 0)
5. Number of buffers reserved for credit messages (optional;
defaults to (num_buffers*2-1)/credit_window)