1
1
openmpi/opal/mca/btl/openib/btl_openib_xrc.h
Nathan Hjelm 56bdcd0888 btl/openib: fix XRC WQE calculation
Before dynamic add_procs support was committed to master we called
add_procs with every proc in the job. The XRC code in the openib btl
was taking advantage of this and setting the number of work queue
entries (WQE) based on all the procs on a remote node. Since that is
no longer the case we can not simply increment the sd_wqe field on the
queue pair. To fix the issue a new field has been added to the xrc
queue pair structure to keep track of how many wqes there are total on
the queue pair. If a new endpoint is added that increases the number
of wqes and the xrc queue pair is already connected the code will
attempt to modify the number of wqes on the queue pair. A failure is
ignored because all that will happen is the number of active send work
requests on an XRC queue pair will be more limited.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
2016-05-26 15:58:31 -06:00

59 строки
2.1 KiB
C

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Bull SAS. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* @file
*/
#ifndef MCA_BTL_OPENIB_XRC_H
#define MCA_BTL_OPENIB_XRC_H
#include "btl_openib.h"
#include "btl_openib_endpoint.h"
#if HAVE_XRC
#define MCA_BTL_XRC_ENABLED (mca_btl_openib_component.num_xrc_qps)
#else
#define MCA_BTL_XRC_ENABLED 0
#endif
typedef enum {
MCA_BTL_IB_ADDR_CONNECTING = 100,
MCA_BTL_IB_ADDR_CONNECTED,
MCA_BTL_IB_ADDR_CLOSED
} mca_btl_openib_ib_addr_state_t;
struct ib_address_t {
opal_list_item_t super;
void *key; /* the key with size 80bit - [subnet(64) LID(16bit)] */
uint64_t subnet_id; /* caching subnet_id */
uint16_t lid; /* caching lid */
opal_list_t pending_ep; /* list of endpoints that use this ib_address */
mca_btl_openib_qp_t *qp; /* pointer to qp that will be used
for communication with the
destination */
uint32_t remote_xrc_rcv_qp_num; /* remote xrc qp number */
opal_mutex_t addr_lock; /* protection */
mca_btl_openib_ib_addr_state_t status; /* ib port status */
int32_t max_wqe;
};
typedef struct ib_address_t ib_address_t;
int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device);
int mca_btl_openib_close_xrc_domain(struct mca_btl_openib_device_t *device);
int mca_btl_openib_ib_address_add_new (uint16_t lid, uint64_t s_id,
opal_jobid_t ep_jobid, mca_btl_openib_endpoint_t *ep);
#endif