Fixes trac:1285. Really.
This commit has the same commit message as r18450, but without the extra bonus memory corruption that was introduced. This commit was SVN r18467. The following SVN revision numbers were found above: r18450 --> open-mpi/ompi@5295902ebe The following Trac tickets were found above: Ticket 1285 --> https://svn.open-mpi.org/trac/ompi/ticket/1285
Этот коммит содержится в:
родитель
0500420bec
Коммит
64f61ebd07
@ -110,6 +110,13 @@ struct mca_btl_openib_qp_info_t {
|
||||
/** Nonzero when BTL_OPENIB_QP_TYPE(Q) evaluates to MCA_BTL_OPENIB_XRC_QP. */
#define BTL_OPENIB_QP_TYPE_XRC(Q) \
|
||||
(BTL_OPENIB_QP_TYPE(Q) == MCA_BTL_OPENIB_XRC_QP)
|
||||
|
||||
/**
 * Records where the current value of the receive_queues string in
 * mca_btl_openib_component came from.
 */
typedef enum {
|
||||
/** The value is identical to the built-in default string */
BTL_OPENIB_RQ_SOURCE_DEFAULT,
|
||||
/** The btl_openib_receive_queues MCA parameter differs from the default */
BTL_OPENIB_RQ_SOURCE_MCA,
|
||||
/** The value was taken from an HCA's INI file values */
BTL_OPENIB_RQ_SOURCE_HCA_INI,
|
||||
/** NOTE(review): not referenced in the visible code; presumably an
    end-of-enum sentinel -- confirm */
BTL_OPENIB_RQ_SOURCE_HCA_MAX
|
||||
} btl_openib_receive_queues_source_t;
|
||||
|
||||
struct mca_btl_openib_component_t {
|
||||
mca_btl_base_component_1_0_1_t super; /**< base BTL component */
|
||||
|
||||
@ -197,6 +204,11 @@ struct mca_btl_openib_component_t {
|
||||
char *if_exclude;
|
||||
char **if_exclude_list;
|
||||
|
||||
/* MCA param btl_openib_receive_queues */
|
||||
char *receive_queues;
|
||||
/* Where the value of btl_openib_receive_queues came from (built-in default, MCA parameter, or HCA INI file) */
|
||||
btl_openib_receive_queues_source_t receive_queues_source;
|
||||
|
||||
/** Colon-delimited list of filenames for HCA parameters */
|
||||
char *hca_params_file_names;
|
||||
|
||||
|
@ -42,6 +42,7 @@
|
||||
#include "opal/mca/carto/carto.h"
|
||||
#include "opal/mca/carto/base/base.h"
|
||||
#include "opal/mca/paffinity/base/base.h"
|
||||
#include "opal/mca/installdirs/installdirs.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
@ -80,6 +81,11 @@ static int btl_openib_component_close(void);
|
||||
static mca_btl_base_module_t **btl_openib_component_init(int*, bool, bool);
|
||||
static int btl_openib_component_progress(void);
|
||||
|
||||
/*
|
||||
* Local variables
|
||||
*/
|
||||
/* HCA whose INI values supplied the current receive_queues string; it
   is reported as the other adapter in the "conflicting receive_queues"
   help message when a later HCA's INI values disagree. */
static mca_btl_openib_hca_t *receive_queues_hca = NULL;
|
||||
|
||||
mca_btl_openib_component_t mca_btl_openib_component = {
|
||||
{
|
||||
/* First, the mca_base_component_t struct containing meta information
|
||||
@ -149,6 +155,9 @@ static int btl_openib_component_close(void)
|
||||
ompi_btl_openib_connect_base_finalize();
|
||||
ompi_btl_openib_fd_finalize();
|
||||
ompi_btl_openib_ini_finalize();
|
||||
if (NULL != mca_btl_openib_component.receive_queues) {
|
||||
free(mca_btl_openib_component.receive_queues);
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -664,8 +673,6 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
|
||||
|
||||
static void hca_construct(mca_btl_openib_hca_t *hca)
|
||||
{
|
||||
int i;
|
||||
|
||||
hca->ib_dev = NULL;
|
||||
hca->ib_dev_context = NULL;
|
||||
hca->ib_pd = NULL;
|
||||
@ -687,13 +694,8 @@ static void hca_construct(mca_btl_openib_hca_t *hca)
|
||||
#if HAVE_XRC
|
||||
hca->xrc_fd = -1;
|
||||
#endif
|
||||
hca->qps = (mca_btl_openib_hca_qp_t*)calloc(mca_btl_openib_component.num_qps,
|
||||
sizeof(mca_btl_openib_hca_qp_t));
|
||||
hca->qps = NULL;
|
||||
OBJ_CONSTRUCT(&hca->hca_lock, opal_mutex_t);
|
||||
for(i = 0; i < mca_btl_openib_component.num_qps; i++) {
|
||||
OBJ_CONSTRUCT(&hca->qps[i].send_free, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&hca->qps[i].recv_free, ompi_free_list_t);
|
||||
}
|
||||
OBJ_CONSTRUCT(&hca->send_free_control, ompi_free_list_t);
|
||||
}
|
||||
|
||||
@ -709,13 +711,14 @@ static void hca_destruct(mca_btl_openib_hca_t *hca)
|
||||
free(hca->eager_rdma_buffers);
|
||||
}
|
||||
OBJ_DESTRUCT(&hca->hca_lock);
|
||||
for(i = 0; i < mca_btl_openib_component.num_qps; i++) {
|
||||
OBJ_DESTRUCT(&hca->qps[i].send_free);
|
||||
OBJ_DESTRUCT(&hca->qps[i].recv_free);
|
||||
}
|
||||
OBJ_DESTRUCT(&hca->send_free_control);
|
||||
if(hca->qps)
|
||||
if (NULL != hca->qps) {
|
||||
for (i = 0; i < mca_btl_openib_component.num_qps; i++) {
|
||||
OBJ_DESTRUCT(&hca->qps[i].send_free);
|
||||
OBJ_DESTRUCT(&hca->qps[i].recv_free);
|
||||
}
|
||||
free(hca->qps);
|
||||
}
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_btl_openib_hca_t, opal_object_t, hca_construct,
|
||||
@ -947,6 +950,9 @@ done:
|
||||
return num_ports;
|
||||
}
|
||||
|
||||
/*
|
||||
* Prefer values that are already in the target
|
||||
*/
|
||||
static void merge_values(ompi_btl_openib_ini_values_t *target,
|
||||
ompi_btl_openib_ini_values_t *src)
|
||||
{
|
||||
@ -959,6 +965,10 @@ static void merge_values(ompi_btl_openib_ini_values_t *target,
|
||||
target->use_eager_rdma = src->use_eager_rdma;
|
||||
target->use_eager_rdma_set = true;
|
||||
}
|
||||
|
||||
if (NULL == target->receive_queues && NULL != src->receive_queues) {
|
||||
target->receive_queues = strdup(src->receive_queues);
|
||||
}
|
||||
}
|
||||
|
||||
static bool inline is_credit_message(const mca_btl_openib_recv_frag_t *frag)
|
||||
@ -969,6 +979,15 @@ static bool inline is_credit_message(const mca_btl_openib_recv_frag_t *frag)
|
||||
(MCA_BTL_OPENIB_CONTROL_CREDITS == chdr->type);
|
||||
}
|
||||
|
||||
/**
 * Convert one field of a receive_queues specification to an integer.
 *
 * @param param  Text of the field; NULL or an empty string means the
 *               field was omitted.
 * @param dflt   Value to use when the field was omitted.  A default of
 *               0 is promoted to 1, so an omitted field never yields 0.
 *
 * @return The default (or 1) for an omitted field; otherwise the
 *         leading decimal number in param, clamped to the int32_t
 *         range.  Text with no leading number yields 0.
 */
static int32_t atoi_param(char *param, int32_t dflt)
{
    long parsed;

    if (NULL == param || '\0' == param[0]) {
        return (0 != dflt) ? dflt : 1;
    }

    /* atoi() has undefined behavior when the value does not fit in an
       int; strtol() saturates instead, so clamp its result explicitly. */
    parsed = strtol(param, NULL, 10);
    if (parsed > INT32_MAX) {
        return INT32_MAX;
    }
    if (parsed < INT32_MIN) {
        return INT32_MIN;
    }

    return (int32_t) parsed;
}
|
||||
|
||||
static void init_apm_port(mca_btl_openib_hca_t *hca, int port, uint16_t lid)
|
||||
{
|
||||
int index;
|
||||
@ -985,6 +1004,206 @@ static void init_apm_port(mca_btl_openib_hca_t *hca, int port, uint16_t lid)
|
||||
}
|
||||
}
|
||||
|
||||
static int setup_qps(void)
|
||||
{
|
||||
char **queues, **params = NULL;
|
||||
int num_xrc_qps = 0, num_pp_qps = 0, num_srq_qps = 0, qp = 0;
|
||||
uint32_t max_qp_size, max_size_needed;
|
||||
int32_t min_freelist_size = 0;
|
||||
int smallest_pp_qp = 0, ret = OMPI_ERROR;
|
||||
|
||||
queues = opal_argv_split(mca_btl_openib_component.receive_queues, ':');
|
||||
if (0 == opal_argv_count(queues)) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"no qps in receive_queues", true,
|
||||
orte_process_info.nodename,
|
||||
mca_btl_openib_component.receive_queues);
|
||||
ret = OMPI_ERROR;
|
||||
goto error;
|
||||
}
|
||||
|
||||
while (queues[qp] != NULL) {
|
||||
if (0 == strncmp("P,", queues[qp], 2)) {
|
||||
num_pp_qps++;
|
||||
if (smallest_pp_qp > qp) {
|
||||
smallest_pp_qp = qp;
|
||||
}
|
||||
} else if (0 == strncmp("S,", queues[qp], 2)) {
|
||||
num_srq_qps++;
|
||||
} else if (0 == strncmp("X,", queues[qp], 2)) {
|
||||
#if HAVE_XRC
|
||||
num_xrc_qps++;
|
||||
#else
|
||||
orte_show_help("help-mpi-btl-openib.txt", "No XRC support", true,
|
||||
orte_process_info.nodename,
|
||||
mca_btl_openib_component.receive_queues);
|
||||
ret = OMPI_ERR_RESOURCE_UNAVAILABLE;
|
||||
goto error;
|
||||
#endif
|
||||
} else {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid qp type in receive_queues", true,
|
||||
orte_process_info.nodename,
|
||||
mca_btl_openib_component.receive_queues,
|
||||
queues[qp]);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
qp++;
|
||||
}
|
||||
/* Current XRC implementation can't used with other QP types - PP
|
||||
and SRQ */
|
||||
if (num_xrc_qps > 0 && (num_pp_qps > 0 || num_srq_qps > 0)) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "XRC with PP or SRQ", true,
|
||||
orte_process_info.nodename,
|
||||
mca_btl_openib_component.receive_queues);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Current XRC implementation can't used with btls_per_lid > 1 */
|
||||
if (num_xrc_qps > 0 && mca_btl_openib_component.btls_per_lid > 1) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "XRC with BTLs per LID",
|
||||
true, orte_process_info.nodename,
|
||||
mca_btl_openib_component.receive_queues, num_xrc_qps);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
mca_btl_openib_component.num_pp_qps = num_pp_qps;
|
||||
mca_btl_openib_component.num_srq_qps = num_srq_qps;
|
||||
mca_btl_openib_component.num_xrc_qps = num_xrc_qps;
|
||||
mca_btl_openib_component.num_qps = num_pp_qps + num_srq_qps + num_xrc_qps;
|
||||
|
||||
mca_btl_openib_component.qp_infos = (mca_btl_openib_qp_info_t*)
|
||||
malloc(sizeof(mca_btl_openib_qp_info_t) *
|
||||
mca_btl_openib_component.num_qps);
|
||||
|
||||
qp = 0;
|
||||
#define P(N) (((N) > count) ? NULL : params[(N)])
|
||||
while (queues[qp] != NULL) {
|
||||
int count;
|
||||
int32_t rd_low, rd_num;
|
||||
params = opal_argv_split_with_empty(queues[qp], ',');
|
||||
count = opal_argv_count(params);
|
||||
|
||||
if ('P' == params[0][0]) {
|
||||
int32_t rd_win, rd_rsv;
|
||||
if (count < 3 || count > 6) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid pp qp specification", true,
|
||||
orte_process_info.nodename, queues[qp]);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
mca_btl_openib_component.qp_infos[qp].type = MCA_BTL_OPENIB_PP_QP;
|
||||
mca_btl_openib_component.qp_infos[qp].size = atoi_param(P(1), 0);
|
||||
rd_num = atoi_param(P(2), 256);
|
||||
/* by default set rd_low to be 3/4 of rd_num */
|
||||
rd_low = atoi_param(P(3), rd_num - (rd_num / 4));
|
||||
rd_win = atoi_param(P(4), (rd_num - rd_low) * 2);
|
||||
rd_rsv = atoi_param(P(5), (rd_num * 2) / rd_win);
|
||||
|
||||
BTL_VERBOSE(("pp: rd_num is %d rd_low is %d rd_win %d rd_rsv %d",
|
||||
rd_num, rd_low, rd_win, rd_rsv));
|
||||
|
||||
/* Calculate the smallest freelist size that can be allowed */
|
||||
if (rd_num + rd_rsv > min_freelist_size) {
|
||||
min_freelist_size = rd_num + rd_rsv;
|
||||
}
|
||||
|
||||
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_win = rd_win;
|
||||
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv = rd_rsv;
|
||||
if ((rd_num - rd_low) > rd_win) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "non optimal rd_win",
|
||||
true, rd_win, rd_num - rd_low);
|
||||
}
|
||||
} else {
|
||||
int32_t sd_max;
|
||||
if (count < 3 || count > 5) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid srq specification", true,
|
||||
orte_process_info.nodename, queues[qp]);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
mca_btl_openib_component.qp_infos[qp].type = (params[0][0] =='X') ?
|
||||
MCA_BTL_OPENIB_XRC_QP : MCA_BTL_OPENIB_SRQ_QP;
|
||||
mca_btl_openib_component.qp_infos[qp].size = atoi_param(P(1), 0);
|
||||
rd_num = atoi_param(P(2), 256);
|
||||
/* by default set rd_low to be 3/4 of rd_num */
|
||||
rd_low = atoi_param(P(3), rd_num - (rd_num / 4));
|
||||
sd_max = atoi_param(P(4), rd_low / 4);
|
||||
BTL_VERBOSE(("srq: rd_num is %d rd_low is %d sd_max is %d",
|
||||
rd_num, rd_low, sd_max));
|
||||
|
||||
/* Calculate the smallest freelist size that can be allowed */
|
||||
if (rd_num > min_freelist_size) {
|
||||
min_freelist_size = rd_num;
|
||||
}
|
||||
|
||||
mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max = sd_max;
|
||||
}
|
||||
|
||||
if (rd_num <= rd_low) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "rd_num must be > rd_low",
|
||||
true, orte_process_info.nodename, queues[qp]);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
mca_btl_openib_component.qp_infos[qp].rd_num = rd_num;
|
||||
mca_btl_openib_component.qp_infos[qp].rd_low = rd_low;
|
||||
opal_argv_free(params);
|
||||
qp++;
|
||||
}
|
||||
params = NULL;
|
||||
|
||||
/* Sanity check some sizes */
|
||||
|
||||
max_qp_size = mca_btl_openib_component.qp_infos[mca_btl_openib_component.num_qps - 1].size;
|
||||
max_size_needed = (mca_btl_openib_module.super.btl_eager_limit >
|
||||
mca_btl_openib_module.super.btl_max_send_size) ?
|
||||
mca_btl_openib_module.super.btl_eager_limit :
|
||||
mca_btl_openib_module.super.btl_max_send_size;
|
||||
if (max_qp_size < max_size_needed) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"biggest qp size is too small", true,
|
||||
orte_process_info.nodename, max_qp_size,
|
||||
max_size_needed);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
} else if (max_qp_size > max_size_needed) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"biggest qp size is too big", true,
|
||||
orte_process_info.nodename, max_qp_size,
|
||||
max_size_needed);
|
||||
}
|
||||
|
||||
if (mca_btl_openib_component.ib_free_list_max > 0 &&
|
||||
min_freelist_size > mca_btl_openib_component.ib_free_list_max) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "freelist too small", true,
|
||||
orte_process_info.nodename,
|
||||
mca_btl_openib_component.ib_free_list_max,
|
||||
min_freelist_size);
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
goto error;
|
||||
}
|
||||
|
||||
mca_btl_openib_component.rdma_qp = mca_btl_openib_component.num_qps - 1;
|
||||
mca_btl_openib_component.credits_qp = smallest_pp_qp;
|
||||
|
||||
ret = OMPI_SUCCESS;
|
||||
error:
|
||||
if (NULL != params) {
|
||||
opal_argv_free(params);
|
||||
}
|
||||
|
||||
if (NULL != queues) {
|
||||
opal_argv_free(queues);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
||||
{
|
||||
struct mca_mpool_base_resources_t mpool_resources;
|
||||
@ -1023,26 +1242,12 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
||||
/* If mca_btl_if_include/exclude were specified, get usable ports */
|
||||
allowed_ports = (int*)malloc(hca->ib_dev_attr.phys_port_cnt * sizeof(int));
|
||||
port_cnt = get_port_list(hca, allowed_ports);
|
||||
if(0 == port_cnt) {
|
||||
ret = OMPI_SUCCESS;
|
||||
if (0 == port_cnt) {
|
||||
free(allowed_ports);
|
||||
goto error;
|
||||
}
|
||||
#if HAVE_XRC
|
||||
/* if user configured to run with XRC qp and the device don't support it -
|
||||
* we should ignore this hca. Maybe we have other one that have XRC support
|
||||
*/
|
||||
if (!(hca->ib_dev_attr.device_cap_flags & IBV_DEVICE_XRC) &&
|
||||
mca_btl_openib_component.num_xrc_qps > 0) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"XRC on device without XRC support", true,
|
||||
mca_btl_openib_component.num_xrc_qps,
|
||||
ibv_get_device_name(hca->ib_dev),
|
||||
orte_process_info.nodename);
|
||||
ret = OMPI_SUCCESS;
|
||||
goto error;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Load in vendor/part-specific HCA parameters. Note that even if
|
||||
we don't find values for this vendor/part, "values" will be set
|
||||
indicating that it does not have good values */
|
||||
@ -1102,11 +1307,67 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
||||
hca->mtu = mca_btl_openib_component.ib_mtu;
|
||||
}
|
||||
|
||||
/* If the user specified btl_openib_receive_queues MCA param, it
|
||||
overrides all HCA INI params */
|
||||
if (BTL_OPENIB_RQ_SOURCE_MCA !=
|
||||
mca_btl_openib_component.receive_queues_source &&
|
||||
NULL != values.receive_queues) {
|
||||
/* If a prior HCA's INI values set a different value for
|
||||
receive_queues, this is unsupported (see
|
||||
https://svn.open-mpi.org/trac/ompi/ticket/1285) */
|
||||
if (BTL_OPENIB_RQ_SOURCE_HCA_INI ==
|
||||
mca_btl_openib_component.receive_queues_source) {
|
||||
if (0 != strcmp(values.receive_queues,
|
||||
mca_btl_openib_component.receive_queues)) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"conflicting receive_queues", true,
|
||||
orte_process_info.nodename,
|
||||
ibv_get_device_name(hca->ib_dev),
|
||||
hca->ib_dev_attr.vendor_id,
|
||||
hca->ib_dev_attr.vendor_part_id,
|
||||
values.receive_queues,
|
||||
ibv_get_device_name(receive_queues_hca->ib_dev),
|
||||
receive_queues_hca->ib_dev_attr.vendor_id,
|
||||
receive_queues_hca->ib_dev_attr.vendor_part_id,
|
||||
mca_btl_openib_component.receive_queues,
|
||||
opal_install_dirs.pkgdatadir);
|
||||
ret = OMPI_ERR_RESOURCE_BUSY;
|
||||
goto error;
|
||||
}
|
||||
} else {
|
||||
if (NULL != mca_btl_openib_component.receive_queues) {
|
||||
free(mca_btl_openib_component.receive_queues);
|
||||
}
|
||||
receive_queues_hca = hca;
|
||||
mca_btl_openib_component.receive_queues =
|
||||
strdup(values.receive_queues);
|
||||
mca_btl_openib_component.receive_queues_source =
|
||||
BTL_OPENIB_RQ_SOURCE_HCA_INI;
|
||||
}
|
||||
}
|
||||
|
||||
/* If "use eager rdma" was set, then enable it on this HCA */
|
||||
if (values.use_eager_rdma_set) {
|
||||
hca->use_eager_rdma = values.use_eager_rdma;
|
||||
}
|
||||
|
||||
#if HAVE_XRC
|
||||
/* if user configured to run with XRC qp and the device doesn't
|
||||
* support it - we should ignore this hca. Maybe we have another
|
||||
* one that has XRC support
|
||||
*/
|
||||
if (!(hca->ib_dev_attr.device_cap_flags & IBV_DEVICE_XRC) &&
|
||||
mca_btl_openib_component.num_xrc_qps > 0) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"XRC on device without XRC support", true,
|
||||
mca_btl_openib_component.num_xrc_qps,
|
||||
ibv_get_device_name(hca->ib_dev),
|
||||
orte_process_info.nodename);
|
||||
ret = OMPI_SUCCESS;
|
||||
goto error;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Allocate the protection domain for the HCA */
|
||||
hca->ib_pd = ibv_alloc_pd(hca->ib_dev_context);
|
||||
if(NULL == hca->ib_pd){
|
||||
@ -1199,10 +1460,7 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
||||
"apm not enough ports", true);
|
||||
mca_btl_openib_component.apm_ports = 0;
|
||||
}
|
||||
ret = prepare_hca_for_use(hca);
|
||||
if(OMPI_SUCCESS == ret) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
error:
|
||||
@ -1560,10 +1818,6 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
|
||||
dev_sorted = sort_devs_by_distance(ib_devs, num_devs);
|
||||
|
||||
/* We must loop through all the hca id's, get their handles and
|
||||
for each hca we query the number of ports on the hca and set up
|
||||
a distinct btl module for each hca port */
|
||||
|
||||
OBJ_CONSTRUCT(&btl_list, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mca_btl_openib_component.ib_lock, opal_mutex_t);
|
||||
#if OMPI_HAVE_THREADS
|
||||
@ -1585,7 +1839,8 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"error in hca init", true, orte_process_info.nodename);
|
||||
"error in hca init", true, orte_process_info.nodename,
|
||||
ibv_get_device_name(dev_sorted[i].ib_dev));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -1612,6 +1867,45 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Setup the BSRQ QP's based on the final value of
|
||||
mca_btl_openib_component.receive_queues. */
|
||||
setup_qps();
|
||||
|
||||
/* Loop through all the btl modules that we made and find every
|
||||
base HCA that doesn't have hca->qps setup on it yet (remember
|
||||
that some modules may share the same HCA, so when going through
|
||||
the loop, we may hit an HCA that was already set up earlier in
|
||||
the loop). */
|
||||
for (item = opal_list_get_first(&btl_list);
|
||||
opal_list_get_end(&btl_list) != item;
|
||||
item = opal_list_get_next(item)) {
|
||||
mca_btl_base_selected_module_t *m =
|
||||
(mca_btl_base_selected_module_t*) item;
|
||||
mca_btl_openib_hca_t *hca =
|
||||
((mca_btl_openib_module_t*) m->btl_module)->hca;
|
||||
if (NULL == hca->qps) {
|
||||
|
||||
/* Setup the HCA qps info */
|
||||
hca->qps = (mca_btl_openib_hca_qp_t*)
|
||||
calloc(mca_btl_openib_component.num_qps,
|
||||
sizeof(mca_btl_openib_hca_qp_t));
|
||||
for (i = 0; i < mca_btl_openib_component.num_qps; i++) {
|
||||
OBJ_CONSTRUCT(&hca->qps[i].send_free, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&hca->qps[i].recv_free, ompi_free_list_t);
|
||||
}
|
||||
|
||||
/* Do final init on HCA */
|
||||
ret = prepare_hca_for_use(hca);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"error in hca init", true,
|
||||
orte_process_info.nodename,
|
||||
ibv_get_device_name(hca->ib_dev));
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate space for btl modules */
|
||||
mca_btl_openib_component.openib_btls =
|
||||
malloc(sizeof(mca_btl_openib_module_t*) *
|
||||
|
@ -23,6 +23,8 @@
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include "orte/util/output.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
@ -388,6 +390,12 @@ static int parse_line(parsed_section_values_t *sv)
|
||||
sv->values.use_eager_rdma_set = true;
|
||||
}
|
||||
|
||||
else if (0 == strcasecmp(key_buffer, "receive_queues")) {
|
||||
/* Single value (already strdup'ed) */
|
||||
sv->values.receive_queues = value;
|
||||
value = NULL;
|
||||
}
|
||||
|
||||
else {
|
||||
/* Have no idea what this parameter is. Not an error -- just
|
||||
ignore it */
|
||||
@ -429,6 +437,9 @@ static void hca_values_destructor(hca_values_t *s)
|
||||
if (NULL != s->section_name) {
|
||||
free(s->section_name);
|
||||
}
|
||||
if (NULL != s->values.receive_queues) {
|
||||
free(s->values.receive_queues);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -469,6 +480,8 @@ static void reset_values(ompi_btl_openib_ini_values_t *v)
|
||||
|
||||
v->use_eager_rdma = 0;
|
||||
v->use_eager_rdma_set = false;
|
||||
|
||||
v->receive_queues = NULL;
|
||||
}
|
||||
|
||||
|
||||
@ -532,6 +545,10 @@ static int save_section(parsed_section_values_t *s)
|
||||
containing bool members by value. So do a memcpy
|
||||
here instead. */
|
||||
memcpy(&h->values, &s->values, sizeof(s->values));
|
||||
/* Need to strdup the string, though */
|
||||
if (NULL != h->values.receive_queues) {
|
||||
h->values.receive_queues = strdup(s->values.receive_queues);
|
||||
}
|
||||
opal_list_append(&hcas, &h->super);
|
||||
}
|
||||
}
|
||||
@ -586,14 +603,26 @@ static int intify_list(char *value, uint32_t **values, int *len)
|
||||
*values[0] = (uint32_t) intify(str);
|
||||
*len = 1;
|
||||
} else {
|
||||
/* If we found a comma, loop over all the values. Be a
|
||||
little clever in that we alwasy alloc enough space for
|
||||
an extra value so that when we exit the loop, we don't
|
||||
have to realloc again to get space for the last item. */
|
||||
int newsize = 1;
|
||||
|
||||
/* Count how many values there are and allocate enough space
|
||||
for them */
|
||||
while (NULL != comma) {
|
||||
++newsize;
|
||||
str = comma + 1;
|
||||
comma = strchr(str, ',');
|
||||
}
|
||||
*values = malloc(sizeof(uint32_t) * newsize);
|
||||
if (NULL == *values) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* Iterate over the values and save them */
|
||||
str = value;
|
||||
comma = strchr(str, ',');
|
||||
do {
|
||||
*comma = '\0';
|
||||
*values = realloc(*values, sizeof(uint32_t) * (*len + 2));
|
||||
(*values)[*len] = (int32_t) intify(str);
|
||||
(*values)[*len] = (uint32_t) intify(str);
|
||||
++(*len);
|
||||
str = comma + 1;
|
||||
comma = strchr(str, ',');
|
||||
|
@ -25,6 +25,8 @@ typedef struct ompi_btl_openib_ini_values_t {
|
||||
|
||||
uint32_t use_eager_rdma;
|
||||
bool use_eager_rdma_set;
|
||||
|
||||
char *receive_queues;
|
||||
} ompi_btl_openib_ini_values_t;
|
||||
|
||||
|
||||
|
@ -52,8 +52,6 @@ enum {
|
||||
REGSTR_MAX = 0x88
|
||||
};
|
||||
|
||||
static int mca_btl_openib_mca_setup_qps(void);
|
||||
|
||||
|
||||
/*
|
||||
* utility routine for string parameter registration
|
||||
@ -109,6 +107,9 @@ static inline int reg_int(const char* param_name, const char* param_desc,
|
||||
*/
|
||||
int btl_openib_register_mca_params(void)
|
||||
{
|
||||
char default_qps[100];
|
||||
uint32_t mid_qp_size;
|
||||
int i;
|
||||
char *msg, *str;
|
||||
int ival, ival2, ret, tmp;
|
||||
|
||||
@ -485,7 +486,33 @@ int btl_openib_register_mca_params(void)
|
||||
&mca_btl_openib_module.super));
|
||||
|
||||
/* setup all the qp stuff */
|
||||
CHECK(mca_btl_openib_mca_setup_qps());
|
||||
mid_qp_size = mca_btl_openib_module.super.btl_eager_limit / 4;
|
||||
/* round mid_qp_size down to a power of two */
|
||||
for(i = 31; i > 0; i--) {
|
||||
if(!(mid_qp_size & (1<<i))) {
|
||||
continue;
|
||||
}
|
||||
mid_qp_size = (1<<i);
|
||||
break;
|
||||
}
|
||||
|
||||
if(mid_qp_size <= 128) {
|
||||
mid_qp_size = 1024;
|
||||
}
|
||||
|
||||
snprintf(default_qps, 100,
|
||||
"P,128,256,192,128:S,%u,256,128,32:S,%u,256,128,32:S,%u,256,128,32",
|
||||
mid_qp_size,
|
||||
(uint32_t)mca_btl_openib_module.super.btl_eager_limit,
|
||||
(uint32_t)mca_btl_openib_module.super.btl_max_send_size);
|
||||
CHECK(reg_string("receive_queues",
|
||||
"Colon-delimited, comma delimited list of receive queues: P,4096,8,6,4:P,32768,8,6,4",
|
||||
default_qps, &mca_btl_openib_component.receive_queues,
|
||||
0));
|
||||
mca_btl_openib_component.receive_queues_source =
|
||||
(0 == strcmp(default_qps,
|
||||
mca_btl_openib_component.receive_queues)) ?
|
||||
BTL_OPENIB_RQ_SOURCE_DEFAULT : BTL_OPENIB_RQ_SOURCE_MCA;
|
||||
|
||||
CHECK(reg_string("if_include",
|
||||
"Comma-delimited list of HCAs/ports to be used (e.g. \"mthca0,mthca1:2\"; empty value means to use all ports found). Mutually exclusive with btl_openib_if_exclude.",
|
||||
@ -497,232 +524,9 @@ int btl_openib_register_mca_params(void)
|
||||
NULL, &mca_btl_openib_component.if_exclude,
|
||||
0));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Convert one field of a receive_queues specification to an integer.
 *
 * @param param  Text of the field; NULL or an empty string means the
 *               field was omitted.
 * @param dflt   Value to use when the field was omitted.  A default of
 *               0 is promoted to 1, so an omitted field never yields 0.
 *
 * @return The default (or 1) for an omitted field; otherwise the
 *         leading decimal number in param, clamped to the int32_t
 *         range.  Text with no leading number yields 0.
 */
static int32_t atoi_param(char *param, int32_t dflt)
{
    long parsed;

    if (NULL == param || '\0' == param[0]) {
        return (0 != dflt) ? dflt : 1;
    }

    /* atoi() has undefined behavior when the value does not fit in an
       int; strtol() saturates instead, so clamp its result explicitly. */
    parsed = strtol(param, NULL, 10);
    if (parsed > INT32_MAX) {
        return INT32_MAX;
    }
    if (parsed < INT32_MIN) {
        return INT32_MIN;
    }

    return (int32_t) parsed;
}
|
||||
|
||||
static int mca_btl_openib_mca_setup_qps(void)
|
||||
{
|
||||
/* All the multi-qp stuff.. */
|
||||
char *str;
|
||||
char **queues, **params = NULL;
|
||||
int num_xrc_qps = 0, num_pp_qps = 0, num_srq_qps = 0, qp = 0;
|
||||
char default_qps[100];
|
||||
uint32_t max_qp_size, max_size_needed;
|
||||
int32_t min_freelist_size = 0;
|
||||
int smallest_pp_qp = 0, ret = OMPI_ERROR, i;
|
||||
uint32_t mid_qp_size;
|
||||
|
||||
mid_qp_size = mca_btl_openib_module.super.btl_eager_limit / 4;
|
||||
/* round mid_qp_size down to a power of two */
|
||||
for(i = 31; i > 0; i--) {
|
||||
if(!(mid_qp_size & (1<<i)))
|
||||
continue;
|
||||
mid_qp_size = (1<<i);
|
||||
break;
|
||||
}
|
||||
|
||||
if(mid_qp_size <= 128)
|
||||
mid_qp_size = 1024;
|
||||
|
||||
snprintf(default_qps, 100,
|
||||
"P,128,256,192,128:S,%u,256,128,32:S,%u,256,128,32:S,%u,256,128,32",
|
||||
mid_qp_size,
|
||||
(uint32_t)mca_btl_openib_module.super.btl_eager_limit,
|
||||
(uint32_t)mca_btl_openib_module.super.btl_max_send_size);
|
||||
reg_string("receive_queues",
|
||||
"Colon-delimited, comma delimited list of receive queues: P,4096,8,6,4:P,32768,8,6,4",
|
||||
default_qps, &str, 0);
|
||||
queues = opal_argv_split(str, ':');
|
||||
|
||||
if (0 == opal_argv_count(queues)) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"no qps in receive_queues", true,
|
||||
orte_process_info.nodename, str);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
while (queues[qp] != NULL) {
|
||||
if (0 == strncmp("P,", queues[qp], 2)) {
|
||||
num_pp_qps++;
|
||||
if(smallest_pp_qp > qp)
|
||||
smallest_pp_qp = qp;
|
||||
} else if (0 == strncmp("S,", queues[qp], 2)) {
|
||||
num_srq_qps++;
|
||||
} else if (0 == strncmp("X,", queues[qp], 2)) {
|
||||
#if HAVE_XRC
|
||||
num_xrc_qps++;
|
||||
#else
|
||||
orte_show_help("help-mpi-btl-openib.txt", "No XRC support", true,
|
||||
orte_process_info.nodename, str);
|
||||
goto error;
|
||||
#endif
|
||||
} else {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid qp type in receive_queues", true,
|
||||
orte_process_info.nodename, str, queues[qp]);
|
||||
goto error;
|
||||
}
|
||||
qp++;
|
||||
}
|
||||
/* Current XRC implementation can't used with other QP types - PP and SRQ */
|
||||
if (num_xrc_qps > 0 && (num_pp_qps > 0 || num_srq_qps > 0)) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "XRC with PP or SRQ", true,
|
||||
orte_process_info.nodename, str);
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Current XRC implementation can't used with btls_per_lid > 1 */
|
||||
if (num_xrc_qps > 0 && mca_btl_openib_component.btls_per_lid > 1) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "XRC with BTLs per LID", true,
|
||||
orte_process_info.nodename, str, num_xrc_qps);
|
||||
goto error;
|
||||
}
|
||||
mca_btl_openib_component.num_pp_qps = num_pp_qps;
|
||||
mca_btl_openib_component.num_srq_qps = num_srq_qps;
|
||||
mca_btl_openib_component.num_xrc_qps = num_xrc_qps;
|
||||
mca_btl_openib_component.num_qps = num_pp_qps + num_srq_qps + num_xrc_qps;
|
||||
|
||||
mca_btl_openib_component.qp_infos = (mca_btl_openib_qp_info_t*)
|
||||
malloc(sizeof(mca_btl_openib_qp_info_t) *
|
||||
mca_btl_openib_component.num_qps);
|
||||
|
||||
qp = 0;
|
||||
#define P(N) (((N) > count)?NULL:params[(N)])
|
||||
while(queues[qp] != NULL) {
|
||||
int i = 0, count;
|
||||
int32_t rd_low, rd_num;
|
||||
params = opal_argv_split_with_empty(queues[qp], ',');
|
||||
count = opal_argv_count(params);
|
||||
|
||||
if ('P' == params[0][0]) {
|
||||
int32_t rd_win, rd_rsv;
|
||||
if (count < 3 || count > 6) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid pp qp specification", true,
|
||||
orte_process_info.nodename, queues[qp]);
|
||||
goto error;
|
||||
}
|
||||
mca_btl_openib_component.qp_infos[qp].type = MCA_BTL_OPENIB_PP_QP;
|
||||
mca_btl_openib_component.qp_infos[qp].size = atoi_param(P(1), 0);
|
||||
rd_num = atoi_param(P(2), 256);
|
||||
/* by default set rd_low to be 3/4 of rd_num */
|
||||
rd_low = atoi_param(P(3), rd_num - (rd_num / 4));
|
||||
rd_win = atoi_param(P(4), (rd_num - rd_low) * 2);
|
||||
rd_rsv = atoi_param(P(5), (rd_num * 2) / rd_win);
|
||||
|
||||
BTL_VERBOSE(("pp: rd_num is %d rd_low is %d rd_win %d rd_rsv %d",
|
||||
rd_num, rd_low, rd_win, rd_rsv));
|
||||
|
||||
/* Calculate the smallest freelist size that can be allowed */
|
||||
if (rd_num + rd_rsv > min_freelist_size)
|
||||
min_freelist_size = rd_num + rd_rsv;
|
||||
|
||||
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_win = rd_win;
|
||||
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv = rd_rsv;
|
||||
if((rd_num - rd_low) > rd_win)
|
||||
orte_show_help("help-mpi-btl-openib.txt", "non optimal rd_win",
|
||||
true, rd_win, rd_num - rd_low);
|
||||
} else {
|
||||
int32_t sd_max;
|
||||
if(count < 3 || count > 5) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid srq specification", true,
|
||||
orte_process_info.nodename, queues[qp]);
|
||||
goto error;
|
||||
}
|
||||
mca_btl_openib_component.qp_infos[qp].type = (params[0][0] =='X') ?
|
||||
MCA_BTL_OPENIB_XRC_QP : MCA_BTL_OPENIB_SRQ_QP;
|
||||
mca_btl_openib_component.qp_infos[qp].size = atoi_param(P(1), 0);
|
||||
rd_num = atoi_param(P(2), 256);
|
||||
/* by default set rd_low to be 3/4 of rd_num */
|
||||
rd_low = atoi_param(P(3), rd_num - (rd_num / 4));
|
||||
sd_max = atoi_param(P(4), rd_low / 4);
|
||||
BTL_VERBOSE(("srq: rd_num is %d rd_low is %d sd_max is %d",
|
||||
rd_num, rd_low, sd_max));
|
||||
|
||||
/* Calculate the smallest freelist size that can be allowed */
|
||||
if (rd_num > min_freelist_size)
|
||||
min_freelist_size = rd_num;
|
||||
|
||||
mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max = sd_max;
|
||||
}
|
||||
|
||||
if (rd_num <= rd_low) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "rd_num must be > rd_low",
|
||||
true, orte_process_info.nodename, queues[qp]);
|
||||
goto error;
|
||||
}
|
||||
mca_btl_openib_component.qp_infos[qp].rd_num = rd_num;
|
||||
mca_btl_openib_component.qp_infos[qp].rd_low = rd_low;
|
||||
while (NULL != params[i]) {
|
||||
free(params[i++]);
|
||||
}
|
||||
free(params);
|
||||
qp++;
|
||||
}
|
||||
params = NULL;
|
||||
|
||||
/* Sanity check some sizes */
|
||||
|
||||
max_qp_size = mca_btl_openib_component.qp_infos[mca_btl_openib_component.num_qps - 1].size;
|
||||
max_size_needed = (mca_btl_openib_module.super.btl_eager_limit >
|
||||
mca_btl_openib_module.super.btl_max_send_size) ?
|
||||
mca_btl_openib_module.super.btl_eager_limit :
|
||||
mca_btl_openib_module.super.btl_max_send_size;
|
||||
if (max_qp_size < max_size_needed) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"biggest qp size is too small", true,
|
||||
orte_process_info.nodename, max_qp_size,
|
||||
max_size_needed);
|
||||
ret = OMPI_ERROR;
|
||||
goto error;
|
||||
} else if (max_qp_size > max_size_needed) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"biggest qp size is too big", true,
|
||||
orte_process_info.nodename, max_qp_size,
|
||||
max_size_needed);
|
||||
orte_output(0, "The biggest QP size is bigger than maximum send size. "
|
||||
"This is not optimal configuration as memory will be wasted.");
|
||||
}
|
||||
|
||||
if (mca_btl_openib_component.ib_free_list_max > 0 &&
|
||||
min_freelist_size > mca_btl_openib_component.ib_free_list_max) {
|
||||
orte_show_help("help-mpi-btl-openib.txt", "freelist too small", true,
|
||||
orte_process_info.nodename,
|
||||
mca_btl_openib_component.ib_free_list_max,
|
||||
min_freelist_size);
|
||||
goto error;
|
||||
}
|
||||
|
||||
mca_btl_openib_component.rdma_qp = mca_btl_openib_component.num_qps - 1;
|
||||
mca_btl_openib_component.credits_qp = smallest_pp_qp;
|
||||
|
||||
/* Register any MCA params for the connect pseudo-components */
|
||||
if (OMPI_SUCCESS != ompi_btl_openib_connect_base_register())
|
||||
goto error;
|
||||
|
||||
ret = OMPI_SUCCESS;
|
||||
error:
|
||||
if(params) {
|
||||
qp = 0;
|
||||
while(params[qp] != NULL)
|
||||
free(params[qp++]);
|
||||
free(params);
|
||||
}
|
||||
|
||||
if(queues) {
|
||||
qp = 0;
|
||||
while(queues[qp] != NULL)
|
||||
free(queues[qp++]);
|
||||
free(queues);
|
||||
if (OMPI_SUCCESS == ret) {
|
||||
ret = ompi_btl_openib_connect_base_register();
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@ -169,6 +169,12 @@ no active ports detected. This is most certainly not what you wanted.
|
||||
Check your cables and SM configuration.
|
||||
#
|
||||
[error in hca init]
|
||||
WARNING: There was an error initializing an OpenFabrics NIC/HCA.
|
||||
|
||||
Hostname: %s
|
||||
Device: %s
|
||||
#
|
||||
[error in hca init]
|
||||
WARNING: There were errors during IB HCA initialization on host '%s'.
|
||||
#
|
||||
[default subnet prefix]
|
||||
@ -448,3 +454,20 @@ Can not provide %d alternative paths with LMC bit configured to %d.
|
||||
[apm not enough ports]
|
||||
WARNING: For APM over ports, Open MPI requires at least 2 active ports, but only a
|
||||
single active port was found. Disabling APM over ports.
|
||||
#
|
||||
[conflicting receive_queues]
|
||||
Open MPI detected two different sets of OpenFabrics receive queues on
|
||||
the same host (in the openib BTL). Open MPI currently only supports
|
||||
one set of OF receive queues in an MPI job, even if you have different
|
||||
types of OpenFabrics adapters on the same host.
|
||||
|
||||
Host: %s
|
||||
Adapter 1: %s (vendor 0x%x, part ID %d)
|
||||
Queues: %s
|
||||
Adapter 2: %s (vendor 0x%x, part ID %d)
|
||||
Queues: %s
|
||||
|
||||
Note that these receive queues values may have come from the Open MPI
|
||||
adapter default settings file:
|
||||
|
||||
%s/mca-btl-openib-hca-params.ini
|
||||
|
@ -139,3 +139,4 @@ vendor_id = 0x1425
|
||||
vendor_part_id = 0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0030,0x0031,0x0032
|
||||
use_eager_rdma = 1
|
||||
mtu = 2048
|
||||
receive_queues = P,65536,256,192,128
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user