1
1

 * MCA param btl_openib_use_eager_rdma can now override the
   INI file use_eager_rdma value (fixes trac:1169)
 * fixed a typo in a MCA param help message
 * made the check for enabling short/eager RDMA more robust in the
   presence of progress threads; it now emits a show_help warning

This commit was SVN r18723.

The following Trac tickets were found above:
  Ticket 1169 --> https://svn.open-mpi.org/trac/ompi/ticket/1169
Этот коммит содержится в:
Jeff Squyres 2008-06-24 18:31:46 +00:00
родитель 578d1c15c6
Коммит ea21c31f44
4 изменённых файлов: 30 добавлений и 14 удалений

Просмотреть файл

@ -184,7 +184,7 @@ struct mca_btl_openib_component_t {
uint32_t ib_rnr_retry; uint32_t ib_rnr_retry;
uint32_t ib_max_rdma_dst_ops; uint32_t ib_max_rdma_dst_ops;
uint32_t ib_service_level; uint32_t ib_service_level;
uint32_t use_eager_rdma; int32_t use_eager_rdma;
int32_t eager_rdma_threshold; /**< After this number of msg, use RDMA for short messages, always */ int32_t eager_rdma_threshold; /**< After this number of msg, use RDMA for short messages, always */
int32_t eager_rdma_num; int32_t eager_rdma_num;
int32_t max_eager_rdma; int32_t max_eager_rdma;

Просмотреть файл

@ -877,9 +877,8 @@ static int prepare_hca_for_use(mca_btl_openib_hca_t *hca)
hca->endpoints = OBJ_NEW(opal_pointer_array_t); hca->endpoints = OBJ_NEW(opal_pointer_array_t);
opal_pointer_array_init(hca->endpoints, 10, INT_MAX, 10); opal_pointer_array_init(hca->endpoints, 10, INT_MAX, 10);
opal_pointer_array_add(&mca_btl_openib_component.hcas, hca); opal_pointer_array_add(&mca_btl_openib_component.hcas, hca);
if(mca_btl_openib_component.max_eager_rdma > 0 && if (mca_btl_openib_component.max_eager_rdma > 0 &&
mca_btl_openib_component.use_eager_rdma && hca->use_eager_rdma) {
hca->use_eager_rdma) {
hca->eager_rdma_buffers = hca->eager_rdma_buffers =
calloc(mca_btl_openib_component.max_eager_rdma * hca->btls, calloc(mca_btl_openib_component.max_eager_rdma * hca->btls,
sizeof(mca_btl_openib_endpoint_t*)); sizeof(mca_btl_openib_endpoint_t*));
@ -1558,10 +1557,20 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
} }
} }
/* If "use eager rdma" was set, then enable it on this HCA */ /* Should we use RDMA for short / eager messages? First check MCA
if (values.use_eager_rdma_set) { param, then check INI file values. */
if (mca_btl_openib_component.use_eager_rdma >= 0) {
hca->use_eager_rdma = mca_btl_openib_component.use_eager_rdma;
} else if (values.use_eager_rdma_set) {
hca->use_eager_rdma = values.use_eager_rdma; hca->use_eager_rdma = values.use_eager_rdma;
} }
/* Eager RDMA is not currently supported with progress threads */
if (hca->use_eager_rdma && OMPI_ENABLE_PROGRESS_THREADS) {
hca->use_eager_rdma = 0;
orte_show_help("help-mpi-btl-openib.txt",
"eager RDMA and progress threads", true);
}
opal_output(0, "Using eager rdma: %d\n", hca->use_eager_rdma);
#if HAVE_XRC #if HAVE_XRC
/* if user configured to run with XRC qp and the device doesn't /* if user configured to run with XRC qp and the device doesn't

Просмотреть файл

@ -223,7 +223,8 @@ int btl_openib_register_mca_params(void)
mca_btl_openib_component.ib_cq_size[BTL_OPENIB_HP_CQ] = (uint32_t) ival; mca_btl_openib_component.ib_cq_size[BTL_OPENIB_HP_CQ] = (uint32_t) ival;
CHECK(reg_int("ib_max_inline_data", "Maximum size of inline data segment " CHECK(reg_int("ib_max_inline_data", "Maximum size of inline data segment "
"(-1 = use per-device devaults, 0 = run-time probe to discover max value, " "(-1 = use device default, "
"0 = run-time probe to discover max value, "
"otherwise must be >= 1)", "otherwise must be >= 1)",
-1, &ival, REGINT_NEG_ONE_OK | REGINT_GE_ZERO)); -1, &ival, REGINT_NEG_ONE_OK | REGINT_GE_ZERO));
mca_btl_openib_component.ib_max_inline_data = (int32_t) ival; mca_btl_openib_component.ib_max_inline_data = (int32_t) ival;
@ -365,13 +366,11 @@ int btl_openib_register_mca_params(void)
} }
mca_btl_openib_component.ib_service_level = (uint32_t) ival; mca_btl_openib_component.ib_service_level = (uint32_t) ival;
CHECK(reg_int("use_eager_rdma", "Use RDMA for eager messages", CHECK(reg_int("use_eager_rdma", "Use RDMA for eager messages "
1, &ival, 0)); "(-1 = use device default, 0 = do not use eager RDMA, "
mca_btl_openib_component.use_eager_rdma = (uint32_t) (ival != 0); "1 = use eager RDMA)",
#if OMPI_ENABLE_PROGRESS_THREADS == 1 -1, &ival, 0));
/* Fast rdma path isn't supported by PROGRESS_THREAD */ mca_btl_openib_component.use_eager_rdma = (int32_t) ival;
mca_btl_openib_component.use_eager_rdma = 0;
#endif
CHECK(reg_int("eager_rdma_threshold", CHECK(reg_int("eager_rdma_threshold",
"Use RDMA for short messages after this number of " "Use RDMA for short messages after this number of "

Просмотреть файл

@ -537,3 +537,11 @@ support) will be disabled.
Host: %s Host: %s
Device: %s Device: %s
#
[eager RDMA and progress threads]
WARNING: The openib BTL was directed to use "eager RDMA" for short
messages, but the openib BTL was compiled with progress threads
support. Short eager RDMA is not yet supported with progress threads;
its use has been disabled in this job.
This is a warning only; your job will attempt to continue.