Also show the "you might not have enough registered memory" warning
message earlier in the openib BTL startup sequence. This commit was SVN r21469.
Этот коммит содержится в:
родитель
771ce035a5
Коммит
c39998db17
@ -99,8 +99,8 @@ mca_btl_openib_module_t mca_btl_openib_module = {
|
||||
|
||||
static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl);
|
||||
|
||||
static void show_init_error(const char *file, int line,
|
||||
const char *func, const char *dev)
|
||||
void mca_btl_openib_show_init_error(const char *file, int line,
|
||||
const char *func, const char *dev)
|
||||
{
|
||||
if (ENOMEM == errno) {
|
||||
int ret;
|
||||
@ -166,15 +166,16 @@ static int adjust_cq(mca_btl_openib_device_t *device, const int cq)
|
||||
0);
|
||||
|
||||
if (NULL == device->ib_cq[cq]) {
|
||||
show_init_error(__FILE__, __LINE__, "ibv_create_cq",
|
||||
ibv_get_device_name(device->ib_dev));
|
||||
mca_btl_openib_show_init_error(__FILE__, __LINE__, "ibv_create_cq",
|
||||
ibv_get_device_name(device->ib_dev));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
#if OPAL_ENABLE_PROGRESS_THREADS == 1
|
||||
if(ibv_req_notify_cq(device->ib_cq[cq], 0)) {
|
||||
show_init_error(__FILE__, __LINE__, "ibv_req_notify_cq",
|
||||
ibv_get_device_name(device->ib_dev));
|
||||
mca_btl_openib_show_init_error(__FILE__, __LINE__,
|
||||
"ibv_req_notify_cq",
|
||||
ibv_get_device_name(device->ib_dev));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
@ -236,8 +237,9 @@ static int create_srq(mca_btl_openib_module_t *openib_btl)
|
||||
ibv_create_srq(openib_btl->device->ib_pd, &attr);
|
||||
}
|
||||
if (NULL == openib_btl->qps[qp].u.srq_qp.srq) {
|
||||
show_init_error(__FILE__, __LINE__, "ibv_create_srq",
|
||||
ibv_get_device_name(openib_btl->device->ib_dev));
|
||||
mca_btl_openib_show_init_error(__FILE__, __LINE__,
|
||||
"ibv_create_srq",
|
||||
ibv_get_device_name(openib_btl->device->ib_dev));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
}
|
||||
|
@ -611,6 +611,13 @@ extern void mca_btl_openib_frag_progress_pending_put_get(
|
||||
extern int mca_btl_openib_ft_event(int state);
|
||||
|
||||
|
||||
/**
|
||||
* Show an error during init, particularly when running out of
|
||||
* registered memory.
|
||||
*/
|
||||
void mca_btl_openib_show_init_error(const char *file, int line,
|
||||
const char *func, const char *dev);
|
||||
|
||||
#define BTL_OPENIB_HP_CQ 0
|
||||
#define BTL_OPENIB_LP_CQ 1
|
||||
|
||||
|
@ -910,7 +910,7 @@ OBJ_CLASS_INSTANCE(mca_btl_openib_device_t, opal_object_t, device_construct,
|
||||
static int prepare_device_for_use(mca_btl_openib_device_t *device)
|
||||
{
|
||||
mca_btl_openib_frag_init_data_t *init_data;
|
||||
int qp, length;
|
||||
int rc, qp, length;
|
||||
|
||||
#if OPAL_HAVE_THREADS
|
||||
if(mca_btl_openib_component.use_async_event_thread) {
|
||||
@ -985,16 +985,25 @@ static int prepare_device_for_use(mca_btl_openib_device_t *device)
|
||||
init_data->order = MCA_BTL_NO_ORDER;
|
||||
init_data->list = &device->send_free_control;
|
||||
|
||||
if(OMPI_SUCCESS != ompi_free_list_init_ex_new(
|
||||
&device->send_free_control,
|
||||
rc = ompi_free_list_init_ex_new(&device->send_free_control,
|
||||
sizeof(mca_btl_openib_send_control_frag_t), CACHE_LINE_SIZE,
|
||||
OBJ_CLASS(mca_btl_openib_send_control_frag_t), length,
|
||||
mca_btl_openib_component.buffer_alignment,
|
||||
mca_btl_openib_component.ib_free_list_num, -1,
|
||||
mca_btl_openib_component.ib_free_list_inc,
|
||||
device->mpool, mca_btl_openib_frag_init,
|
||||
init_data)) {
|
||||
return OMPI_ERROR;
|
||||
init_data);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
/* If we're "out of memory", this usually means that we ran
|
||||
out of registered memory, so show that error message */
|
||||
if (OMPI_ERR_OUT_OF_RESOURCE == rc ||
|
||||
OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc) {
|
||||
errno = ENOMEM;
|
||||
mca_btl_openib_show_init_error(__FILE__, __LINE__,
|
||||
"ompi_free_list_init_ex_new",
|
||||
ibv_get_device_name(device->ib_dev));
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* setup all the qps */
|
||||
@ -1010,7 +1019,7 @@ static int prepare_device_for_use(mca_btl_openib_device_t *device)
|
||||
init_data->order = qp;
|
||||
init_data->list = &device->qps[qp].send_free;
|
||||
|
||||
if(OMPI_SUCCESS != ompi_free_list_init_ex_new(init_data->list,
|
||||
rc = ompi_free_list_init_ex_new(init_data->list,
|
||||
sizeof(mca_btl_openib_send_frag_t), CACHE_LINE_SIZE,
|
||||
OBJ_CLASS(mca_btl_openib_send_frag_t), length,
|
||||
mca_btl_openib_component.buffer_alignment,
|
||||
@ -1018,7 +1027,18 @@ static int prepare_device_for_use(mca_btl_openib_device_t *device)
|
||||
mca_btl_openib_component.ib_free_list_max,
|
||||
mca_btl_openib_component.ib_free_list_inc,
|
||||
device->mpool, mca_btl_openib_frag_init,
|
||||
init_data)) {
|
||||
init_data);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
/* If we're "out of memory", this usually means that we
|
||||
ran out of registered memory, so show that error
|
||||
message */
|
||||
if (OMPI_ERR_OUT_OF_RESOURCE == rc ||
|
||||
OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc) {
|
||||
errno = ENOMEM;
|
||||
mca_btl_openib_show_init_error(__FILE__, __LINE__,
|
||||
"ompi_free_list_init_ex_new",
|
||||
ibv_get_device_name(device->ib_dev));
|
||||
}
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user