1
1

Fix up error handling in openib. Added a simple debug test for memory

registration.

This commit was SVN r6520.
Этот коммит содержится в:
Galen Shipman 2005-07-15 15:13:19 +00:00
родитель 213be28613
Коммит b75560796c
8 изменённых файлов: 185 добавлений и 165 удалений

Просмотреть файл

@ -30,6 +30,9 @@
#include "mca/mpool/base/base.h"
#include "mca/mpool/mpool.h"
#include "mca/mpool/openib/mpool_openib.h"
#include <errno.h>
#include <string.h>
extern int errno;
mca_btl_openib_module_t mca_btl_openib_module = {
{
@ -275,11 +278,9 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
OBJ_RELEASE(openib_reg);
openib_btl->ib_pool->mpool_register(openib_btl->ib_pool,
base_addr,
new_len,
(mca_mpool_base_registration_t**) &openib_reg);
base_addr,
new_len,
(mca_mpool_base_registration_t**) &openib_reg);
rc = mca_mpool_base_insert(openib_reg->base_reg.base,
openib_reg->base_reg.bound - openib_reg->base_reg.base + 1,
@ -405,7 +406,6 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->openib_reg = openib_reg;
OBJ_RETAIN(openib_reg);
return &frag->base;
} else if (max_data+reserve <= btl->btl_eager_limit) {
@ -720,7 +720,7 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
if(ibv_post_send(endpoint->lcl_qp_low,
&frag->sr_desc,
&bad_wr)){
opal_output(0, "%s: error posting send request\n", __func__);
opal_output(0, "%s: error posting send request errno says %s\n", __func__, strerror(errno));
return OMPI_ERROR;
}
@ -792,21 +792,30 @@ int mca_btl_openib_module_init(mca_btl_openib_module_t *openib_btl)
if(NULL == openib_btl->ib_pd) {
opal_output(0, "%s: error allocating pd for %s\n", __func__, ibv_get_device_name(openib_btl->ib_dev));
opal_output(0, "%s: error allocating pd for %s errno says %s\n",
__func__,
ibv_get_device_name(openib_btl->ib_dev),
strerror(errno));
return OMPI_ERROR;
}
openib_btl->ib_cq_low = ibv_create_cq(ctx, openib_btl->ib_cq_size, NULL);
openib_btl->ib_cq_low = ibv_create_cq(ctx, mca_btl_openib_component.ib_cq_size, NULL);
if(NULL == openib_btl->ib_cq_low) {
opal_output(0, "%s: error creating low priority cq for %s\n", __func__, ibv_get_device_name(openib_btl->ib_dev));
opal_output(0, "%s: error creating low priority cq for %s errno says %s\n",
__func__,
ibv_get_device_name(openib_btl->ib_dev),
strerror(errno));
return OMPI_ERROR;
}
openib_btl->ib_cq_high = ibv_create_cq(ctx, openib_btl->ib_cq_size, NULL);
openib_btl->ib_cq_high = ibv_create_cq(ctx, mca_btl_openib_component.ib_cq_size, NULL);
if(NULL == openib_btl->ib_cq_high) {
opal_output(0, "%s: error creating high priority cq for %s\n", __func__, ibv_get_device_name(openib_btl->ib_dev));
opal_output(0, "%s: error creating high priority cq for %s errno says %s\n",
__func__,
ibv_get_device_name(openib_btl->ib_dev),
strerror(errno));
return OMPI_ERROR;
}

Просмотреть файл

@ -95,6 +95,22 @@ struct mca_btl_openib_component_t {
uint32_t leave_pinned;
uint32_t reg_mru_len;
uint32_t ib_cq_size; /**< Max outstanding CQE on the CQ */
uint32_t ib_wq_size; /**< Max outstanding WR on the WQ */
uint32_t ib_sg_list_size; /**< Max scatter/gather descriptor entries on the WQ*/
uint32_t ib_pkey_ix;
uint32_t ib_psn;
uint32_t ib_qp_ous_rd_atom;
uint32_t ib_mtu;
uint32_t ib_min_rnr_timer;
uint32_t ib_timeout;
uint32_t ib_retry_count;
uint32_t ib_rnr_retry;
uint32_t ib_max_rdma_dst_ops;
uint32_t ib_service_level;
uint32_t ib_static_rate;
uint32_t ib_src_path_bits;
}; typedef struct mca_btl_openib_component_t mca_btl_openib_component_t;
@ -147,23 +163,8 @@ struct mca_btl_openib_module_t {
/**< an array to allow posting of rr in one swoop */
size_t ib_inline_max; /**< max size of inline send*/
size_t ib_pin_min; /**< min size to pin memory*/
uint32_t ib_cq_size; /**< Max outstanding CQE on the CQ */
uint32_t ib_wq_size; /**< Max outstanding WR on the WQ */
uint32_t ib_sg_list_size; /**< Max scatter/gather descriptor entries on the WQ*/
uint32_t ib_pkey_ix;
uint32_t ib_psn;
uint32_t ib_qp_ous_rd_atom;
uint32_t ib_mtu;
uint32_t ib_min_rnr_timer;
uint32_t ib_timeout;
uint32_t ib_retry_count;
uint32_t ib_rnr_retry;
uint32_t ib_max_rdma_dst_ops;
uint32_t ib_service_level;
uint32_t ib_static_rate;
uint32_t ib_src_path_bits;
}; typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;

Просмотреть файл

@ -37,8 +37,10 @@
#include "mca/mpool/mvapi/mpool_mvapi.h"
#include <sysfs/libsysfs.h>
#include <infiniband/verbs.h>
#include <errno.h>
#include <string.h> /* for strerror()*/
extern int errno;
mca_btl_openib_component_t mca_btl_openib_component = {
{
/* First, the mca_base_component_t struct containing meta information
@ -132,6 +134,55 @@ int mca_btl_openib_component_open(void)
mca_btl_openib_component.reg_mru_len =
mca_btl_openib_param_register_int("reg_mru_len", 16);
mca_btl_openib_component.ib_cq_size =
mca_btl_openib_param_register_int("ib_cq_size",
500);
mca_btl_openib_component.ib_wq_size =
mca_btl_openib_param_register_int("ib_wq_size",
500);
mca_btl_openib_component.ib_sg_list_size =
mca_btl_openib_param_register_int("ib_sg_list_size",
1);
mca_btl_openib_component.ib_pkey_ix =
mca_btl_openib_param_register_int("ib_pkey_ix",
0);
mca_btl_openib_component.ib_psn =
mca_btl_openib_param_register_int("ib_psn",
0);
mca_btl_openib_component.ib_qp_ous_rd_atom =
mca_btl_openib_param_register_int("ib_qp_ous_rd_atom",
1);
mca_btl_openib_component.ib_mtu =
mca_btl_openib_param_register_int("ib_mtu",
IBV_MTU_1024);
mca_btl_openib_component.ib_min_rnr_timer =
mca_btl_openib_param_register_int("ib_min_rnr_timer",
5);
mca_btl_openib_component.ib_timeout =
mca_btl_openib_param_register_int("ib_timeout",
10);
mca_btl_openib_component.ib_retry_count =
mca_btl_openib_param_register_int("ib_retry_count",
7);
mca_btl_openib_component.ib_rnr_retry =
mca_btl_openib_param_register_int("ib_rnr_retry",
7);
mca_btl_openib_component.ib_max_rdma_dst_ops =
mca_btl_openib_param_register_int("ib_max_rdma_dst_ops",
16);
mca_btl_openib_component.ib_service_level =
mca_btl_openib_param_register_int("ib_service_level",
0);
mca_btl_openib_component.ib_static_rate =
mca_btl_openib_param_register_int("ib_static_rate",
0);
mca_btl_openib_component.ib_src_path_bits =
mca_btl_openib_param_register_int("ib_src_path_bits",
0);
mca_btl_openib_module.super.btl_exclusivity =
mca_btl_openib_param_register_int ("exclusivity", 0);
mca_btl_openib_module.super.btl_eager_limit =
@ -145,55 +196,6 @@ int mca_btl_openib_component_open(void)
mca_btl_openib_module.super.btl_max_send_size =
mca_btl_openib_param_register_int ("max_send_size", (128*1024))
- sizeof(mca_btl_openib_header_t);
mca_btl_openib_module.ib_pin_min =
mca_btl_openib_param_register_int("ib_pin_min", 128*1024);
mca_btl_openib_module.ib_cq_size =
mca_btl_openib_param_register_int("ib_cq_size",
40000);
mca_btl_openib_module.ib_wq_size =
mca_btl_openib_param_register_int("ib_wq_size",
10000);
mca_btl_openib_module.ib_sg_list_size =
mca_btl_openib_param_register_int("ib_sg_list_size",
1);
mca_btl_openib_module.ib_pkey_ix =
mca_btl_openib_param_register_int("ib_pkey_ix",
0);
mca_btl_openib_module.ib_psn =
mca_btl_openib_param_register_int("ib_psn",
0);
mca_btl_openib_module.ib_qp_ous_rd_atom =
mca_btl_openib_param_register_int("ib_qp_ous_rd_atom",
1);
mca_btl_openib_module.ib_mtu =
mca_btl_openib_param_register_int("ib_mtu",
IBV_MTU_1024);
mca_btl_openib_module.ib_min_rnr_timer =
mca_btl_openib_param_register_int("ib_min_rnr_timer",
5);
mca_btl_openib_module.ib_timeout =
mca_btl_openib_param_register_int("ib_timeout",
10);
mca_btl_openib_module.ib_retry_count =
mca_btl_openib_param_register_int("ib_retry_count",
7);
mca_btl_openib_module.ib_rnr_retry =
mca_btl_openib_param_register_int("ib_rnr_retry",
7);
mca_btl_openib_module.ib_max_rdma_dst_ops =
mca_btl_openib_param_register_int("ib_max_rdma_dst_ops",
16);
mca_btl_openib_module.ib_service_level =
mca_btl_openib_param_register_int("ib_service_level",
0);
mca_btl_openib_module.ib_static_rate =
mca_btl_openib_param_register_int("ib_static_rate",
0);
mca_btl_openib_module.ib_src_path_bits =
mca_btl_openib_param_register_int("ib_src_path_bits",
0);
mca_btl_openib_module.super.btl_min_rdma_size =
mca_btl_openib_param_register_int("min_rdma_size",
1024*1024);
@ -296,16 +298,50 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
for(i = 0; i < num_devs; i++){
struct ibv_device_attr ib_dev_attr;
struct ibv_context* ib_dev_context;
struct ibv_pd *my_pd;
struct ibv_mr *mr;
void* my_addr;
uint32_t my_size;
uint32_t my_indx;
uint32_t my_mult;
my_mult = 4096;
ib_dev = ib_devs[i];
ib_dev_context = ibv_open_device(ib_dev);
if(!ib_dev_context) {
opal_output(0, "%s: error obtaining device context for %s\n", __func__, ibv_get_device_name(ib_dev));
opal_output(0, "%s: error obtaining device context for %s errno says %s\n", __func__, ibv_get_device_name(ib_dev), strerror(errno));
return NULL;
}
my_pd = ibv_alloc_pd(ib_dev_context);
for(my_indx = 1; my_indx <= 8192; my_indx++){
my_size = my_mult * my_indx;
my_addr = memalign(4096, my_size);
memset(my_addr, 0, my_size);
mr = ibv_reg_mr(
my_pd,
my_addr,
my_size,
IBV_ACCESS_REMOTE_WRITE
/* IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE */
);
if(NULL == mr){
opal_output(0, "%s: error on mr test! can't register %lu bytes, errno says %s \n", __func__, my_size, strerror(errno));
break;
}
else {
opal_output(0, "%s: successfully registerted %lu bytes", __func__, my_size);
ibv_dereg_mr(mr);
}
}
if(ibv_query_device(ib_dev_context, &ib_dev_attr)){
opal_output(0, "%s: error obtaining device attributes for %s\n", __func__, ibv_get_device_name(ib_dev));
opal_output(0, "%s: error obtaining device attributes for %s errno says %s\n", __func__, ibv_get_device_name(ib_dev), strerror(errno));
return NULL;
}
@ -316,8 +352,8 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
struct ibv_port_attr* ib_port_attr;
ib_port_attr = (struct ibv_port_attr*) malloc(sizeof(struct ibv_port_attr));
if(ibv_query_port(ib_dev_context, (uint8_t) j, ib_port_attr)){
opal_output(0, "%s: error getting port attributes for device %s port number %d",
__func__, ibv_get_device_name(ib_dev), j);
opal_output(0, "%s: error getting port attributes for device %s port number %d errno says %s",
__func__, ibv_get_device_name(ib_dev), j, strerror(errno));
return NULL;
}
@ -337,6 +373,9 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
mca_btl_openib_component.ib_num_btls ++;
}
else{
free(ib_port_attr);
}
}
}
@ -501,7 +540,7 @@ int mca_btl_openib_component_progress()
do{
ne=ibv_poll_cq(openib_btl->ib_cq_high, 1, &wc );
if(ne < 0 ){
opal_output(0, "%s: error polling CQ with %d \n", __func__, ne);
opal_output(0, "%s: error polling CQ with %d errno says %s\n", __func__, ne, strerror(errno));
return OMPI_ERROR;
}
else if(wc.status != IBV_WC_SUCCESS) {
@ -562,7 +601,7 @@ int mca_btl_openib_component_progress()
ne=ibv_poll_cq(openib_btl->ib_cq_low, 1, &wc );
if(ne < 0){
opal_output(0, "%s: error polling CQ with %d \n", __func__, ne);
opal_output(0, "%s: error polling CQ with %d errno says %s\n", __func__, ne, strerror(errno));
return OMPI_ERROR;
}
else if(wc.status != IBV_WC_SUCCESS) {

Просмотреть файл

@ -30,6 +30,9 @@
#include "btl_openib_proc.h"
#include "btl_openib_frag.h"
#include "class/ompi_free_list.h"
#include <errno.h>
#include <string.h>
extern int errno;
static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint);
static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint);
@ -82,7 +85,7 @@ static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* ope
if(ibv_post_send(ib_qp,
&frag->sr_desc,
&bad_wr)) {
opal_output(0, "%s: error posting send request\n", __func__);
opal_output(0, "%s: error posting send request errno says %s\n", __func__, strerror(errno));
return OMPI_ERROR;
}
mca_btl_openib_endpoint_post_rr(endpoint, 1);
@ -686,17 +689,17 @@ int mca_btl_openib_endpoint_create_qp(
struct ibv_qp_init_attr qp_init_attr;
qp_init_attr.send_cq = cq;
qp_init_attr.recv_cq = cq;
qp_init_attr.cap.max_send_wr = openib_btl->ib_wq_size;
qp_init_attr.cap.max_recv_wr = openib_btl->ib_wq_size;
qp_init_attr.cap.max_send_sge = openib_btl->ib_sg_list_size;
qp_init_attr.cap.max_recv_sge = openib_btl->ib_sg_list_size;
qp_init_attr.cap.max_send_wr = mca_btl_openib_component.ib_wq_size;
qp_init_attr.cap.max_recv_wr = mca_btl_openib_component.ib_wq_size;
qp_init_attr.cap.max_send_sge = mca_btl_openib_component.ib_sg_list_size;
qp_init_attr.cap.max_recv_sge = mca_btl_openib_component.ib_sg_list_size;
qp_init_attr.qp_type = IBV_QPT_RC;
(*qp) = ibv_create_qp(pd, &qp_init_attr);
if(NULL == (*qp)) {
opal_output(0, "%s: error creating qp \n", __func__);
opal_output(0, "%s: error creating qp errno says %s\n", __func__, strerror(errno));
return OMPI_ERROR;
}
@ -706,7 +709,7 @@ int mca_btl_openib_endpoint_create_qp(
{
qp_attr->qp_state = IBV_QPS_INIT;
qp_attr->pkey_index = openib_btl->ib_pkey_ix;
qp_attr->pkey_index = mca_btl_openib_component.ib_pkey_ix;
qp_attr->port_num = openib_btl->port_num;
qp_attr->qp_access_flags = 0;
@ -715,7 +718,7 @@ int mca_btl_openib_endpoint_create_qp(
IBV_QP_PKEY_INDEX |
IBV_QP_PORT |
IBV_QP_ACCESS_FLAGS )) {
opal_output(0, "%s: error modifying qp to INIT\n");
opal_output(0, "%s: error modifying qp to INIT errno says %s\n", __func__, strerror(errno));
return OMPI_ERROR;
}
}
@ -737,15 +740,15 @@ int mca_btl_openib_endpoint_qp_init_query(
{
attr->qp_state = IBV_QPS_RTR;
attr->path_mtu = openib_btl->ib_mtu;
attr->path_mtu = mca_btl_openib_component.ib_mtu;
attr->dest_qp_num = rem_qp_num;
attr->rq_psn = rem_psn;
attr->max_dest_rd_atomic = openib_btl->ib_max_rdma_dst_ops;
attr->min_rnr_timer = openib_btl->ib_min_rnr_timer;
attr->max_dest_rd_atomic = mca_btl_openib_component.ib_max_rdma_dst_ops;
attr->min_rnr_timer = mca_btl_openib_component.ib_min_rnr_timer;
attr->ah_attr.is_global = 0;
attr->ah_attr.dlid = rem_lid;
attr->ah_attr.sl = openib_btl->ib_service_level;
attr->ah_attr.src_path_bits = openib_btl->ib_src_path_bits;
attr->ah_attr.sl = mca_btl_openib_component.ib_service_level;
attr->ah_attr.src_path_bits = mca_btl_openib_component.ib_src_path_bits;
attr->ah_attr.port_num = port_num;
if(ibv_modify_qp(qp, attr,
@ -756,15 +759,15 @@ int mca_btl_openib_endpoint_qp_init_query(
IBV_QP_RQ_PSN |
IBV_QP_MAX_DEST_RD_ATOMIC |
IBV_QP_MIN_RNR_TIMER)) {
opal_output(0, "%s: error modifing QP to RTR\n", __func__);
opal_output(0, "%s: error modifing QP to RTR errno says %s\n", __func__, strerror(errno));
return OMPI_ERROR;
}
attr->qp_state = IBV_QPS_RTS;
attr->timeout = openib_btl->ib_timeout;
attr->retry_cnt = openib_btl->ib_retry_count;
attr->rnr_retry = openib_btl->ib_rnr_retry;
attr->timeout = mca_btl_openib_component.ib_timeout;
attr->retry_cnt = mca_btl_openib_component.ib_retry_count;
attr->rnr_retry = mca_btl_openib_component.ib_rnr_retry;
attr->sq_psn = lcl_psn;
attr->max_rd_atomic = openib_btl->ib_max_rdma_dst_ops;
attr->max_rd_atomic = mca_btl_openib_component.ib_max_rdma_dst_ops;
if (ibv_modify_qp(qp, attr,
IBV_QP_STATE |
IBV_QP_TIMEOUT |
@ -772,7 +775,7 @@ int mca_btl_openib_endpoint_qp_init_query(
IBV_QP_RNR_RETRY |
IBV_QP_SQ_PSN |
IBV_QP_MAX_QP_RD_ATOMIC)) {
opal_output(0, "%s: error modifying QP to RTS\n", __func__);
opal_output(0, "%s: error modifying QP to RTS errno says %s\n", __func__, strerror(errno));
return OMPI_ERROR;
}
return OMPI_SUCCESS;

Просмотреть файл

@ -23,6 +23,11 @@
#include "mca/btl/btl.h"
#include "btl_openib_frag.h"
#include "btl_openib.h"
#include <errno.h>
#include <string.h>
extern int errno;
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
@ -159,7 +164,7 @@ static inline int mca_btl_openib_endpoint_post_rr_sub(int cnt,
if(ibv_post_recv(qp,
&rr_desc_post[i],
&bad_wr)) {
opal_output(0, "%s: error posting receive\n", __func__);
opal_output(0, "%s: error posting receive errno says %s\n", __func__, strerror(errno));
return OMPI_ERROR;
}

Просмотреть файл

@ -23,7 +23,6 @@
#include "class/ompi_free_list.h"
#include "opal/event/event.h"
#include "mca/mpool/mpool.h"
#include "mca/allocator/allocator.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
@ -42,7 +41,6 @@ static inline void* ALIGN_ADDR(void* addr, uint32_t cnt ) {
/*
 * Component-level state of the openib mpool: the base component
 * descriptor followed by values filled in when the component is opened
 * and initialized.
 */
struct mca_mpool_openib_component_t {
mca_mpool_base_component_t super; /**< base mpool component, embedded as the first member */
char* vapi_allocator_name; /**< value of the "allocator" string parameter (default "bucket"), set in mca_mpool_openib_open() */
long page_size; /**< result of sysconf(_SC_PAGESIZE), set in mca_mpool_openib_open() */
long page_size_log; /**< reset to 0 and then incremented in mca_mpool_openib_init(); presumably log2(page_size) -- the loop condition is not visible here, TODO confirm */
};
@ -62,7 +60,6 @@ typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t;
/*
 * Per-module state of the openib mpool. Code in this component casts
 * between mca_mpool_base_module_t* and this type, which relies on
 * `super` being the first member.
 */
struct mca_mpool_openib_module_t {
mca_mpool_base_module_t super; /**< base mpool module; &super is what mca_mpool_openib_init() returns */
mca_allocator_base_module_t * vapi_allocator; /**< allocator returned by the allocator component's allocator_init() in mca_mpool_openib_init(); its alc_alloc is called from mca_mpool_openib_alloc() */
struct mca_mpool_base_resources_t resources; /**< copy of the resources passed to mca_mpool_openib_init(); resources.ib_pd is the protection domain handed to ibv_reg_mr() */
}; typedef struct mca_mpool_openib_module_t mca_mpool_openib_module_t;
@ -131,11 +128,6 @@ void mca_mpool_openib_free(mca_mpool_base_module_t* mpool,
void * addr,
mca_mpool_base_registration_t* registration);
/**
 * Allocate a segment of memory and register it through the given mpool.
 *
 * @param module        mpool whose mpool_register callback performs the registration
 * @param size          (IN)  pointer to the number of bytes to allocate and register
 * @param registration  (OUT) passed through to mpool_register
 * @return              address of the registered segment, or NULL if registration fails
 */
void* mca_common_vapi_segment_alloc(
struct mca_mpool_base_module_t* module,
size_t* size,
mca_mpool_base_registration_t** registration);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

Просмотреть файл

@ -18,11 +18,11 @@
#include "opal/util/output.h"
#include "mca/base/base.h"
#include "mca/base/mca_base_param.h"
#include "mca/allocator/base/base.h"
#include "mpool_openib.h"
#include "util/proc_info.h"
#include "util/sys_info.h"
#include <unistd.h>
#include <malloc.h>
/*
* Local functions
@ -99,7 +99,7 @@ static char* mca_mpool_openib_param_register_string(
const char* default_value)
{
char *param_value;
int id = mca_base_param_register_string("mpool","vapi",param_name,NULL,default_value);
int id = mca_base_param_register_string("mpool","openib",param_name,NULL,default_value);
mca_base_param_lookup_string(id, &param_value);
return param_value;
}
@ -110,37 +110,16 @@ static char* mca_mpool_openib_param_register_string(
*/
static int mca_mpool_openib_open(void)
{
/* register component parameters: the "allocator" string parameter
 * (default "bucket") names the allocator looked up later in
 * mca_mpool_openib_init() */
mca_mpool_openib_component.vapi_allocator_name =
mca_mpool_openib_param_register_string("allocator", "bucket");
/* get the page size for this architecture; the sysconf() result is
 * stored as-is and is not checked for -1 */
mca_mpool_openib_component.page_size = sysconf(_SC_PAGESIZE);
/* this function has no failure path: it always reports success */
return OMPI_SUCCESS;
}
/*
 * Allocates a segment of memory and registers it with IB through
 * mpool->mpool_register.
 *
 * mpool         mpool whose mpool_register callback is invoked
 * size          pointer to the number of bytes to register; only read here
 * registration  passed through to mpool_register, which is expected to
 *               fill in the registration handle
 *
 * Returns the (aligned) address that was registered, or NULL when
 * mpool_register does not return OMPI_SUCCESS.
 */
void* mca_common_vapi_segment_alloc(
struct mca_mpool_base_module_t* mpool,
size_t* size,
mca_mpool_base_registration_t** registration)
{
/* over-allocate by one page so that the start address can be moved to
 * an aligned boundary.
 * NOTE(review): the malloc() result is not checked for NULL before use. */
void* addr_malloc = (void*)malloc((*size) + mca_mpool_openib_component.page_size);
/* ALIGN_ADDR is defined outside this view; presumably it rounds the
 * address up to a 2^page_size_log boundary -- TODO confirm */
void* addr = (void*) ALIGN_ADDR(addr_malloc, mca_mpool_openib_component.page_size_log);
if(OMPI_SUCCESS != mpool->mpool_register(mpool, addr, *size, registration)) {
/* registration failed: release the raw allocation, not the aligned pointer */
free(addr_malloc);
return NULL;
}
/* NOTE(review): only the aligned pointer is returned and addr_malloc is
 * not recorded anywhere in this function, so if the two differ the
 * caller has no pointer that is valid to pass to free(). */
return addr;
}
/* Creates an openib mpool module, initializes it, stores a copy of the given resources in it, and returns its base module. */
static mca_mpool_base_module_t* mca_mpool_openib_init(
struct mca_mpool_base_resources_t* resources)
{
mca_mpool_openib_module_t* mpool_module;
mca_allocator_base_component_t* allocator_component;
long page_size = mca_mpool_openib_component.page_size;
mca_mpool_openib_component.page_size_log = 0;
@ -149,32 +128,12 @@ static mca_mpool_base_module_t* mca_mpool_openib_init(
mca_mpool_openib_component.page_size_log++;
}
/* if the specified allocator cannot be loaded - look for an alternative */
allocator_component = mca_allocator_component_lookup(mca_mpool_openib_component.vapi_allocator_name);
if(NULL == allocator_component) {
if(opal_list_get_size(&mca_allocator_base_components) == 0) {
mca_base_component_list_item_t* item = (mca_base_component_list_item_t*)
opal_list_get_first(&mca_allocator_base_components);
allocator_component = (mca_allocator_base_component_t*)item->cli_component;
opal_output(0, "mca_mpool_openib_init: unable to locate allocator: %s - using %s\n",
mca_mpool_openib_component.vapi_allocator_name, allocator_component->allocator_version.mca_component_name);
} else {
opal_output(0, "mca_mpool_openib_init: unable to locate allocator: %s\n",
mca_mpool_openib_component.vapi_allocator_name);
return NULL;
}
}
mpool_module = (mca_mpool_openib_module_t*)malloc(sizeof(mca_mpool_openib_module_t));
mca_mpool_openib_module_init(mpool_module);
mpool_module->resources = *resources;
mpool_module->vapi_allocator =
allocator_component->allocator_init(true, mca_common_vapi_segment_alloc, NULL, &mpool_module->super);
if(NULL == mpool_module->vapi_allocator) {
opal_output(0, "mca_mpool_openib_init: unable to initialize allocator");
return NULL;
}
return &mpool_module->super;
}

Просмотреть файл

@ -19,7 +19,8 @@
#include "opal/util/output.h"
#include "mca/mpool/openib/mpool_openib.h"
#include <infiniband/verbs.h>
#include <errno.h>
#include <string.h>
/*
* Initializes the mpool module.
*/
@ -46,9 +47,18 @@ void* mca_mpool_openib_alloc(
mca_mpool_base_registration_t** registration)
{
mca_mpool_openib_module_t* mpool_openib = (mca_mpool_openib_module_t*)mpool;
return mpool_openib->vapi_allocator->alc_alloc(mpool_openib->vapi_allocator, size, align, registration);
}
/* void* addr_malloc = (void*)malloc((*size) + mca_mpool_openib_component.page_size); */
/* void* addr = (void*) ALIGN_ADDR(addr_malloc, mca_mpool_openib_component.page_size_log); */
void* addr_malloc = (void*)memalign(mca_mpool_openib_component.page_size, size);
void* addr = addr_malloc;
if(OMPI_SUCCESS != mpool->mpool_register(mpool, addr, size, registration)) {
free(addr_malloc);
return NULL;
}
return addr;
}
/*
* register memory
@ -57,6 +67,7 @@ int mca_mpool_openib_register(mca_mpool_base_module_t* mpool,
void *addr,
size_t size,
mca_mpool_base_registration_t** registration){
mca_mpool_openib_module_t * mpool_module = (mca_mpool_openib_module_t*) mpool;
mca_mpool_openib_registration_t * vapi_reg;
@ -69,12 +80,13 @@ int mca_mpool_openib_register(mca_mpool_base_module_t* mpool,
mpool_module->resources.ib_pd,
addr,
size,
IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE
IBV_ACCESS_REMOTE_WRITE
/* IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE */
);
if(NULL == vapi_reg->mr){
opal_output(0, "%s: error registering openib memory\n", __func__);
opal_output(0, "%s: error registering openib memory of size %lu errno says %s\n", __func__, size, strerror(errno));
return OMPI_ERROR;
}
@ -95,10 +107,10 @@ int mca_mpool_openib_deregister(mca_mpool_base_module_t* mpool, void *addr, size
mca_mpool_openib_registration_t * openib_reg;
openib_reg = (mca_mpool_openib_registration_t*) registration;
if(! ibv_dereg_mr(openib_reg->mr)){
opal_output(0, "%s: error unpinning openib memory\n", __func__);
opal_output(0, "%s: error unpinning openib memory errno says %s\n", __func__, strerror(errno));
return OMPI_ERROR;
}
free(registration);
return OMPI_SUCCESS;
}