1
1

Merge pull request #536 from nkogteva/ud_fixes

oob ud: fixes and parameter adjustment
Этот коммит содержится в:
Mike Dubman 2015-04-17 18:25:53 +03:00
родитель bfacb5dd73 c2678b0cc9
Коммит 9a5a5111e6
4 изменённых файла: 25 добавлений и 16 удалений

Просмотреть файл

@ -103,7 +103,7 @@ static inline void mca_oob_ud_fill_sge (struct ibv_sge *sge, void *addr,
struct mca_oob_ud_device_t {
opal_list_item_t super;
struct ibv_device_attr attr;
struct ibv_context *ib_context;
struct ibv_comp_channel *ib_channel;
struct ibv_pd *ib_pd;

Просмотреть файл

@ -213,7 +213,6 @@ static inline int mca_oob_ud_device_setup (mca_oob_ud_device_t *device,
struct ibv_device *ib_device)
{
int rc, port_num;
struct ibv_device_attr dev_attr;
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:ud:device_setup attempting to setup ib device %p",
@ -237,7 +236,7 @@ static inline int mca_oob_ud_device_setup (mca_oob_ud_device_t *device,
return ORTE_ERROR;
}
rc = ibv_query_device (device->ib_context, &dev_attr);
rc = ibv_query_device (device->ib_context, &device->attr);
if (0 != rc) {
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:ud:device_setup error querying device. errno = %d",
@ -261,7 +260,7 @@ static inline int mca_oob_ud_device_setup (mca_oob_ud_device_t *device,
return ORTE_ERROR;
}
for (port_num = 1 ; port_num <= dev_attr.phys_port_cnt ; ++port_num) {
for (port_num = 1 ; port_num <= device->attr.phys_port_cnt ; ++port_num) {
mca_oob_ud_port_t *port = OBJ_NEW(mca_oob_ud_port_t);
if (NULL == port) {

Просмотреть файл

@ -12,6 +12,7 @@
*
*/
#include "oob_ud_component.h"
#include "oob_ud_qp.h"
#include "oob_ud.h"
@ -72,12 +73,16 @@ int mca_oob_ud_qp_init (mca_oob_ud_qp_t *qp, struct mca_oob_ud_port_t *port,
init_attr.send_cq = qp->ib_send_cq;
init_attr.recv_cq = qp->ib_recv_cq;
init_attr.cap.max_send_sge = 32;
init_attr.cap.max_recv_sge = 32; /* GRH, data */
mca_oob_ud_device_t *device = (mca_oob_ud_device_t *) opal_list_get_first (&mca_oob_ud_component.ud_devices);
opal_output_verbose(80, orte_oob_base_framework.framework_output,
"%s oob:ud:qp_init create queue pair for device: device->attr.max_sge = %d, device->attr.max_qp_wr = %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), device->attr.max_sge, device->attr.max_qp_wr);
init_attr.cap.max_send_sge = 1;
init_attr.cap.max_recv_sge = 2; /* GRH, data */
init_attr.cap.max_inline_data = 0; /* don't use inline data for now */
/* NTH: fix these */
init_attr.cap.max_recv_wr = 4096;
init_attr.cap.max_send_wr = 4096;
init_attr.cap.max_recv_wr = min(4096, device->attr.max_qp_wr);
init_attr.cap.max_send_wr = min(4096, device->attr.max_qp_wr);
qp->ib_qp = ibv_create_qp (port->device->ib_pd, &init_attr);
if (NULL == qp->ib_qp) {
@ -258,6 +263,7 @@ int mca_oob_ud_qp_post_send (mca_oob_ud_qp_t *qp, struct ibv_send_wr *wr,
}
int mca_oob_ud_qp_post_recv (mca_oob_ud_qp_t *qp, struct ibv_recv_wr *wr) {
struct ibv_recv_wr *bad_wr;
int rc;
@ -265,22 +271,23 @@ int mca_oob_ud_qp_post_recv (mca_oob_ud_qp_t *qp, struct ibv_recv_wr *wr) {
if (0 != rc) {
opal_output (0, "%s oob:ud:qp_post_recv failed. errno = %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno);
return ORTE_ERROR;
}
return ORTE_SUCCESS;
}
int mca_oob_ud_qp_data_aquire (struct mca_oob_ud_port_t *port, mca_oob_ud_qp_t **qp_ptr) {
int rc;
int rc = ORTE_SUCCESS;
opal_free_list_item_t *item;
do {
item = opal_free_list_get_st (&port->data_qps);
if (NULL == item) {
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"%s oob:ud:qp_data_aquire error allocating new data qp",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
rc = ORTE_ERR_OUT_OF_RESOURCE;
"%s oob:ud:qp_data_aquire error allocating new data qp. error = %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rc);
rc = ORTE_ERR_TEMP_OUT_OF_RESOURCE;
break;
}

Просмотреть файл

@ -309,7 +309,8 @@ int mca_oob_ud_send_try (mca_oob_ud_req_t *send_req) {
const unsigned int mtu = send_req->req_mtu;
const struct timeval aquire_timeout = {0, 500000};
mca_oob_ud_msg_t *com_msg;
int data_len, rc;
int data_len;
int rc = ORTE_SUCCESS;
opal_output_verbose(10, orte_oob_base_framework.framework_output,
"%s oob:ud:send_try sending to %s, tag = %d, "
@ -504,7 +505,8 @@ int mca_oob_ud_send_try (mca_oob_ud_req_t *send_req) {
/* send data */
rc = mca_oob_ud_qp_post_send (send_req->req_qp, send_req->req_wr.send, 0);
if (ORTE_SUCCESS != rc) {
opal_output (0, "error posting send!");
opal_output (0, "%s oob:ud:send_try error posting send!",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
break;
}
@ -532,7 +534,8 @@ int mca_oob_ud_send_try (mca_oob_ud_req_t *send_req) {
}
if (ORTE_SUCCESS != rc) {
opal_output (0, "send error! rc = %d", rc);
opal_output (0, "%s oob:ud:send_try send error! rc = %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rc);
/* damn */
return mca_oob_ud_send_complete (send_req, rc);
}