Merge pull request #536 from nkogteva/ud_fixes
oob ud: fixes and parameter adjustment
Этот коммит содержится в:
Коммит
9a5a5111e6
@ -103,7 +103,7 @@ static inline void mca_oob_ud_fill_sge (struct ibv_sge *sge, void *addr,
|
||||
|
||||
struct mca_oob_ud_device_t {
|
||||
opal_list_item_t super;
|
||||
|
||||
struct ibv_device_attr attr;
|
||||
struct ibv_context *ib_context;
|
||||
struct ibv_comp_channel *ib_channel;
|
||||
struct ibv_pd *ib_pd;
|
||||
|
@ -213,7 +213,6 @@ static inline int mca_oob_ud_device_setup (mca_oob_ud_device_t *device,
|
||||
struct ibv_device *ib_device)
|
||||
{
|
||||
int rc, port_num;
|
||||
struct ibv_device_attr dev_attr;
|
||||
|
||||
opal_output_verbose(5, orte_oob_base_framework.framework_output,
|
||||
"%s oob:ud:device_setup attempting to setup ib device %p",
|
||||
@ -237,7 +236,7 @@ static inline int mca_oob_ud_device_setup (mca_oob_ud_device_t *device,
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
rc = ibv_query_device (device->ib_context, &dev_attr);
|
||||
rc = ibv_query_device (device->ib_context, &device->attr);
|
||||
if (0 != rc) {
|
||||
opal_output_verbose(5, orte_oob_base_framework.framework_output,
|
||||
"%s oob:ud:device_setup error querying device. errno = %d",
|
||||
@ -261,7 +260,7 @@ static inline int mca_oob_ud_device_setup (mca_oob_ud_device_t *device,
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
for (port_num = 1 ; port_num <= dev_attr.phys_port_cnt ; ++port_num) {
|
||||
for (port_num = 1 ; port_num <= device->attr.phys_port_cnt ; ++port_num) {
|
||||
mca_oob_ud_port_t *port = OBJ_NEW(mca_oob_ud_port_t);
|
||||
|
||||
if (NULL == port) {
|
||||
|
@ -12,6 +12,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "oob_ud_component.h"
|
||||
#include "oob_ud_qp.h"
|
||||
#include "oob_ud.h"
|
||||
|
||||
@ -72,12 +73,16 @@ int mca_oob_ud_qp_init (mca_oob_ud_qp_t *qp, struct mca_oob_ud_port_t *port,
|
||||
init_attr.send_cq = qp->ib_send_cq;
|
||||
init_attr.recv_cq = qp->ib_recv_cq;
|
||||
|
||||
init_attr.cap.max_send_sge = 32;
|
||||
init_attr.cap.max_recv_sge = 32; /* GRH, data */
|
||||
mca_oob_ud_device_t *device = (mca_oob_ud_device_t *) opal_list_get_first (&mca_oob_ud_component.ud_devices);
|
||||
opal_output_verbose(80, orte_oob_base_framework.framework_output,
|
||||
"%s oob:ud:qp_init create queue pair for device: device->attr.max_sge = %d, device->attr.max_qp_wr = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), device->attr.max_sge, device->attr.max_qp_wr);
|
||||
|
||||
init_attr.cap.max_send_sge = 1;
|
||||
init_attr.cap.max_recv_sge = 2; /* GRH, data */
|
||||
init_attr.cap.max_inline_data = 0; /* don't use inline data for now */
|
||||
/* NTH: fix these */
|
||||
init_attr.cap.max_recv_wr = 4096;
|
||||
init_attr.cap.max_send_wr = 4096;
|
||||
init_attr.cap.max_recv_wr = min(4096, device->attr.max_qp_wr);
|
||||
init_attr.cap.max_send_wr = min(4096, device->attr.max_qp_wr);
|
||||
|
||||
qp->ib_qp = ibv_create_qp (port->device->ib_pd, &init_attr);
|
||||
if (NULL == qp->ib_qp) {
|
||||
@ -258,6 +263,7 @@ int mca_oob_ud_qp_post_send (mca_oob_ud_qp_t *qp, struct ibv_send_wr *wr,
|
||||
}
|
||||
|
||||
int mca_oob_ud_qp_post_recv (mca_oob_ud_qp_t *qp, struct ibv_recv_wr *wr) {
|
||||
|
||||
struct ibv_recv_wr *bad_wr;
|
||||
int rc;
|
||||
|
||||
@ -265,22 +271,23 @@ int mca_oob_ud_qp_post_recv (mca_oob_ud_qp_t *qp, struct ibv_recv_wr *wr) {
|
||||
if (0 != rc) {
|
||||
opal_output (0, "%s oob:ud:qp_post_recv failed. errno = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno);
|
||||
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_oob_ud_qp_data_aquire (struct mca_oob_ud_port_t *port, mca_oob_ud_qp_t **qp_ptr) {
|
||||
int rc;
|
||||
int rc = ORTE_SUCCESS;
|
||||
opal_free_list_item_t *item;
|
||||
|
||||
do {
|
||||
item = opal_free_list_get_st (&port->data_qps);
|
||||
if (NULL == item) {
|
||||
opal_output_verbose(5, orte_oob_base_framework.framework_output,
|
||||
"%s oob:ud:qp_data_aquire error allocating new data qp",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||
"%s oob:ud:qp_data_aquire error allocating new data qp. error = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rc);
|
||||
rc = ORTE_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -309,7 +309,8 @@ int mca_oob_ud_send_try (mca_oob_ud_req_t *send_req) {
|
||||
const unsigned int mtu = send_req->req_mtu;
|
||||
const struct timeval aquire_timeout = {0, 500000};
|
||||
mca_oob_ud_msg_t *com_msg;
|
||||
int data_len, rc;
|
||||
int data_len;
|
||||
int rc = ORTE_SUCCESS;
|
||||
|
||||
opal_output_verbose(10, orte_oob_base_framework.framework_output,
|
||||
"%s oob:ud:send_try sending to %s, tag = %d, "
|
||||
@ -504,7 +505,8 @@ int mca_oob_ud_send_try (mca_oob_ud_req_t *send_req) {
|
||||
/* send data */
|
||||
rc = mca_oob_ud_qp_post_send (send_req->req_qp, send_req->req_wr.send, 0);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
opal_output (0, "error posting send!");
|
||||
opal_output (0, "%s oob:ud:send_try error posting send!",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -532,7 +534,8 @@ int mca_oob_ud_send_try (mca_oob_ud_req_t *send_req) {
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
opal_output (0, "send error! rc = %d", rc);
|
||||
opal_output (0, "%s oob:ud:send_try send error! rc = %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rc);
|
||||
/* damn */
|
||||
return mca_oob_ud_send_complete (send_req, rc);
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user