From 656d8578d0bf0687c65c07b0ce2e63e014c3f92b Mon Sep 17 00:00:00 2001 From: Jeff Squyres <jsquyres@cisco.com> Date: Fri, 6 Feb 2009 00:51:04 +0000 Subject: [PATCH] * Rename (new) MCA parameter to btl_openib_connect_rdmacm_reject_causes_connect_error (yes, it's still long -- on purpose :-) ) * Add INI file parameter rdmacm_reject_causes_connect_error * Now only treat CONNECT_ERROR events as a REJECT if: * It's on a connection where we were expecting a REJECT, ''and'' * The MCA parameter is true ''or'' the INI parameter for this device is true * Set the INI parameter for true for the NE020 This commit was SVN r20459. --- ompi/mca/btl/openib/btl_openib_ini.c | 19 ++++++- ompi/mca/btl/openib/btl_openib_ini.h | 5 +- .../connect/btl_openib_connect_rdmacm.c | 52 ++++++++++++++----- .../openib/mca-btl-openib-device-params.ini | 21 +++++++- 4 files changed, 81 insertions(+), 16 deletions(-) diff --git a/ompi/mca/btl/openib/btl_openib_ini.c b/ompi/mca/btl/openib/btl_openib_ini.c index 2f2db87024..3f5bacce26 100644 --- a/ompi/mca/btl/openib/btl_openib_ini.c +++ b/ompi/mca/btl/openib/btl_openib_ini.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008 Mellanox Technologies. All rights reserved. * $COPYRIGHT$ * @@ -401,6 +401,13 @@ static int parse_line(parsed_section_values_t *sv) sv->values.max_inline_data_set = true; } + else if (0 == strcasecmp(key_buffer, "rdmacm_reject_causes_connect_error")) { + /* Single value */ + sv->values.rdmacm_reject_causes_connect_error = + (bool) ompi_btl_openib_ini_intify(value); + sv->values.rdmacm_reject_causes_connect_error_set = true; + } + else { /* Have no idea what this parameter is. 
Not an error -- just ignore it */ @@ -490,6 +497,9 @@ static void reset_values(ompi_btl_openib_ini_values_t *v) v->max_inline_data = 0; v->max_inline_data_set = false; + + v->rdmacm_reject_causes_connect_error = false; + v->rdmacm_reject_causes_connect_error_set = false; } @@ -546,6 +556,13 @@ static int save_section(parsed_section_values_t *s) h->values.max_inline_data_set = true; } + if (s->values.rdmacm_reject_causes_connect_error_set) { + h->values.rdmacm_reject_causes_connect_error = + s->values.rdmacm_reject_causes_connect_error; + h->values.rdmacm_reject_causes_connect_error_set = + true; + } + found = true; break; } diff --git a/ompi/mca/btl/openib/btl_openib_ini.h b/ompi/mca/btl/openib/btl_openib_ini.h index 130594889b..0b8e4b2ed5 100644 --- a/ompi/mca/btl/openib/btl_openib_ini.h +++ b/ompi/mca/btl/openib/btl_openib_ini.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008 Mellanox Technologies. All rights reserved. 
* $COPYRIGHT$ * @@ -31,6 +31,9 @@ typedef struct ompi_btl_openib_ini_values_t { int32_t max_inline_data; bool max_inline_data_set; + + bool rdmacm_reject_causes_connect_error; + bool rdmacm_reject_causes_connect_error_set; } ompi_btl_openib_ini_values_t; diff --git a/ompi/mca/btl/openib/connect/btl_openib_connect_rdmacm.c b/ompi/mca/btl/openib/connect/btl_openib_connect_rdmacm.c index a8d49ad3ca..5a24ca55a3 100644 --- a/ompi/mca/btl/openib/connect/btl_openib_connect_rdmacm.c +++ b/ompi/mca/btl/openib/connect/btl_openib_connect_rdmacm.c @@ -37,6 +37,7 @@ #include "btl_openib_endpoint.h" #include "connect/connect.h" #include "btl_openib_iwarp.h" +#include "btl_openib_ini.h" /* JMS to be removed: see #1264 */ #undef event @@ -245,8 +246,8 @@ static void rdmacm_component_register(void) } mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version, - "connect_rdmacm_ignore_connect_errors", - "Some devices do not implement all aspects of the RDMA CM properly (e.g., REJECTs are not handled properly). Setting this MCA parameter to true tells Open MPI to ignore RDMA CM CONNECT_ERROR events (default: false).", + "connect_rdmacm_reject_causes_connect_error", + "The drivers for some devices are buggy such that an RDMA REJECT action may result in a CONNECT_ERROR event instead of a REJECTED event. 
Setting this MCA parameter to true tells Open MPI to treat CONNECT_ERROR events on connections where a REJECT is expected as a REJECT (default: false)", false, false, 0, &value); rdmacm_ignore_connect_errors = (bool) (value != 0); } @@ -1212,9 +1213,11 @@ out: /* * Runs in service thread */ -static int create_dummy_cq(rdmacm_contents_t *contents, mca_btl_openib_module_t *openib_btl) +static int create_dummy_cq(rdmacm_contents_t *contents, + mca_btl_openib_module_t *openib_btl) { - contents->dummy_cq = ibv_create_cq(openib_btl->device->ib_dev_context, 1, NULL, NULL, 0); + contents->dummy_cq = + ibv_create_cq(openib_btl->device->ib_dev_context, 1, NULL, NULL, 0); if (NULL == contents->dummy_cq) { BTL_ERROR(("dummy_cq not created")); goto out; @@ -1228,7 +1231,8 @@ out: /* * Runs in service thread */ -static int create_dummy_qp(rdmacm_contents_t *contents, struct rdma_cm_id *id, int qpnum) +static int create_dummy_qp(rdmacm_contents_t *contents, + struct rdma_cm_id *id, int qpnum) { struct ibv_qp_init_attr attr; struct ibv_qp *qp; @@ -1446,6 +1450,8 @@ static int event_handler(struct rdma_cm_event *event) struct sockaddr *peeraddr, *localaddr; uint32_t peeripaddr, localipaddr; int rc = -1, qpnum; + ompi_btl_openib_ini_values_t ini; + bool found; if (NULL == context) { return rc; @@ -1498,14 +1504,34 @@ static int event_handler(struct rdma_cm_event *event) break; case RDMA_CM_EVENT_CONNECT_ERROR: - /* Workaround for broken NetEffect/Intel driver: if we get a - CONNECT_ERROR on a connection that we're expecting a reject - on, then it's ok (their driver just doesn't handle reject - properly at all). */ - if (rdmacm_ignore_connect_errors) { - OPAL_OUTPUT((-1, "SERVICE Got CONNECT_ERROR, but ignored: %p", (void*) event->id)); - rc = rdmacm_destroy_dummy_qp(context); - break; + /* Some adapters have broken REJECT behavior; the recipient + gets a CONNECT_ERROR event instead of the expected REJECTED + event. 
So if we get a CONNECT_ERROR, see if it's on a + connection where we're expecting a REJECT (i.e., we have a + dummy_cq set up). If it is, and if a) the MCA param + btl_openib_connect_rdmacm_reject_causes_connect_error is + true, or b) rdmacm_reject_causes_connect_error is true in + the device INI values, then just treat this CONNECT_ERROR + as if it were the REJECT. */ + if (NULL != context->contents->dummy_cq) { + struct ibv_device_attr *attr = + &(context->endpoint->endpoint_btl->device->ib_dev_attr); + found = false; + if (OMPI_SUCCESS == ompi_btl_openib_ini_query(attr->vendor_id, + attr->vendor_part_id, + &ini) && + ini.rdmacm_reject_causes_connect_error) { + found = true; + } + if (rdmacm_ignore_connect_errors) { + found = true; + } + + if (found) { + OPAL_OUTPUT((-1, "SERVICE Got CONNECT_ERROR, but ignored: %p", (void*) event->id)); + rc = rdmacm_destroy_dummy_qp(context); + break; + } } /* Otherwise, fall through and handle the error as normal */ diff --git a/ompi/mca/btl/openib/mca-btl-openib-device-params.ini b/ompi/mca/btl/openib/mca-btl-openib-device-params.ini index 8d28d0377c..8b6b21a91e 100644 --- a/ompi/mca/btl/openib/mca-btl-openib-device-params.ini +++ b/ompi/mca/btl/openib/mca-btl-openib-device-params.ini @@ -1,5 +1,5 @@ # -# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2006-2008 Mellanox Technologies. All rights reserved. # $COPYRIGHT$ # @@ -92,6 +92,24 @@ # max_inline_data = 1024 +# rdmacm_reject_causes_connect_error: a boolean indicating whether, +# when an RDMA CM REJECT is issued on the device, the peer might get +# a CONNECT_ERROR event back instead of the expected REJECT event. +# Open MPI uses RDMA CM REJECT messages in its normal wireup +# procedure; some connections are *expected* to be rejected. However, +# with some old/broken drivers, if process A issues a REJECT, process +# B will receive a CONNECT_ERROR event instead of a REJECT event.
So +# if this flag is set to true and we receive a CONNECT_ERROR event on +# a connection where we are expecting a REJECT, then just treat the +# CONNECT_ERROR exactly as we would have treated the REJECT. Setting +# this flag to true means that the driver for this device is broken +# (or was broken at one point in the past). It is [mostly] safe to +# set this flag to true even after a driver has been fixed; the scope +# of where this flag is used is small enough that it *shouldn't* +# mask real CONNECT_ERROR events. + +# rdmacm_reject_causes_connect_error = 1 + ############################################################################ [default] @@ -207,3 +225,4 @@ use_eager_rdma = 1 mtu = 2048 receive_queues = P,128,256,192,128:P,65536,256,192,128 max_inline_data = 64 +rdmacm_reject_causes_connect_error = 1