diff --git a/opal/mca/btl/usnic/btl_usnic_cagent.c b/opal/mca/btl/usnic/btl_usnic_cagent.c index 8bd6812c91..6bb4fd7f52 100644 --- a/opal/mca/btl/usnic/btl_usnic_cagent.c +++ b/opal/mca/btl/usnic/btl_usnic_cagent.c @@ -176,6 +176,16 @@ static void udp_port_listener_constructor(agent_udp_port_listener_t *obj) static void udp_port_listener_destructor(agent_udp_port_listener_t *obj) { + /* Find any pings that are pending on this listener (those whose + source IPv4 address matches the listener's) and delete them */ + agent_ping_t *ap, *apnext; + OPAL_LIST_FOREACH_SAFE(ap, apnext, &pings_pending, agent_ping_t) { + if (ap->src_ipv4_addr == obj->ipv4_addr) { + opal_list_remove_item(&pings_pending, &ap->super); + OBJ_RELEASE(ap); + } + } + if (-1 != obj->fd) { close(obj->fd); } @@ -925,6 +935,35 @@ static void agent_thread_cmd_ping(agent_ipc_listener_t *ipc_listener) agent_thread_send_ping(0, 0, ap); } +/* + * Receive and process the rest of an UNLISTEN command from a local IPC + * client. + */ +static void agent_thread_cmd_unlisten(agent_ipc_listener_t *ipc_listener) +{ + /* Read the rest of the UNLISTEN command from the IPC socket */ + int ret; + opal_btl_usnic_connectivity_cmd_unlisten_t cmd; + ret = opal_fd_read(ipc_listener->client_fd, sizeof(cmd), &cmd); + if (OPAL_SUCCESS != ret) { + OPAL_ERROR_LOG(ret); + ABORT("usnic connectivity agent IPC UNLISTEN read failed"); + /* Will not return */ + } + + /* If we are listening on this address (and we should be), then + stop listening on it. */ + uint32_t udp_port; + agent_udp_port_listener_t *udp_listener; + udp_listener = agent_thread_find_listener(cmd.ipv4_addr, &udp_port); + if (NULL != udp_listener) { + OBJ_RELEASE(udp_listener); + } + + /* All done! 
*/ + return; +} + /* * Called when we get an incoming IPC message */ @@ -947,7 +986,8 @@ static void agent_thread_ipc_receive(int fd, short flags, void *context) } assert(CONNECTIVITY_AGENT_CMD_LISTEN == command || - CONNECTIVITY_AGENT_CMD_PING == command); + CONNECTIVITY_AGENT_CMD_PING == command || + CONNECTIVITY_AGENT_CMD_UNLISTEN == command); switch (command) { case CONNECTIVITY_AGENT_CMD_LISTEN: @@ -956,6 +996,9 @@ static void agent_thread_ipc_receive(int fd, short flags, void *context) case CONNECTIVITY_AGENT_CMD_PING: agent_thread_cmd_ping(ipc_listener); break; + case CONNECTIVITY_AGENT_CMD_UNLISTEN: + agent_thread_cmd_unlisten(ipc_listener); + break; default: ABORT("Unexpected connectivity agent command"); break; diff --git a/opal/mca/btl/usnic/btl_usnic_cclient.c b/opal/mca/btl/usnic/btl_usnic_cclient.c index 16916790fc..65afd5f44e 100644 --- a/opal/mca/btl/usnic/btl_usnic_cclient.c +++ b/opal/mca/btl/usnic/btl_usnic_cclient.c @@ -247,6 +247,44 @@ int opal_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port, } +/* + * Send an unlisten command to the agent + */ +int opal_btl_usnic_connectivity_unlisten(opal_btl_usnic_module_t *module) +{ + /* If connectivity checking is not enabled, do nothing */ + if (!mca_btl_usnic_component.connectivity_enabled) { + return OPAL_SUCCESS; + } + /* Only the MPI process that is also the agent (local rank 0) sends + the UNLISTEN command; every other process returns success */ + if (0 != opal_process_info.my_local_rank) { + return OPAL_SUCCESS; + } + + /* Send the UNLISTEN command */ + int id = CONNECTIVITY_AGENT_CMD_UNLISTEN; + if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(id), &id)) { + OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO); + ABORT("usnic connectivity client IPC write failed"); + /* Will not return */ + } + + /* Send the UNLISTEN command parameters */ + opal_btl_usnic_connectivity_cmd_unlisten_t cmd = { + .ipv4_addr = module->local_addr.ipv4_addr, + }; + + if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(cmd), &cmd)) { + OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO); 
+ ABORT("usnic connectivity client IPC write failed"); + /* Will not return */ + } + + return OPAL_SUCCESS; +} + + /* * Shut down the connectivity client */ diff --git a/opal/mca/btl/usnic/btl_usnic_connectivity.h b/opal/mca/btl/usnic/btl_usnic_connectivity.h index 1bf134a2ee..db54ae5ff5 100644 --- a/opal/mca/btl/usnic/btl_usnic_connectivity.h +++ b/opal/mca/btl/usnic/btl_usnic_connectivity.h @@ -94,6 +94,7 @@ struct opal_btl_usnic_module_t; enum { CONNECTIVITY_AGENT_CMD_LISTEN = 17, CONNECTIVITY_AGENT_CMD_PING, + CONNECTIVITY_AGENT_CMD_UNLISTEN, CONNECTIVITY_AGENT_CMD_MAX }; @@ -125,6 +126,14 @@ typedef struct { uint8_t mac[6]; } opal_btl_usnic_connectivity_cmd_listen_t; +/* + * Fields for the UNLISTEN command. This struct is sent down the IPC + * socket from the cclient to the cagent. + */ +typedef struct { + uint32_t ipv4_addr; +} opal_btl_usnic_connectivity_cmd_unlisten_t; + /* * Command+fields for the reply to the LISTEN command. This struct is * sent down the IPC socket from the cagent to the cclient. @@ -169,7 +178,7 @@ int opal_btl_usnic_connectivity_client_init(void); * @returns OPAL_SUCCESS or an OPAL error code. * * The module contains the local interface addressing information, - * which tells the agent one which interface to listen. + * which tells the agent on which interface to listen. * * This routine will request the new listen from the agent, and wait * for the agent to reply with the UDP port that is being used/was @@ -213,6 +222,21 @@ int opal_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port, uint8_t *dest_mac, char *dest_nodename, size_t mtu); +/** + * Tell the agent to stop listening on the given IP address. + * + * @param[in] module The module that is requesting the unlisten. + * + * @returns OPAL_SUCCESS or an OPAL error code. + * + * The module contains the local interface addressing information, + * which tells the agent on which interface to stop listening. 
+ * + * It is safe to call this function even if the connectivity check is + * disabled; it will be a no-op in this case. + */ +int opal_btl_usnic_connectivity_unlisten(struct opal_btl_usnic_module_t *module); + /** * Shut down the connectivity service client. * diff --git a/opal/mca/btl/usnic/btl_usnic_module.c b/opal/mca/btl/usnic/btl_usnic_module.c index 268f5c500a..b0cd7c0d01 100644 --- a/opal/mca/btl/usnic/btl_usnic_module.c +++ b/opal/mca/btl/usnic/btl_usnic_module.c @@ -1123,6 +1123,10 @@ static int usnic_finalize(struct mca_btl_base_module_t* btl) module->device_async_event_active = false; } + if (mca_btl_usnic_component.connectivity_enabled) { + opal_btl_usnic_connectivity_unlisten(module); + } + opal_btl_usnic_channel_finalize(module, &module->mod_channels[USNIC_DATA_CHANNEL]); opal_btl_usnic_channel_finalize(module,