openmpi/ompi/mca/common/sm/common_sm_rml.c
Gilles Gouaillardet 3e19041425 Fix memory leak in mca_common_sm_rml_info_bcast(...)
The local buffer object is only used by the root of the group,
so move down the buffer declaration and initialization.

cmr=v1.8.2:reviewer=rhc

This commit was SVN r31771.
2014-05-15 08:41:14 +00:00


/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation. All rights reserved.
 * Copyright (c) 2004-2011 The University of Tennessee and The University
 *                         of Tennessee Research Foundation. All rights
 *                         reserved.
 * Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2007      Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2010-2013 Los Alamos National Security, LLC.
 *                         All rights reserved.
 * Copyright (c) 2014      Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
#include "ompi_config.h"
#include "opal/types.h"
#include "opal/dss/dss.h"
#include "opal/util/show_help.h"
#include "ompi/constants.h"
#include "ompi/mca/rte/rte.h"
#include "ompi/mca/common/sm/common_sm_rml.h"
/* for debug purposes only */
#include <assert.h>
#ifdef HAVE_STRING_H
#include <string.h>
#endif
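/* state shared with the RML receive callback below: the payload buffer,
 * a flag that is cleared once the message arrives, and the recv status */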
typedef struct {
    opal_buffer_t buf;
    bool active;
    int status;
} sm_return_t;
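/* RML receive callback: copy the incoming payload into the caller's buffer
 * and mark the receive as complete */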
static void sml_recv(int status,
                     ompi_process_name_t* peer,
                     opal_buffer_t* buffer,
                     ompi_rml_tag_t tag,
                     void* cbdata)
{
    sm_return_t *smr = (sm_return_t*)cbdata;

    opal_dss.copy_payload(&smr->buf, buffer);
    smr->active = false;
    smr->status = status;
}
/* ////////////////////////////////////////////////////////////////////////// */
/**
 * this routine assumes that sorted_procs is in the following state:
 *     o all the local procs are at the beginning.
 *     o procs[0] is the lowest named process.
 */
int
mca_common_sm_rml_info_bcast(opal_shmem_ds_t *out_ds_buf,
                             ompi_proc_t **procs,
                             size_t num_local_procs,
                             int tag,
                             bool proc0,
                             char *msg_id_str)
{
    int rc = OMPI_SUCCESS, tmprc;
    char *msg_id_str_to_tx = NULL;
    sm_return_t smr;

    OBJ_CONSTRUCT(&smr.buf, opal_buffer_t);
    /* figure out if i am the root proc in the group. if i am, bcast the
     * message to the rest of the local procs. */
    if (proc0) {
        opal_buffer_t *buffer = NULL;
        if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        size_t p;
        /* pack the data that we are going to send. first the queueing id, then
         * the shmem_ds buf. note that msg_id_str is used only for verifying
         * "expected" common sm usage. see "RML Messaging and Our Assumptions"
         * note in common_sm.c for more details. */
        tmprc = opal_dss.pack(buffer, &msg_id_str, 1, OPAL_STRING);
        if (OPAL_SUCCESS != tmprc) {
            OMPI_ERROR_LOG(OMPI_ERR_PACK_FAILURE);
            rc = OMPI_ERR_PACK_FAILURE;
            goto out;
        }
        tmprc = opal_dss.pack(buffer, out_ds_buf,
                              (int32_t)sizeof(opal_shmem_ds_t),
                              OPAL_BYTE);
        if (OPAL_SUCCESS != tmprc) {
            OMPI_ERROR_LOG(OMPI_ERR_PACK_FAILURE);
            rc = OMPI_ERR_PACK_FAILURE;
            goto out;
        }
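        /* bump up the libevent polling frequency while the non-blocking sends
         * below are in flight so they make timely progress */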
        opal_progress_event_users_increment();
        /* first num_local_procs items should be local procs */
        for (p = 1; p < num_local_procs; ++p) {
            /* a potential future optimization: use non-blocking routines */
            tmprc = ompi_rte_send_buffer_nb(&(procs[p]->proc_name), buffer, tag,
                                            ompi_rte_send_cbfunc, NULL);
            if (0 > tmprc) {
                OMPI_ERROR_LOG(tmprc);
                opal_progress_event_users_decrement();
                rc = OMPI_ERROR;
                goto out;
            }
        }
        opal_progress_event_users_decrement();
    }
    /* i am NOT the root proc */
    else {
        int32_t num_vals;
        /* bump up the libevent polling frequency while we're in this RML recv,
         * just to ensure we're checking libevent frequently. */
        opal_progress_event_users_increment();
        smr.active = true;
        smr.status = OMPI_ERROR;
        ompi_rte_recv_buffer_nb(&(procs[0]->proc_name), tag,
                                OMPI_RML_NON_PERSISTENT,
                                sml_recv, &smr);
        while (smr.active) {
            opal_progress();
        }
        opal_progress_event_users_decrement();
        if (OMPI_SUCCESS != smr.status) {
            OMPI_ERROR_LOG(smr.status);
            rc = smr.status;
            goto out;
        }
        /* unpack the buffer */
        num_vals = 1;
        tmprc = opal_dss.unpack(&smr.buf, &msg_id_str_to_tx, &num_vals,
                                OPAL_STRING);
        if (0 > tmprc) {
            OMPI_ERROR_LOG(OMPI_ERR_UNPACK_FAILURE);
            rc = OMPI_ERROR;
            goto out;
        }
        num_vals = (int32_t)sizeof(opal_shmem_ds_t);
        tmprc = opal_dss.unpack(&smr.buf, out_ds_buf, &num_vals, OPAL_BYTE);
        if (0 > tmprc) {
            OMPI_ERROR_LOG(OMPI_ERR_UNPACK_FAILURE);
            rc = OMPI_ERROR;
            goto out;
        }
        /* the message better be for me. if not, freak out because this
         * probably means that common sm is being used in a new way that lies
         * outside of our current scope of assumptions. see "RML Messaging and
         * Our Assumptions" note in common_sm.c */
        if (0 != strcmp(msg_id_str_to_tx, msg_id_str)) {
            opal_show_help("help-mpi-common-sm.txt", "unexpected message id",
                           true, ompi_process_info.nodename,
                           msg_id_str, msg_id_str_to_tx);
            rc = OMPI_ERROR;
            /* here for extra debug info only */
            assert(0);
            goto out;
        }
    }
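/* common exit path for both the root and non-root cases: release the
 * unpacked message id string (if any) and destruct the local buffer object */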
out:
    if (NULL != msg_id_str_to_tx) {
        free(msg_id_str_to_tx);
        msg_id_str_to_tx = NULL;
    }
    OBJ_DESTRUCT(&smr.buf);
    return rc;
}