openmpi/ompi/mca/common/sm/common_sm_rml.c
Samuel Gutierrez 0867454a06 Fixes CID #1665.
This commit was SVN r24519.
2011-03-12 03:41:49 +00:00

184 lines
6.6 KiB
C

/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation. All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation. All rights
 *                         reserved.
 * Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2007      Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2010-2011 Los Alamos National Security, LLC.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
#include "ompi_config.h"
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include "orte/mca/rml/rml.h"
#include "orte/util/name_fns.h"
#include "orte/util/show_help.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"
#include "ompi/constants.h"
#include "ompi/mca/dpm/dpm.h"
#include "ompi/mca/common/sm/common_sm_rml.h"
OBJ_CLASS_INSTANCE(
    mca_common_sm_rml_pending_rml_msg_types_t,
    opal_object_t,
    NULL,
    NULL
);
/******************************************************************************/
/**
 * this routine assumes that the procs array is in the following state:
 *     o all the local procs at the beginning.
 *     o procs[0] is the lowest named process.
 */
int
mca_common_sm_rml_info_bcast(mca_common_sm_rml_sm_info_t *sm_info,
                             ompi_proc_t **procs,
                             size_t num_procs,
                             int tag,
                             bool bcast_root,
                             const char *file_name,
                             opal_list_t *pending_rml_msgs)
{
    int rc = OMPI_SUCCESS;
    struct iovec iov[MCA_COMMON_SM_RML_MSG_LEN];
    int iovrc;
    size_t p;
    char filename_to_send[OPAL_PATH_MAX];
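
    /* stage the file name in a fixed-size buffer so the sender and receiver
     * always exchange an iovec entry of exactly OPAL_PATH_MAX bytes */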
    strncpy(filename_to_send, file_name, sizeof(filename_to_send) - 1);
    /* strncpy does not NUL-terminate if file_name fills the buffer */
    filename_to_send[sizeof(filename_to_send) - 1] = '\0';

    /* let the first item be the queueing id name */
    iov[0].iov_base = (ompi_iov_base_ptr_t)filename_to_send;
    iov[0].iov_len = sizeof(filename_to_send);
    iov[1].iov_base = (ompi_iov_base_ptr_t)sm_info;
    iov[1].iov_len = sizeof(mca_common_sm_rml_sm_info_t);

    /**
     * figure out if i am the root proc in the group.
     * if i am, bcast the message to the rest of the local procs.
     */
    if (bcast_root)
    {
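        /* bump up the libevent polling frequency while we push the message
         * out to each of the other local procs (mirrors the recv side below) */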
        opal_progress_event_users_increment();
        /* the first num_procs items are the local procs; procs[0] is me,
         * so start at index 1 */
        for (p = 1; p < num_procs; ++p)
        {
            iovrc = orte_rml.send(&(procs[p]->proc_name),
                                  iov,
                                  MCA_COMMON_SM_RML_MSG_LEN,
                                  tag,
                                  0);
            if ((ssize_t)(iov[0].iov_len +
                          iov[1].iov_len) > iovrc)
            {
                ORTE_ERROR_LOG(OMPI_ERR_COMM_FAILURE);
                opal_progress_event_users_decrement();
                rc = OMPI_ERROR;
                goto out;
            }
        }
        opal_progress_event_users_decrement();
    }
    else /* i am NOT the root ("lowest") proc */
    {
        opal_list_item_t *item;
        mca_common_sm_rml_pending_rml_msg_types_t *rml_msg;
        /**
         * because a component query can be performed simultaneously in
         * multiple threads, the RML messages may arrive in any order.  so
         * first check whether the message intended for me has already been
         * received and queued by another thread.
         */
        for (item = opal_list_get_first(pending_rml_msgs);
             opal_list_get_end(pending_rml_msgs) != item;
             item = opal_list_get_next(item))
        {
            rml_msg = (mca_common_sm_rml_pending_rml_msg_types_t *)item;
            /* was the message for me? */
            if (0 == strcmp(rml_msg->rml_file_name, file_name))
            {
                opal_list_remove_item(pending_rml_msgs, item);
                memcpy(sm_info->posix_fname_buff, rml_msg->posix_fname_buff,
                       OMPI_COMMON_SM_POSIX_FILE_LEN_MAX);
                sm_info->id = rml_msg->id;
                OBJ_RELEASE(item);
                break;
            }
        }
        /**
         * if we didn't find a message already waiting, block on
         * receiving from the RML.
         */
        if (opal_list_get_end(pending_rml_msgs) == item)
        {
            do
            {
                /**
                 * bump up the libevent polling frequency while we're
                 * in this RML recv, just to ensure we're checking
                 * libevent frequently.
                 */
                opal_progress_event_users_increment();
                iovrc = orte_rml.recv(&(procs[0]->proc_name),
                                      iov,
                                      MCA_COMMON_SM_RML_MSG_LEN,
                                      tag,
                                      0);
                opal_progress_event_users_decrement();
                if (iovrc < 0)
                {
                    ORTE_ERROR_LOG(OMPI_ERR_RECV_LESS_THAN_POSTED);
                    rc = OMPI_ERROR;
                    goto out;
                }
                /* was the message for me?  if so, we're done */
                if (0 == strcmp(filename_to_send, file_name))
                {
                    break;
                }
                /* if not, put it on the pending list and try again */
                if (NULL == (rml_msg =
                             OBJ_NEW(mca_common_sm_rml_pending_rml_msg_types_t)))
                {
                    ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
                    rc = OMPI_ERROR;
                    goto out;
                }
                /* safe because sizeof(rml_msg->rml_file_name) ==
                 * sizeof(filename_to_send), and the same goes for
                 * rml_msg->posix_fname_buff and sm_info->posix_fname_buff */
                memcpy(rml_msg->rml_file_name,
                       filename_to_send,
                       OPAL_PATH_MAX);
                memcpy(rml_msg->posix_fname_buff,
                       sm_info->posix_fname_buff,
                       OMPI_COMMON_SM_POSIX_FILE_LEN_MAX);
                rml_msg->id = sm_info->id;
                opal_list_append(pending_rml_msgs, &(rml_msg->super));
            } while (1);
        }
    }

out:
    return rc;
}