Update the grpcomm framework to be more thread-friendly.

Modify the ORTE configure option --enable-multicast so that it directs the multicast components to build (or not) instead of littering the code base with #if's, and remove those #if's where they used to occur. Add a new grpcomm "mcast" module to support multicast operations. Some work is still required to properly perform daemon collectives for comm_spawn operations. The new module only builds when --enable-multicast is given, and is used only when specifically selected.

This commit was SVN r22709.
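For context on the thread-friendliness change: blocking grpcomm callers now wait on the new orte_grpcomm_collective_t object (a mutex/condition pair plus a recvd counter and a results buffer, defined in base.h below) instead of progressed-wait loops, and the message-processing side signals them. A minimal sketch of the signaling side, assuming a hypothetical release_collective() helper (the real wiring lives in the grpcomm modules and the daemon collective code in this diff):

```c
#include "opal/dss/dss.h"
#include "opal/threads/mutex.h"
#include "opal/threads/condition.h"
#include "orte/mca/grpcomm/base/base.h"

/* Hypothetical helper illustrating how a receive callback would release a
 * thread blocked in orte_grpcomm_base_app_barrier/app_allgather. The blocked
 * side holds coll->lock and sleeps in opal_condition_wait() until coll->recvd
 * changes, so the release side must take the same lock, update the counter,
 * and signal the condition variable.
 */
static void release_collective(orte_grpcomm_collective_t *coll,
                               opal_buffer_t *payload)
{
    OPAL_THREAD_LOCK(&coll->lock);
    if (NULL != payload) {
        /* stash the collected data for the waiter to copy out */
        opal_dss.copy_payload(&coll->results, payload);
    }
    coll->recvd++;                       /* mark the release */
    opal_condition_signal(&coll->cond);  /* wake the blocked caller */
    OPAL_THREAD_UNLOCK(&coll->lock);
}
```

This matches the constructor/destructor added in grpcomm_base_open.c below, which initialize lock, cond, and results for each collective object.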
This commit is contained in:
parent dd4945c194
commit 18c7aaff08
@@ -99,10 +99,8 @@
 #include "orte/mca/ras/base/ras_private.h"
 #include "orte/mca/rmaps/rmaps.h"
 #include "orte/mca/rmaps/base/base.h"
-#if ORTE_ENABLE_MULTICAST
 #include "orte/mca/rmcast/rmcast.h"
 #include "orte/mca/rmcast/base/base.h"
-#endif
 #include "orte/mca/rml/rml.h"
 #include "orte/mca/rml/base/base.h"
 #include "orte/mca/routed/routed.h"
@@ -345,7 +343,7 @@ void ompi_info_open_components(void)
     }
     map = OBJ_NEW(ompi_info_component_map_t);
     map->type = strdup("grpcomm");
-    map->components = &mca_grpcomm_base_components_available;
+    map->components = &orte_grpcomm_base.components_available;
     opal_pointer_array_add(&component_map, map);
 
     if (ORTE_SUCCESS != orte_ess_base_open()) {
@@ -405,7 +403,6 @@ void ompi_info_open_components(void)
     map->components = &orte_rmaps_base.available_components;
     opal_pointer_array_add(&component_map, map);
 
-#if ORTE_ENABLE_MULTICAST
     if (ORTE_SUCCESS != orte_rmcast_base_open()) {
         goto error;
     }
@@ -413,7 +410,6 @@ void ompi_info_open_components(void)
     map->type = strdup("rmcast");
     map->components = &orte_rmcast_base.rmcast_opened;
     opal_pointer_array_add(&component_map, map);
-#endif
 
     if (ORTE_SUCCESS != orte_rml_base_open()) {
         goto error;
@@ -659,10 +655,8 @@ void ompi_info_close_components()
     (void) orte_rml_base_close();
     (void) orte_routed_base_close();
     (void) mca_oob_base_close();
-#if ORTE_ENABLE_MULTICAST
     (void) orte_rmcast_base_close();
-#endif
 #endif
     (void) orte_errmgr_base_close();
 
 
@@ -222,9 +222,7 @@ int main(int argc, char *argv[])
     opal_pointer_array_add(&mca_types, "odls");
     opal_pointer_array_add(&mca_types, "ras");
     opal_pointer_array_add(&mca_types, "rmaps");
-#if ORTE_ENABLE_MULTICAST
     opal_pointer_array_add(&mca_types, "rmcast");
-#endif
     opal_pointer_array_add(&mca_types, "rml");
     opal_pointer_array_add(&mca_types, "routed");
     opal_pointer_array_add(&mca_types, "plm");
@@ -10,7 +10,7 @@ dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 dnl                         University of Stuttgart.  All rights reserved.
 dnl Copyright (c) 2004-2005 The Regents of the University of California.
 dnl                         All rights reserved.
-dnl Copyright (c) 2006-2009 Cisco Systems, Inc.  All rights reserved.
+dnl Copyright (c) 2006-2010 Cisco Systems, Inc.  All rights reserved.
 dnl Copyright (c) 2007      Sun Microsystems, Inc.  All rights reserved.
 dnl Copyright (c) 2009      IBM Corporation.  All rights reserved.
 dnl Copyright (c) 2009      Los Alamos National Security, LLC.  All rights
@@ -92,7 +92,5 @@ else
     AC_MSG_RESULT([no])
     orte_want_multicast=0
 fi
-AC_DEFINE_UNQUOTED([ORTE_ENABLE_MULTICAST], [$orte_want_multicast],
-                   [Enable reliable multicast messaging])
 
 ])dnl
@@ -39,9 +39,7 @@
 #include "orte/mca/routed/base/base.h"
 #include "orte/mca/errmgr/errmgr.h"
 #include "orte/mca/grpcomm/base/base.h"
-#if ORTE_ENABLE_MULTICAST
 #include "orte/mca/rmcast/base/base.h"
-#endif
 #include "orte/mca/plm/plm.h"
 #include "orte/mca/filem/base/base.h"
 #if OPAL_ENABLE_FT == 1
@@ -89,6 +87,18 @@ int orte_ess_base_app_setup(void)
         goto error;
     }
 
+    /* multicast */
+    if (ORTE_SUCCESS != (ret = orte_rmcast_base_open())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_rmcast_base_open";
+        goto error;
+    }
+    if (ORTE_SUCCESS != (ret = orte_rmcast_base_select())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_rmcast_base_select";
+        goto error;
+    }
+
     /*
      * Group communications
      */
@@ -103,20 +113,6 @@ int orte_ess_base_app_setup(void)
         goto error;
     }
 
-    /* multicast */
-#if ORTE_ENABLE_MULTICAST
-    if (ORTE_SUCCESS != (ret = orte_rmcast_base_open())) {
-        ORTE_ERROR_LOG(ret);
-        error = "orte_rmcast_base_open";
-        goto error;
-    }
-    if (ORTE_SUCCESS != (ret = orte_rmcast_base_select())) {
-        ORTE_ERROR_LOG(ret);
-        error = "orte_rmcast_base_select";
-        goto error;
-    }
-#endif
-
     /* non-daemon/HNP apps can only have the default proxy PLM
      * module open - provide a chance for it to initialize
      */
@@ -255,13 +251,10 @@ int orte_ess_base_app_finalize(void)
 
     orte_wait_finalize();
 
-    /* close the multicast */
-#if ORTE_ENABLE_MULTICAST
-    orte_rmcast_base_close();
-#endif
-
     /* now can close the rml and its friendly group comm */
     orte_grpcomm_base_close();
+    /* close the multicast */
+    orte_rmcast_base_close();
     orte_routed_base_close();
     orte_rml_base_close();
 
@@ -58,9 +58,7 @@
 #include "orte/util/regex.h"
 #include "orte/util/show_help.h"
 #include "orte/mca/notifier/base/base.h"
-#if ORTE_ENABLE_MULTICAST
 #include "orte/mca/rmcast/base/base.h"
-#endif
 
 #include "orte/runtime/orte_cr.h"
 #include "orte/runtime/orte_wait.h"
@@ -180,6 +178,19 @@ int orte_ess_base_orted_setup(char **hosts)
         error = "orte_routed_base_select";
         goto error;
     }
+
+    /* multicast */
+    if (ORTE_SUCCESS != (ret = orte_rmcast_base_open())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_rmcast_base_open";
+        goto error;
+    }
+    if (ORTE_SUCCESS != (ret = orte_rmcast_base_select())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_rmcast_base_select";
+        goto error;
+    }
+
     /*
      * Group communications
      */
@@ -194,20 +205,6 @@ int orte_ess_base_orted_setup(char **hosts)
         goto error;
     }
 
-    /* multicast */
-#if ORTE_ENABLE_MULTICAST
-    if (ORTE_SUCCESS != (ret = orte_rmcast_base_open())) {
-        ORTE_ERROR_LOG(ret);
-        error = "orte_rmcast_base_open";
-        goto error;
-    }
-    if (ORTE_SUCCESS != (ret = orte_rmcast_base_select())) {
-        ORTE_ERROR_LOG(ret);
-        error = "orte_rmcast_base_select";
-        goto error;
-    }
-#endif
-
     /* Open/select the odls */
     if (ORTE_SUCCESS != (ret = orte_odls_base_open())) {
         ORTE_ERROR_LOG(ret);
@@ -432,14 +429,11 @@ int orte_ess_base_orted_finalize(void)
 
     /* now can close the rml and its friendly group comm */
     orte_grpcomm_base_close();
+    /* close the multicast */
+    orte_rmcast_base_close();
     orte_routed_base_close();
     orte_rml_base_close();
 
-    /* close the multicast */
-#if ORTE_ENABLE_MULTICAST
-    orte_rmcast_base_close();
-#endif
-
     /* cleanup the global list of local children and job data */
     while (NULL != (item = opal_list_remove_first(&orte_local_children))) {
         OBJ_RELEASE(item);
@@ -43,9 +43,7 @@
 #include "orte/util/proc_info.h"
 #include "orte/util/session_dir.h"
 #include "orte/util/show_help.h"
-#if ORTE_ENABLE_MULTICAST
 #include "orte/mca/rmcast/base/base.h"
-#endif
 
 #include "orte/runtime/orte_cr.h"
 #include "orte/runtime/orte_globals.h"
@@ -93,7 +91,6 @@ int orte_ess_base_tool_setup(void)
     }
 
     /* multicast */
-#if ORTE_ENABLE_MULTICAST
     if (ORTE_SUCCESS != (ret = orte_rmcast_base_open())) {
         ORTE_ERROR_LOG(ret);
         error = "orte_rmcast_base_open";
@@ -104,7 +101,6 @@ int orte_ess_base_tool_setup(void)
         error = "orte_rmcast_base_select";
         goto error;
     }
-#endif
 
     /* since I am a tool, then all I really want to do is communicate.
      * So setup communications and be done - finding the HNP
@@ -203,6 +199,7 @@ int orte_ess_base_tool_finalize(void)
     }
     orte_routed_base_close();
     orte_rml_base_close();
+    orte_rmcast_base_close();
 
     return ORTE_SUCCESS;
 }
@@ -70,7 +70,6 @@ orte_ess_cm_component_open(void)
 
 int orte_ess_cm_component_query(mca_base_module_t **module, int *priority)
 {
-#if ORTE_ENABLE_MULTICAST
     char *spec;
 
     /* only select us if specified */
@@ -83,12 +82,6 @@ int orte_ess_cm_component_query(mca_base_module_t **module, int *priority)
     *priority = 1000;
     *module = (mca_base_module_t *)&orte_ess_cm_module;
     return ORTE_SUCCESS;
-#else
-    /* cannot be used */
-    *priority = 0;
-    *module = NULL;
-    return ORTE_ERROR;
-#endif
 }
 
 
@@ -53,9 +53,7 @@
 #include "orte/mca/plm/base/base.h"
 #include "orte/mca/odls/base/base.h"
 #include "orte/mca/notifier/base/base.h"
-#if ORTE_ENABLE_MULTICAST
 #include "orte/mca/rmcast/base/base.h"
-#endif
 
 #include "orte/mca/rmaps/base/base.h"
 #if OPAL_ENABLE_FT == 1
@@ -237,6 +235,19 @@ static int rte_init(void)
         error = "orte_routed_base_select";
         goto error;
     }
+
+    /* multicast */
+    if (ORTE_SUCCESS != (ret = orte_rmcast_base_open())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_rmcast_base_open";
+        goto error;
+    }
+    if (ORTE_SUCCESS != (ret = orte_rmcast_base_select())) {
+        ORTE_ERROR_LOG(ret);
+        error = "orte_rmcast_base_select";
+        goto error;
+    }
+
     /*
      * Group communications
      */
@@ -250,21 +261,7 @@ static int rte_init(void)
         error = "orte_grpcomm_base_select";
         goto error;
     }
 
-    /* multicast */
-#if ORTE_ENABLE_MULTICAST
-    if (ORTE_SUCCESS != (ret = orte_rmcast_base_open())) {
-        ORTE_ERROR_LOG(ret);
-        error = "orte_rmcast_base_open";
-        goto error;
-    }
-    if (ORTE_SUCCESS != (ret = orte_rmcast_base_select())) {
-        ORTE_ERROR_LOG(ret);
-        error = "orte_rmcast_base_select";
-        goto error;
-    }
-#endif
 
     /* Now provide a chance for the PLM
      * to perform any module-specific init functions. This
      * needs to occur AFTER the communications are setup
@@ -629,14 +626,12 @@ static int rte_finalize(void)
     orte_rmaps_base_close();
     orte_plm_base_close();
     orte_errmgr_base_close();
 
-    /* close the multicast */
-#if ORTE_ENABLE_MULTICAST
-    orte_rmcast_base_close();
-#endif
-
     /* now can close the rml and its friendly group comm */
     orte_grpcomm_base_close();
 
+    /* close the multicast */
+    orte_rmcast_base_close();
+
+    /* now can close the rml */
     orte_routed_base_close();
     orte_rml_base_close();
 
(The diff for one file is not shown because of its size.)
@@ -29,6 +29,7 @@ if !ORTE_DISABLE_FULL_SUPPORT
 libmca_grpcomm_la_SOURCES += \
         base/grpcomm_base_allgather.c \
         base/grpcomm_base_modex.c \
-        base/grpcomm_base_coll.c
+        base/grpcomm_base_coll.c \
+        base/grpcomm_base_app_fns.c
 
 endif
@@ -28,6 +28,10 @@
 
 #include "opal/class/opal_list.h"
 #include "opal/mca/mca.h"
+#include "opal/threads/mutex.h"
+#include "opal/threads/condition.h"
+
+#include "orte/mca/odls/odls_types.h"
 
 #include "orte/mca/grpcomm/grpcomm.h"
 
@@ -44,18 +48,35 @@ ORTE_DECLSPEC int orte_grpcomm_base_open(void);
 ORTE_DECLSPEC int orte_grpcomm_base_select(void);
 ORTE_DECLSPEC int orte_grpcomm_base_close(void);
 
+/* daemon collective function */
+typedef void (*orte_grpcomm_daemon_collective_fn_t)(orte_process_name_t *sender,
+                                                    opal_buffer_t *data);
 /*
  * globals that might be needed
  */
-ORTE_DECLSPEC extern int orte_grpcomm_base_output;
-ORTE_DECLSPEC extern bool mca_grpcomm_base_selected;
-ORTE_DECLSPEC extern opal_list_t mca_grpcomm_base_components_available;
-ORTE_DECLSPEC extern orte_grpcomm_base_component_t mca_grpcomm_base_selected_component;
-ORTE_DECLSPEC extern int orte_grpcomm_profile_fd;
+typedef struct {
+    int output;
+    bool selected;
+    opal_list_t components_available;
+    orte_grpcomm_base_component_t selected_component;
+    int profile_fd;
+    orte_grpcomm_daemon_collective_fn_t daemon_coll;
+} orte_grpcomm_base_t;
+
+ORTE_DECLSPEC extern orte_grpcomm_base_t orte_grpcomm_base;
 
 #if !ORTE_DISABLE_FULL_SUPPORT
 
+/* structure for tracking collective operations */
+typedef struct {
+    opal_object_t super;
+    opal_mutex_t lock;
+    opal_condition_t cond;
+    orte_vpid_t recvd;
+    opal_buffer_t results;
+} orte_grpcomm_collective_t;
+ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_grpcomm_collective_t);
+
 /*
  * Base functions
  */
@@ -80,13 +101,30 @@ ORTE_DECLSPEC int orte_grpcomm_base_update_modex_entries(orte_process_name_t *
 ORTE_DECLSPEC int orte_grpcomm_base_load_modex_data(orte_process_name_t *proc, char *attribute_name,
                                                     void *data, int num_bytes);
 
+/* app functions */
+ORTE_DECLSPEC int orte_grpcomm_base_app_barrier(orte_process_name_t *recipient,
+                                                orte_grpcomm_collective_t *coll);
+ORTE_DECLSPEC int orte_grpcomm_base_app_allgather(orte_process_name_t *recipient,
+                                                  orte_grpcomm_collective_t *coll,
+                                                  opal_buffer_t *sbuf,
+                                                  opal_buffer_t *rbuf);
+ORTE_DECLSPEC int orte_grpcomm_base_app_pack_xcast(orte_daemon_cmd_flag_t cmd,
+                                                   orte_jobid_t job,
+                                                   opal_buffer_t *buffer,
+                                                   opal_buffer_t *message,
+                                                   orte_rml_tag_t tag);
+
 /* Tuned collectives */
 ORTE_DECLSPEC void orte_grpcomm_base_coll_recv(int status, orte_process_name_t* sender,
                                                opal_buffer_t* buffer, orte_rml_tag_t tag,
                                                void* cbdata);
 ORTE_DECLSPEC int orte_grpcomm_base_allgather(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int32_t num_entries,
                                               orte_jobid_t jobid, orte_vpid_t np, orte_vpid_t *vpids);
 
+ORTE_DECLSPEC void orte_grpcomm_base_daemon_coll_recv(int status, orte_process_name_t* sender,
+                                                      opal_buffer_t* buffer, orte_rml_tag_t tag,
+                                                      void* cbdata);
+ORTE_DECLSPEC void orte_grpcomm_base_daemon_collective(orte_process_name_t *sender,
+                                                       opal_buffer_t *data);
+
 #endif /* ORTE_DISABLE_FULL_SUPPORT */
 
@@ -49,7 +49,7 @@ static void allgather_server_recv(int status, orte_process_name_t* sender,
 {
     int rc;
 
-    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
                          "%s allgather buffer received from %s",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                          ORTE_NAME_PRINT(sender)));
@@ -79,7 +79,7 @@ static void allgather_client_recv(int status, orte_process_name_t* sender,
 {
     int rc;
 
-    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
                          "%s grpcomm:base: allgather buffer received",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
@@ -110,7 +110,7 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
     int32_t num_peers;
     int rc;
 
-    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                          "%s grpcomm: entering allgather_list",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
@@ -123,7 +123,7 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
     /*** NON-ROOT ***/
     if (OPAL_EQUAL != opal_dss.compare(&root->name, ORTE_PROC_MY_NAME, ORTE_NAME)) {
         /* everyone but root sends data */
-        OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
+        OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
                              "%s allgather_list: sending my data to %s",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                              ORTE_NAME_PRINT(&root->name)));
@@ -133,7 +133,7 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
             return ORTE_ERR_COMM_FAILURE;
         }
 
-        OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
+        OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
                              "%s allgather_list: buffer sent",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
@@ -169,7 +169,7 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
         }
         OBJ_RELEASE(allgather_buf);
 
-        OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
+        OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
                              "%s allgather_list: buffer received",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
@@ -200,7 +200,7 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
     /* setup the buffer that will recv the results */
     allgather_buf = OBJ_NEW(opal_buffer_t);
 
-    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
                          "%s allgather_list: waiting to recv %ld inputs",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                          (long)num_peers-1));
@@ -222,7 +222,7 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
         return rc;
     }
 
-    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
                          "%s allgather_list: received all data",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
@@ -256,7 +256,7 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
 
     ORTE_PROGRESSED_WAIT(false, allgather_num_sent, num_peers-1);
 
-    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                          "%s grpcomm: allgather_list completed",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
orte/mca/grpcomm/base/grpcomm_base_app_fns.c (new file, 213 lines)
@@ -0,0 +1,213 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2009 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2007      Sun Microsystems, Inc.  All rights reserved.
+ * Copyright (c) 2010      Cisco Systems, Inc.  All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/constants.h"
+#include "orte/types.h"
+
+#include <string.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif  /* HAVE_SYS_TIME_H */
+
+#include "opal/util/output.h"
+#include "opal/class/opal_hash_table.h"
+#include "opal/dss/dss.h"
+#include "opal/threads/mutex.h"
+#include "opal/threads/condition.h"
+
+#include "orte/util/proc_info.h"
+#include "orte/mca/errmgr/errmgr.h"
+#include "orte/util/name_fns.h"
+#include "orte/mca/rml/rml.h"
+#include "orte/mca/odls/odls_types.h"
+
+#include "orte/mca/grpcomm/base/base.h"
+
+int orte_grpcomm_base_app_pack_xcast(orte_daemon_cmd_flag_t cmd,
+                                     orte_jobid_t job,
+                                     opal_buffer_t *buffer,
+                                     opal_buffer_t *message,
+                                     orte_rml_tag_t tag)
+{
+    orte_daemon_cmd_flag_t command;
+    int rc;
+
+    /* pack the base cmd for the daemon/HNP */
+    command = cmd;
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &command, 1, ORTE_DAEMON_CMD))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+    /* pack the target jobid and tag for use in relay */
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &job, 1, ORTE_JOBID))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tag, 1, ORTE_RML_TAG))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+
+    /* if this isn't intended for the daemon command tag, then we better
+     * tell the daemon to deliver it to the procs, and what job is supposed
+     * to get it - this occurs when a caller just wants to send something
+     * to all the procs in a job. In that use-case, the caller doesn't know
+     * anything about inserting daemon commands or what routing algo might
+     * be used, so we have to help them out a little. Functions that are
+     * sending commands to the daemons themselves are smart enough to know
+     * what they need to do.
+     */
+    if (ORTE_RML_TAG_DAEMON != tag) {
+        command = ORTE_DAEMON_MESSAGE_LOCAL_PROCS;
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &command, 1, ORTE_DAEMON_CMD))) {
+            ORTE_ERROR_LOG(rc);
+            goto CLEANUP;
+        }
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &job, 1, ORTE_JOBID))) {
+            ORTE_ERROR_LOG(rc);
+            goto CLEANUP;
+        }
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tag, 1, ORTE_RML_TAG))) {
+            ORTE_ERROR_LOG(rc);
+            goto CLEANUP;
+        }
+    }
+
+    /* copy the payload into the new buffer - this is non-destructive, so our
+     * caller is still responsible for releasing any memory in the buffer they
+     * gave to us
+     */
+    if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(buffer, message))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+
+CLEANUP:
+    return ORTE_SUCCESS;
+}
+
+int orte_grpcomm_base_app_barrier(orte_process_name_t *recipient,
+                                  orte_grpcomm_collective_t *coll)
+{
+    int rc;
+    opal_buffer_t buf;
+    orte_rml_tag_t tag=ORTE_RML_TAG_BARRIER;
+
+    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
+                         "%s grpcomm:app entering barrier",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+
+    OBJ_CONSTRUCT(&buf, opal_buffer_t);
+    /* add the barrier tag */
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &tag, 1, ORTE_RML_TAG))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_DESTRUCT(&buf);
+        return rc;
+    }
+
+    /* send the buffer to recipient */
+    if (0 > (rc = orte_rml.send_buffer(recipient, &buf, ORTE_RML_TAG_DAEMON_COLLECTIVE, 0))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_DESTRUCT(&buf);
+        return rc;
+    }
+    OBJ_DESTRUCT(&buf);
+
+    /* wait to complete */
+    OPAL_THREAD_LOCK(&coll->lock);
+    while (0 == coll->recvd) {
+        opal_condition_wait(&coll->cond, &coll->lock);
+    }
+    coll->recvd = 0;  /* reset for next time */
+    OPAL_THREAD_UNLOCK(&coll->lock);
+
+    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
+                         "%s grpcomm:app received barrier release",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+
+    return ORTE_SUCCESS;
+}
+
+int orte_grpcomm_base_app_allgather(orte_process_name_t *recipient,
+                                    orte_grpcomm_collective_t *coll,
+                                    opal_buffer_t *sbuf,
+                                    opal_buffer_t *rbuf)
+{
+    int rc;
+    opal_buffer_t buf;
+    orte_rml_tag_t tag=ORTE_RML_TAG_ALLGATHER;
+
+    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
+                         "%s grpcomm:app entering allgather",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+
+    /* if I am alone, just copy data across and return */
+    if (1 == orte_process_info.num_procs) {
+        opal_dss.copy_payload(rbuf, sbuf);
+        return ORTE_SUCCESS;
+    }
+
+    /* everyone sends data to their local daemon */
+    OBJ_CONSTRUCT(&buf, opal_buffer_t);
+    /* add the allgather tag */
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &tag, 1, ORTE_RML_TAG))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_DESTRUCT(&buf);
+        return rc;
+    }
+    /* add our data to it */
+    if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(&buf, sbuf))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_DESTRUCT(&buf);
+        return rc;
+    }
+    /* send to recipient */
+    if (0 > (rc = orte_rml.send_buffer(recipient, &buf, ORTE_RML_TAG_DAEMON_COLLECTIVE, 0))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_DESTRUCT(&buf);
+        return rc;
+    }
+    OBJ_DESTRUCT(&buf);
+
+    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
+                         "%s grpcomm:app allgather buffer sent",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+
+    /* wait to complete */
+    OPAL_THREAD_LOCK(&coll->lock);
+    while (coll->recvd < orte_process_info.num_procs) {
+        opal_condition_wait(&coll->cond, &coll->lock);
+    }
+    /* xfer the collected data */
+    opal_dss.copy_payload(rbuf, &coll->results);
+    /* reset for next time */
+    OBJ_DESTRUCT(&coll->results);
+    OBJ_CONSTRUCT(&coll->results, opal_buffer_t);
+    coll->recvd = 0;
+    OPAL_THREAD_UNLOCK(&coll->lock);
+
+    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
+                         "%s grpcomm:app allgather completed",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+    return ORTE_SUCCESS;
+}
@@ -29,15 +29,15 @@ int orte_grpcomm_base_close(void)
 {
     /* If we have a selected component and module, then finalize it */
 
-    if (mca_grpcomm_base_selected) {
+    if (orte_grpcomm_base.selected) {
         orte_grpcomm.finalize();
     }
 
     /* Close all remaining available components (may be one if this is a
        OpenRTE program, or [possibly] multiple if this is ompi_info) */
 
-    mca_base_components_close(orte_grpcomm_base_output,
-                              &mca_grpcomm_base_components_available, NULL);
+    mca_base_components_close(orte_grpcomm_base.output,
+                              &orte_grpcomm_base.components_available, NULL);
 
     /* All done */
 
@@ -33,11 +33,14 @@
 #endif  /* HAVE_SYS_TIME_H */
 
 #include "opal/util/output.h"
 
 #include "opal/dss/dss.h"
 
 #include "orte/mca/errmgr/errmgr.h"
+#include "orte/mca/ess/ess.h"
+#include "orte/mca/odls/base/base.h"
 #include "orte/mca/rml/rml.h"
+#include "orte/mca/rml/rml_types.h"
 #include "orte/mca/routed/routed.h"
 #include "orte/runtime/orte_globals.h"
 #include "orte/util/name_fns.h"
 #include "orte/orted/orted.h"
@@ -78,7 +81,7 @@ void orte_grpcomm_base_coll_recv(int status, orte_process_name_t* sender,
                                  opal_buffer_t* buffer, orte_rml_tag_t tag,
                                  void* cbdata)
 {
-    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                          "%s grpcomm:coll:receive got message from %s",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                          ORTE_NAME_PRINT(sender)));
@@ -106,7 +109,7 @@ int orte_grpcomm_base_allgather(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf,
     bool has_one;
     orte_vpid_t n;
 
-    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                          "%s grpcomm:coll:allgather called with %d entries np %d",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                          num_entries, (int)np));
@@ -157,7 +160,7 @@ static int twoproc(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int32_t num_e
 
     peer.jobid = jobid;
 
-    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                          "%s grpcomm:coll:two-proc algo employed",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
@@ -170,7 +173,7 @@ static int twoproc(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int32_t num_e
         OBJ_CONSTRUCT(&buf, opal_buffer_t);
         opal_dss.pack(&buf, &num_entries, 1, OPAL_INT32);
         opal_dss.copy_payload(&buf, sendbuf);
-        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                              "%s grpcomm:coll:two-proc sending to %s",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                              ORTE_NAME_PRINT(&peer)));
@@ -193,7 +196,7 @@ static int twoproc(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int32_t num_e
         }
 
         ORTE_PROGRESSED_WAIT(false, num_recvd, 1);
-        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                              "%s grpcomm:coll:two-proc got my return message",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
@@ -210,7 +213,7 @@ static int twoproc(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int32_t num_e
         }
 
         ORTE_PROGRESSED_WAIT(false, num_recvd, 1);
-        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                              "%s grpcomm:coll:two-proc got my starting message",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
@@ -219,7 +222,7 @@ static int twoproc(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int32_t num_e
         opal_dss.pack(&buf, &num_entries, 1, OPAL_INT32);
         opal_dss.copy_payload(&buf, sendbuf);
         peer.vpid = vpids[0];
-        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                              "%s grpcomm:coll:two-proc sending to %s",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                              ORTE_NAME_PRINT(&peer)));
@@ -268,7 +271,7 @@ static int bruck(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int32_t num_ent
     opal_buffer_t collection, buf;
     int rc;
 
-    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                          "%s grpcomm:coll:bruck algo employed",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
@@ -313,7 +316,7 @@ static int bruck(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int32_t num_ent
         OBJ_CONSTRUCT(&buf, opal_buffer_t);
         opal_dss.pack(&buf, &total_entries, 1, OPAL_INT32);
         opal_dss.copy_payload(&buf, &collection);
-        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                              "%s grpcomm:coll:bruck sending to %s",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                              ORTE_NAME_PRINT(&peer)));
@@ -383,7 +386,7 @@ static int recursivedoubling(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int
     orte_process_name_t peer;
     int rc;
 
-    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                          "%s grpcomm:coll:recdub algo employed",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
@@ -426,7 +429,7 @@ static int recursivedoubling(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int
         OBJ_CONSTRUCT(&buf, opal_buffer_t);
         opal_dss.pack(&buf, &total_entries, 1, OPAL_INT32);
         opal_dss.copy_payload(&buf, &collection);
-        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                              "%s grpcomm:coll:recdub sending to %s",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                              ORTE_NAME_PRINT(&peer)));
@@ -481,3 +484,425 @@ static int recursivedoubling(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int
 
     return ORTE_SUCCESS;
 }
+
+/**** DAEMON COLLECTIVE SUPPORT ****/
+
+static void reset_child_participation(orte_jobid_t job)
+{
+    opal_list_item_t *item;
+    orte_odls_child_t *child;
+
+    for (item = opal_list_get_first(&orte_local_children);
+         item != opal_list_get_end(&orte_local_children);
+         item = opal_list_get_next(item)) {
+        child = (orte_odls_child_t*)item;
+
+        /* is this child part of the specified job? */
+        if (child->name->jobid == job) {
+            /* clear flag */
+            child->coll_recvd = false;
+        }
+    }
+}
+
+static bool all_children_participated(orte_jobid_t job)
+{
+    opal_list_item_t *item;
+    orte_odls_child_t *child;
+
+    for (item = opal_list_get_first(&orte_local_children);
+         item != opal_list_get_end(&orte_local_children);
+         item = opal_list_get_next(item)) {
+        child = (orte_odls_child_t*)item;
+
+        /* is this child part of the specified job? */
+        if (child->name->jobid == job && !child->coll_recvd) {
+            /* if this child has *not* participated yet, return false */
+            return false;
+        }
+    }
+
+    /* if we get here, then everyone in the job has participated */
+    return true;
+
+}
+
+void orte_grpcomm_base_daemon_collective(orte_process_name_t *sender,
+                                         opal_buffer_t *data)
+{
+    orte_jobid_t jobid;
+    orte_odls_job_t *jobdat;
+    orte_routed_tree_t *child;
+    orte_std_cntr_t n;
+    opal_list_t daemon_tree;
+    opal_list_item_t *item, *next;
+    int32_t num_contributors;
+    opal_buffer_t buf;
+    orte_process_name_t my_parent, proc;
+    orte_vpid_t daemonvpid;
+    int rc;
+    int32_t numc;
+    orte_rml_tag_t rmltag;
+
+    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
+                         "%s grpcomm:base:daemon_coll: daemon collective called",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+
+    /* unpack the jobid using this collective */
+    n = 1;
+    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobid, &n, ORTE_JOBID))) {
+        ORTE_ERROR_LOG(rc);
+        return;
+    }
+
+    /* lookup the job record for it */
+    jobdat = NULL;
+    for (item = opal_list_get_first(&orte_local_jobdata);
+         item != opal_list_get_end(&orte_local_jobdata);
+         item = opal_list_get_next(item)) {
+        jobdat = (orte_odls_job_t*)item;
+
+        /* is this the specified job? */
+        if (jobdat->jobid == jobid) {
+            break;
+        }
+    }
+    if (NULL == jobdat) {
+        /* race condition - someone sent us a collective before we could
+         * parse the add_local_procs cmd. Just add the jobdat object
+         * and continue
+         */
+        jobdat = OBJ_NEW(orte_odls_job_t);
+        jobdat->jobid = jobid;
+        opal_list_append(&orte_local_jobdata, &jobdat->super);
+    }
+
+    /* it may be possible to get here prior to having actually finished processing our
+     * local launch msg due to the race condition between different nodes and when
+     * they start their individual procs. Hence, we have to first ensure that we
+     * -have- finished processing the launch msg, or else we won't know whether
+     * or not to wait before sending this on
+     */
+    OPAL_THREAD_LOCK(&jobdat->lock);
+    while (!jobdat->launch_msg_processed) {
+        opal_condition_wait(&jobdat->cond, &jobdat->lock);
+    }
+    OPAL_THREAD_UNLOCK(&jobdat->lock);
+
+    /* unpack the tag for this collective */
+    n = 1;
+    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &rmltag, &n, ORTE_RML_TAG))) {
+        ORTE_ERROR_LOG(rc);
+        return;
+    }
+
+    /* unpack the number of contributors in this data bucket */
+    n = 1;
+    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &num_contributors, &n, OPAL_INT32))) {
+        ORTE_ERROR_LOG(rc);
+        return;
+    }
+    jobdat->num_contributors += num_contributors;
+
+    /* xfer the data */
+    opal_dss.copy_payload(&jobdat->collection_bucket, data);
+
+    /* count the number of participants collected */
+    jobdat->num_collected++;
+
+    /* if we haven't already done so, figure out how many participants we
+     * should be expecting
+     */
+    if (jobdat->num_participating < 0) {
+        if (0 < jobdat->num_local_procs) {
+            /* we have children, so account for our own participation */
+            jobdat->num_participating = 1;
+        } else {
+            jobdat->num_participating = 0;
+        }
+        /* now see if anyone else will be sending us something */
+        OBJ_CONSTRUCT(&daemon_tree, opal_list_t);
+        orte_routed.get_routing_tree(&daemon_tree);
+        /* unfortunately, there is no simple way to determine which of our "child"
+         * daemons in the routing tree will be sending us something. All we can do
+         * is brute force a search, though we attempt to keep it as short as possible
+         */
+        proc.jobid = jobid;
+        proc.vpid = 0;
+        while (proc.vpid < jobdat->num_procs && 0 < opal_list_get_size(&daemon_tree)) {
+            /* get the daemon that hosts this proc */
+            daemonvpid = orte_ess.proc_get_daemon(&proc);
+            /* is this daemon one of our children, or at least its contribution
+             * will pass through one of our children
+             */
+            item = opal_list_get_first(&daemon_tree);
+            while (item != opal_list_get_end(&daemon_tree)) {
+                next = opal_list_get_next(item);
+                child = (orte_routed_tree_t*)item;
+                if (child->vpid == daemonvpid || opal_bitmap_is_set_bit(&child->relatives, daemonvpid)) {
+                    /* it does - add to num_participating */
+                    jobdat->num_participating++;
+                    /* remove this from the list so we don't double count it */
+                    opal_list_remove_item(&daemon_tree, item);
+                    /* done with search */
+                    break;
+                }
+                item = next;
+            }
+            proc.vpid++;
+        }
+    }
+
+    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
+                         "%s grpcomm:base:daemon_coll: daemon collective for job %s from %s type %ld"
+                         " num_collected %d num_participating %d num_contributors %d",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jobid),
+                         ORTE_NAME_PRINT(sender),
+                         (long)jobdat->collective_type, jobdat->num_collected,
+                         jobdat->num_participating, jobdat->num_contributors));
+
+    if (jobdat->num_collected == jobdat->num_participating) {
+        /* if I am the HNP, go process the results */
+        if (ORTE_PROC_IS_HNP) {
+            goto hnp_process;
+        }
+
+        /* if I am not the HNP, send to my parent */
+        OBJ_CONSTRUCT(&buf, opal_buffer_t);
+        /* pack the jobid */
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &jobid, 1, ORTE_JOBID))) {
+            ORTE_ERROR_LOG(rc);
+            return;
+        }
+        /* pack the target tag */
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &rmltag, 1, ORTE_RML_TAG))) {
+            ORTE_ERROR_LOG(rc);
+            return;
+        }
+        /* pack the number of contributors */
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &jobdat->num_contributors, 1, OPAL_INT32))) {
+            ORTE_ERROR_LOG(rc);
+            return;
+        }
+        /* xfer the payload*/
+        opal_dss.copy_payload(&buf, &jobdat->collection_bucket);
+        /* reset everything for next collective */
+        jobdat->num_contributors = 0;
+        jobdat->num_collected = 0;
+        OBJ_DESTRUCT(&jobdat->collection_bucket);
+        OBJ_CONSTRUCT(&jobdat->collection_bucket, opal_buffer_t);
+        /* send it */
+        my_parent.jobid = ORTE_PROC_MY_NAME->jobid;
+        my_parent.vpid = orte_routed.get_routing_tree(NULL);
+        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
+                             "%s grpcomm:base:daemon_coll: daemon collective not the HNP - sending to parent %s",
+                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                             ORTE_NAME_PRINT(&my_parent)));
+        if (0 > (rc = orte_rml.send_buffer(&my_parent, &buf, ORTE_RML_TAG_DAEMON_COLLECTIVE, 0))) {
+            ORTE_ERROR_LOG(rc);
+            return;
+        }
+        OBJ_DESTRUCT(&buf);
+    }
+    return;
+
+hnp_process:
+    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
+                         "%s grpcomm:base:daemon_coll: daemon collective HNP - xcasting to job %s",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         ORTE_JOBID_PRINT(jobid)));
+    /* setup a buffer to send the results back to the job members */
+    OBJ_CONSTRUCT(&buf, opal_buffer_t);
+
+    /* add any collected data */
+    numc = jobdat->num_contributors;
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &numc, 1, OPAL_INT32))) {
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+    if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(&buf, &jobdat->collection_bucket))) {
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+    /* reset everything for next collective */
+    jobdat->num_contributors = 0;
+    jobdat->num_collected = 0;
+    OBJ_DESTRUCT(&jobdat->collection_bucket);
+    OBJ_CONSTRUCT(&jobdat->collection_bucket, opal_buffer_t);
+    /* send the buffer */
+    if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(jobid, &buf, rmltag))) {
+        ORTE_ERROR_LOG(rc);
+    }
+
+cleanup:
+    OBJ_DESTRUCT(&buf);
+
+    return;
+}
+
+static void process_msg(int fd, short event, void *data)
+{
+    orte_message_event_t *mev = (orte_message_event_t*)data;
+    orte_process_name_t *proc;
+    opal_buffer_t *buf, relay;
+    int32_t rc, n;
+    opal_list_item_t *item;
+    orte_odls_child_t *child;
+    bool found = false;
+    orte_odls_job_t *jobdat;
+    orte_rml_tag_t rmltag;
+
+    proc = &mev->sender;
+    buf = mev->buffer;
+
+    /* is the sender a local proc, or a daemon relaying the collective? */
+    if (ORTE_PROC_MY_NAME->jobid == proc->jobid) {
+        /* this is a relay - call that code */
+        orte_grpcomm_base.daemon_coll(proc, buf);
+        goto CLEANUP;
+    }
+
+    for (item = opal_list_get_first(&orte_local_children);
+         item != opal_list_get_end(&orte_local_children);
+         item = opal_list_get_next(item)) {
+        child = (orte_odls_child_t*)item;
+
+        /* find this child */
+        if (OPAL_EQUAL == opal_dss.compare(proc, child->name, ORTE_NAME)) {
+
+            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
+                                 "%s grpcomm:base:daemon_coll: collecting data from child %s",
+                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                                 ORTE_NAME_PRINT(child->name)));
+
+            found = true;
+            break;
+        }
+    }
+
+    /* if it wasn't found on the list, then we need to add it - must have
+     * come from a singleton
+     */
+    if (!found) {
+        child = OBJ_NEW(orte_odls_child_t);
+        if (ORTE_SUCCESS != (rc = opal_dss.copy((void**)&child->name, proc, ORTE_NAME))) {
+            ORTE_ERROR_LOG(rc);
+            return;
+        }
+        opal_list_append(&orte_local_children, &child->super);
+        /* we don't know any other info about the child, so just indicate it's
+         * alive
+         */
+        child->alive = true;
+        /* setup a jobdat for it */
+        orte_odls_base_setup_singleton_jobdat(proc->jobid);
+    }
+
+    /* this was one of our local procs - find the jobdat for this job */
+    jobdat = NULL;
+    for (item = opal_list_get_first(&orte_local_jobdata);
+         item != opal_list_get_end(&orte_local_jobdata);
+         item = opal_list_get_next(item)) {
+        jobdat = (orte_odls_job_t*)item;
+
+        /* is this the specified job? */
+        if (jobdat->jobid == proc->jobid) {
+            break;
+        }
+    }
+    if (NULL == jobdat) {
+        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
+        rc = ORTE_ERR_NOT_FOUND;
+        goto CLEANUP;
+    }
+
+    /* unpack the target tag */
+    n = 1;
+    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buf, &rmltag, &n, ORTE_RML_TAG))) {
+        ORTE_ERROR_LOG(rc);
+        goto CLEANUP;
+    }
+
+    /* collect the provided data */
+    opal_dss.copy_payload(&jobdat->local_collection, buf);
+
+    /* flag this proc as having participated */
+    child->coll_recvd = true;
+
+    /* now check to see if all local procs in this job have participated */
+    if (all_children_participated(proc->jobid)) {
+
+        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
+                             "%s grpcomm:base:daemon_coll: executing collective",
+                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+
+        /* prep a buffer to pass it all along */
+        OBJ_CONSTRUCT(&relay, opal_buffer_t);
+        /* pack the jobid */
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(&relay, &proc->jobid, 1, ORTE_JOBID))) {
+            ORTE_ERROR_LOG(rc);
+            return;
+        }
+        /* pack the target tag */
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(&relay, &rmltag, 1, ORTE_RML_TAG))) {
+            ORTE_ERROR_LOG(rc);
+            return;
+        }
+        /* pack the number of contributors */
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(&relay, &jobdat->num_local_procs, 1, OPAL_INT32))) {
+            ORTE_ERROR_LOG(rc);
+            return;
+        }
+        /* xfer the payload*/
+        opal_dss.copy_payload(&relay, &jobdat->local_collection);
+        /* refresh the collection bucket for reuse */
+        OBJ_DESTRUCT(&jobdat->local_collection);
+        OBJ_CONSTRUCT(&jobdat->local_collection, opal_buffer_t);
+        reset_child_participation(proc->jobid);
+        /* pass this to the daemon collective operation */
+        orte_grpcomm_base.daemon_coll(ORTE_PROC_MY_NAME, &relay);
+        /* done with the relay */
+        OBJ_DESTRUCT(&relay);
+
+        OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
+                             "%s grpcomm:base:daemon_coll: collective completed",
+                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+    }
+
+CLEANUP:
+    /* release the message */
+    OBJ_RELEASE(mev);
+}
+
+void orte_grpcomm_base_daemon_coll_recv(int status, orte_process_name_t* sender,
+                                        opal_buffer_t* buffer, orte_rml_tag_t tag,
+                                        void* cbdata)
+{
+    int rc;
+
+    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
+                         "%s grpcomm:base:daemon_coll:receive got message from %s",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         ORTE_NAME_PRINT(sender)));
+
+    /* don't process this right away - we need to get out of the recv before
+     * we process the message as it may ask us to do something that involves
+     * more messaging! Instead, setup an event so that the message gets processed
+     * as soon as we leave the recv.
+     *
+     * The macro makes a copy of the buffer, which we release above - the incoming
+     * buffer, however, is NOT released here, although its payload IS transferred
+     * to the message buffer for later processing
+     */
+    ORTE_MESSAGE_EVENT(sender, buffer, tag, process_msg);
+
+    /* reissue the recv */
+    if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
+                                                      ORTE_RML_TAG_DAEMON_COLLECTIVE,
+                                                      ORTE_RML_NON_PERSISTENT,
+                                                      orte_grpcomm_base_daemon_coll_recv,
+                                                      cbdata))) {
+        ORTE_ERROR_LOG(rc);
+    }
+    return;
+}
@@ -61,7 +61,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
     orte_vpid_t daemon;
     char *hostname;
 
-    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                          "%s grpcomm:base:full:modex: performing modex",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
@@ -107,7 +107,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
         goto cleanup;
     }
 
-    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
                          "%s grpcomm:base:full:modex: executing allgather",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
@@ -117,7 +117,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
         goto cleanup;
     }
 
-    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
                          "%s grpcomm:base:full:modex: processing modex info",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
@@ -130,7 +130,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
         goto cleanup;
     }
 
-    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                          "%s grpcomm:base:full:modex: received %ld data bytes from %d procs",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                          (long)(rbuf.pack_ptr - rbuf.unpack_ptr), num_procs));
@@ -188,7 +188,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
         }
         if (NULL == nid) {
             /* node wasn't found - let's add it */
-            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                                  "%s grpcomm:base:full:modex no nidmap entry for node %s",
                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostname));
             nid = OBJ_NEW(orte_nid_t);
@@ -200,7 +200,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
         /* see if we have this job in a jobmap */
         if (NULL == (jmap = orte_util_lookup_jmap(proc_name.jobid))) {
             /* proc wasn't found - let's add it */
-            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                                  "%s grpcomm:base:full:modex no jobmap entry for job %s",
                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                  ORTE_JOBID_PRINT(proc_name.jobid)));
@@ -226,7 +226,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
         /* see if we have this proc in a pidmap */
         if (NULL == orte_util_lookup_pmap(&proc_name)) {
             /* proc wasn't found - let's add it */
-            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                                  "%s grpcomm:base:full:modex no pidmap entry for proc %s",
                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                  ORTE_NAME_PRINT(&proc_name)));
@@ -243,7 +243,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
             }
         }
 
-        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                              "%s grpcomm:base:full:modex: adding modex entry for proc %s",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                              ORTE_NAME_PRINT(&proc_name)));
@@ -264,7 +264,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
             goto cleanup;
         }
 
-        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                              "%s grpcomm:base:full:modex adding %d entries for proc %s",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_recvd_entries,
                              ORTE_NAME_PRINT(&proc_name)));
@@ -331,7 +331,7 @@ int orte_grpcomm_base_modex_unpack( opal_buffer_t* rbuf, bool modex_db)
         goto cleanup;
     }
 
-    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                          "%s grpcomm:base:modex:unpack: received %ld data bytes from %d procs",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                          (long)(rbuf->pack_ptr - rbuf->unpack_ptr), num_procs));
@@ -352,7 +352,7 @@ int orte_grpcomm_base_modex_unpack( opal_buffer_t* rbuf, bool modex_db)
 
         /* SINCE THIS IS AMONGST PEERS, THERE IS NO NEED TO UPDATE THE NIDMAP/PIDMAP */
 
-        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                              "%s grpcomm:base:modex:unpack: adding modex entry for proc %s",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                              ORTE_NAME_PRINT(&proc_name)));
@@ -373,7 +373,7 @@ int orte_grpcomm_base_modex_unpack( opal_buffer_t* rbuf, bool modex_db)
             goto cleanup;
         }
 
-        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                              "%s grpcomm:base:modex:unpack: adding %d entries for proc %s",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_recvd_entries,
                              ORTE_NAME_PRINT(&proc_name)));
@@ -381,7 +381,7 @@ int orte_grpcomm_base_modex_unpack( opal_buffer_t* rbuf, bool modex_db)
         /* find this proc's node in the nidmap */
         if (NULL == (nid = orte_util_lookup_nid(&proc_name))) {
             /* proc wasn't found - return error */
-            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                                  "%s grpcomm:base:modex:unpack: no nidmap entry for proc %s",
                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                  ORTE_NAME_PRINT(&proc_name)));
@@ -440,7 +440,7 @@ int orte_grpcomm_base_peer_modex(bool modex_db)
     int rc = ORTE_SUCCESS;
     bool modex_reqd;
 
-    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                          "%s grpcomm:base:peer:modex: performing modex",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
@@ -460,7 +460,7 @@ int orte_grpcomm_base_peer_modex(bool modex_db)
         goto cleanup;
     }
 
-    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
                          "%s grpcomm:base:peer:modex: executing allgather",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
@@ -470,7 +470,7 @@ int orte_grpcomm_base_peer_modex(bool modex_db)
         goto cleanup;
     }
 
-    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
                          "%s grpcomm:base:peer:modex: processing modex info",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
@@ -728,7 +728,7 @@ int orte_grpcomm_base_set_proc_attr(const char *attr_name,
 
     OPAL_THREAD_LOCK(&mutex);
 
-    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                          "%s grpcomm:set_proc_attr: setting attribute %s data size %lu",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                          attr_name, (unsigned long)size));
@@ -769,14 +769,14 @@ int orte_grpcomm_base_get_proc_attr(const orte_process_name_t proc,
     modex_proc_data_t *proc_data;
    modex_attr_data_t *attr_data;
 
-    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                          "%s grpcomm:get_proc_attr: searching for attr %s on proc %s",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), attribute_name,
                          ORTE_NAME_PRINT(&proc)));
 
     proc_data = modex_lookup_orte_proc(&proc);
     if (NULL == proc_data) {
-        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                              "%s grpcomm:get_proc_attr: no modex entry for proc %s",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                              ORTE_NAME_PRINT(&proc)));
@@ -791,7 +791,7 @@ int orte_grpcomm_base_get_proc_attr(const orte_process_name_t proc,
     /* copy the data out to the user */
     if ((NULL == attr_data) ||
         (attr_data->attr_data_size == 0)) {
-        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                              "%s grpcomm:get_proc_attr: no attr avail or zero byte size for proc %s attribute %s",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                              ORTE_NAME_PRINT(&proc), attribute_name));
@@ -807,7 +807,7 @@ int orte_grpcomm_base_get_proc_attr(const orte_process_name_t proc,
     memcpy(copy, attr_data->attr_data, attr_data->attr_data_size);
     *val = copy;
     *size = attr_data->attr_data_size;
-    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                          "%s grpcomm:get_proc_attr: found %d bytes for attr %s on proc %s",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)attr_data->attr_data_size,
                          attribute_name, ORTE_NAME_PRINT(&proc)));
@@ -843,7 +843,7 @@ int orte_grpcomm_base_pack_modex_entries(opal_buffer_t *buf, bool *mdx_reqd)
 {
     int rc;
 
-    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                          "%s grpcomm:base:pack_modex: reporting %ld entries",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                          (long)num_entries));
@@ -899,7 +899,7 @@ int orte_grpcomm_base_update_modex_entries(orte_process_name_t *proc_name,
         goto cleanup;
     }
 
-    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                          "%s grpcomm:base:update_modex_entries: adding %d entries for proc %s",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_recvd_entries,
                          ORTE_NAME_PRINT(proc_name)));
@@ -968,7 +968,7 @@ int orte_grpcomm_base_load_modex_data(orte_process_name_t *proc_name, char *attr
     int rc = ORTE_SUCCESS;
     void *bytes;
 
-    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
+    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                          "%s grpcomm:base:load_modex_data: loading %ld bytes for attr %s on proc %s",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                          (long)num_bytes, attr_name, ORTE_NAME_PRINT(proc_name)));
 
@ -40,12 +40,9 @@
/*
 * Global variables
 */
int orte_grpcomm_base_output = -1;
bool mca_grpcomm_base_selected;
orte_grpcomm_base_t orte_grpcomm_base;

orte_grpcomm_base_module_t orte_grpcomm = {0};
opal_list_t mca_grpcomm_base_components_available;
orte_grpcomm_base_component_t mca_grpcomm_base_selected_component;
int orte_grpcomm_profile_fd = -1;

/**
 * Function for finding and opening either all MCA components, or the one
@ -55,14 +52,18 @@ int orte_grpcomm_base_open(void)
{
    /* Debugging / verbose output.  Always have stream open, with
       verbose set by the mca open system... */
    orte_grpcomm_base_output = opal_output_open(NULL);
    orte_grpcomm_base.output = opal_output_open(NULL);
    orte_grpcomm_base.profile_fd = -1;

    /* define the default daemon collective fn */
    orte_grpcomm_base.daemon_coll = orte_grpcomm_base_daemon_collective;

    /* Open up all available components */

    if (ORTE_SUCCESS !=
        mca_base_components_open("grpcomm", orte_grpcomm_base_output,
        mca_base_components_open("grpcomm", orte_grpcomm_base.output,
                                 mca_grpcomm_base_static_components,
                                 &mca_grpcomm_base_components_available, true)) {
                                 &orte_grpcomm_base.components_available, true)) {
        return ORTE_ERROR;
    }

@ -70,3 +71,22 @@ int orte_grpcomm_base_open(void)

    return ORTE_SUCCESS;
}

/* local objects */
static void collective_constructor(orte_grpcomm_collective_t *ptr)
{
    OBJ_CONSTRUCT(&ptr->lock, opal_mutex_t);
    OBJ_CONSTRUCT(&ptr->cond, opal_condition_t);
    OBJ_CONSTRUCT(&ptr->results, opal_buffer_t);
    ptr->recvd = 0;
}
static void collective_destructor(orte_grpcomm_collective_t *ptr)
{
    OBJ_DESTRUCT(&ptr->lock);
    OBJ_DESTRUCT(&ptr->cond);
    OBJ_DESTRUCT(&ptr->results);
}
OBJ_CLASS_INSTANCE(orte_grpcomm_collective_t,
                   opal_object_t,
                   collective_constructor,
                   collective_destructor);
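The lock/cond/recvd triple defined above is the core of the thread-friendly collectives: a receive callback transfers its payload, bumps the counter, and signals while holding the lock, and the calling thread sleeps on the condition variable instead of spinning in a progressed-wait loop. A minimal sketch of that pattern in plain pthreads (collective_t, collective_post, and collective_wait are hypothetical stand-ins for the OPAL thread primitives):

/* Sketch only: mirrors the opal_mutex_t/opal_condition_t usage above
 * with plain pthreads. */
#include <pthread.h>

typedef struct {
    pthread_mutex_t lock;
    pthread_cond_t  cond;
    int             recvd;   /* mirrors coll->recvd */
} collective_t;

/* called from the receive callback for each arriving contribution */
void collective_post(collective_t *coll, int expected)
{
    pthread_mutex_lock(&coll->lock);
    coll->recvd += 1;                          /* record the contribution */
    if (expected == coll->recvd) {
        pthread_cond_broadcast(&coll->cond);   /* wake the waiting thread */
    }
    pthread_mutex_unlock(&coll->lock);
}

/* called by the thread executing the collective */
void collective_wait(collective_t *coll, int expected)
{
    pthread_mutex_lock(&coll->lock);
    while (coll->recvd < expected) {           /* guard against spurious wakeups */
        pthread_cond_wait(&coll->cond, &coll->lock);
    }
    coll->recvd = 0;                           /* reset for the next collective */
    pthread_mutex_unlock(&coll->lock);
}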
@ -38,8 +38,8 @@ int orte_grpcomm_base_select(void)
    /*
     * Select the best component
     */
    if( OPAL_SUCCESS != mca_base_select("grpcomm", orte_grpcomm_base_output,
                                        &mca_grpcomm_base_components_available,
    if( OPAL_SUCCESS != mca_base_select("grpcomm", orte_grpcomm_base.output,
                                        &orte_grpcomm_base.components_available,
                                        (mca_base_module_t **) &best_module,
                                        (mca_base_component_t **) &best_component) ) {
        /* This will only happen if no component was selected */
@ -56,7 +56,7 @@ int orte_grpcomm_base_select(void)
        goto cleanup;
    }

    mca_grpcomm_base_selected = true;
    orte_grpcomm_base.selected = true;

cleanup:
    return exit_status;
(file diff not shown because it is too large)
@ -51,8 +51,8 @@ static void finalize(void);
static int xcast(orte_jobid_t job,
                 opal_buffer_t *buffer,
                 orte_rml_tag_t tag);
static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf);
static int barrier(void);
static int hier_allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf);
static int hier_barrier(void);
static int modex(opal_list_t *procs);
static int set_proc_attr(const char *attr_name, const void *data, size_t size);
static int get_proc_attr(const orte_process_name_t proc,
@ -64,9 +64,9 @@ orte_grpcomm_base_module_t orte_grpcomm_hier_module = {
    init,
    finalize,
    xcast,
    allgather,
    hier_allgather,
    orte_grpcomm_base_allgather_list,
    barrier,
    hier_barrier,
    NULL,  /* onesided barrier only used by daemons */
    set_proc_attr,
    get_proc_attr,
@ -79,10 +79,11 @@ orte_grpcomm_base_module_t orte_grpcomm_hier_module = {
static orte_local_rank_t my_local_rank;
static opal_list_t my_local_peers;
static orte_process_name_t my_local_rank_zero_proc;
static int num_local_peers;
static size_t num_local_peers;
static bool coll_initialized = false;
static orte_vpid_t *my_coll_peers=NULL;
static int cpeers=0;
static orte_grpcomm_collective_t barrier, allgather;

/**
 * Initialize the module
@ -92,6 +93,8 @@ static int init(void)
    int rc;

    OBJ_CONSTRUCT(&my_local_peers, opal_list_t);
    OBJ_CONSTRUCT(&barrier, orte_grpcomm_collective_t);
    OBJ_CONSTRUCT(&allgather, orte_grpcomm_collective_t);

    if (ORTE_SUCCESS != (rc = orte_grpcomm_base_modex_init())) {
        ORTE_ERROR_LOG(rc);
@ -115,6 +118,9 @@ static void finalize(void)
    }
    OBJ_DESTRUCT(&my_local_peers);

    OBJ_DESTRUCT(&barrier);
    OBJ_DESTRUCT(&allgather);

    if (NULL != my_coll_peers) {
        free(my_coll_peers);
    }
@ -132,9 +138,8 @@ static int xcast(orte_jobid_t job,
{
    int rc = ORTE_SUCCESS;
    opal_buffer_t buf;
    orte_daemon_cmd_flag_t command;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:xcast sent to job %s tag %ld",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(job), (long)tag));
@ -144,58 +149,11 @@ static int xcast(orte_jobid_t job,
        return ORTE_SUCCESS;
    }

    /* setup a buffer to handle the xcast command */
    /* prep the output buffer */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);
    /* all we need to do is send this to the HNP - the relay logic
     * will ensure everyone else gets it! So tell the HNP to
     * process and relay it. The HNP will use the routed.get_routing_tree
     * to find out who it should relay the message to.
     */
    command = ORTE_DAEMON_PROCESS_AND_RELAY_CMD;
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &command, 1, ORTE_DAEMON_CMD))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }
    /* pack the target jobid and tag for use in relay */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &job, 1, ORTE_JOBID))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &tag, 1, ORTE_RML_TAG))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }

    /* if this isn't intended for the daemon command tag, then we better
     * tell the daemon to deliver it to the procs, and what job is supposed
     * to get it - this occurs when a caller just wants to send something
     * to all the procs in a job. In that use-case, the caller doesn't know
     * anything about inserting daemon commands or what routing algo might
     * be used, so we have to help them out a little. Functions that are
     * sending commands to the daemons themselves are smart enough to know
     * what they need to do.
     */
    if (ORTE_RML_TAG_DAEMON != tag) {
        command = ORTE_DAEMON_MESSAGE_LOCAL_PROCS;
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &command, 1, ORTE_DAEMON_CMD))) {
            ORTE_ERROR_LOG(rc);
            goto CLEANUP;
        }
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &job, 1, ORTE_JOBID))) {
            ORTE_ERROR_LOG(rc);
            goto CLEANUP;
        }
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &tag, 1, ORTE_RML_TAG))) {
            ORTE_ERROR_LOG(rc);
            goto CLEANUP;
        }
    }

    /* copy the payload into the new buffer - this is non-destructive, so our
     * caller is still responsible for releasing any memory in the buffer they
     * gave to us
     */
    if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(&buf, buffer))) {
    if (ORTE_SUCCESS != (rc = orte_grpcomm_base_app_pack_xcast(ORTE_DAEMON_PROCESS_AND_RELAY_CMD,
                                                               job, &buf, buffer, tag))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }
@ -225,80 +183,58 @@ CLEANUP:


/* the barrier is executed as an allgather with data length of zero */
static int barrier(void)
static int hier_barrier(void)
{
    opal_buffer_t buf1, buf2;
    int rc;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:hier entering barrier",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    OBJ_CONSTRUCT(&buf1, opal_buffer_t);
    OBJ_CONSTRUCT(&buf2, opal_buffer_t);

    if (ORTE_SUCCESS != (rc = allgather(&buf1, &buf2))) {
    if (ORTE_SUCCESS != (rc = hier_allgather(&buf1, &buf2))) {
        ORTE_ERROR_LOG(rc);
    }
    OBJ_DESTRUCT(&buf1);
    OBJ_DESTRUCT(&buf2);

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:hier barrier complete",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    return rc;
}

static opal_buffer_t allgather_buf;
static int allgather_num_recvd;

static void process_msg(int fd, short event, void *data)
{
    int rc;
    orte_message_event_t *mev = (orte_message_event_t*)data;

    /* xfer the data */
    if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(&allgather_buf, mev->buffer))) {
        ORTE_ERROR_LOG(rc);
    }
    allgather_num_recvd++;
    /* release the message */
    OBJ_RELEASE(mev);
}

static void allgather_recv(int status, orte_process_name_t* sender,
                           opal_buffer_t *buffer,
                           orte_rml_tag_t tag, void *cbdata)
{
    orte_grpcomm_collective_t *coll = (orte_grpcomm_collective_t*)cbdata;
    int rc;

    /* don't process this right away - we need to get out of the recv before
     * we process the message as it may ask us to do something that involves
     * more messaging! Instead, setup an event so that the message gets processed
     * as soon as we leave the recv.
     *
     * The macro makes a copy of the buffer, which we release above - the incoming
     * buffer, however, is NOT released here, although its payload IS transferred
     * to the message buffer for later processing
     */
    ORTE_MESSAGE_EVENT(sender, buffer, tag, process_msg);

    /* reissue the recv */
    if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER,
                                                      ORTE_RML_NON_PERSISTENT, allgather_recv, NULL))) {
    OPAL_THREAD_LOCK(&coll->lock);
    /* xfer the data */
    if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(&coll->results, buffer))) {
        ORTE_ERROR_LOG(rc);
    }
    coll->recvd += 1;
    if (num_local_peers == coll->recvd) {
        opal_condition_broadcast(&coll->cond);
    }
    OPAL_THREAD_UNLOCK(&coll->lock);
}

static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)
static int hier_allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)
{
    int rc=ORTE_SUCCESS;
    opal_list_item_t *item;
    orte_namelist_t *nm;
    opal_buffer_t final_buf;
    opal_buffer_t tmp_buf;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:hier entering allgather",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

@ -354,58 +290,74 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)

    /* if I am not local rank = 0 */
    if (0 != my_local_rank) {
        /* setup the collective */
        OPAL_THREAD_LOCK(&allgather.lock);
        allgather.recvd = num_local_peers - 1;
        /* reset the collector */
        OBJ_DESTRUCT(&allgather.results);
        OBJ_CONSTRUCT(&allgather.results, opal_buffer_t);
        OPAL_THREAD_UNLOCK(&allgather.lock);

        /* send our data to the local_rank=0 proc on this node */
        if (0 > (rc = orte_rml.send_buffer(&my_local_rank_zero_proc, sbuf, ORTE_RML_TAG_ALLGATHER, 0))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }

        /* setup to get return buffer */
        OBJ_CONSTRUCT(&allgather_buf, opal_buffer_t);

        /* now receive the final result. Be sure to do this in
         * a manner that allows us to return without being in a recv!
         */
        allgather_num_recvd = 0;
        rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER,
                                     ORTE_RML_NON_PERSISTENT, allgather_recv, NULL);
                                     ORTE_RML_NON_PERSISTENT, allgather_recv, &allgather);
        if (rc != ORTE_SUCCESS) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }

        ORTE_PROGRESSED_WAIT(false, allgather_num_recvd, 1);

        /* cancel the lingering recv */
        orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER);

        /* wait to complete */
        OPAL_THREAD_LOCK(&allgather.lock);
        while (allgather.recvd < num_local_peers) {
            opal_condition_wait(&allgather.cond, &allgather.lock);
        }
        /* copy payload to the caller's buffer */
        if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(rbuf, &allgather_buf))) {
        if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(rbuf, &allgather.results))) {
            ORTE_ERROR_LOG(rc);
        }
        OBJ_DESTRUCT(&allgather_buf);
        OPAL_THREAD_UNLOCK(&allgather.lock);

    } else {
        /* I am local_rank = 0 on this node! */

        /* setup to recv data from the procs that share this node with me */
        OBJ_CONSTRUCT(&allgather_buf, opal_buffer_t);

        /* seed it with my own data */
        opal_dss.copy_payload(&allgather_buf, sbuf);

        /* setup the collective */
        OPAL_THREAD_LOCK(&allgather.lock);
        allgather.recvd = 1;
        /* reset the collector */
        OBJ_DESTRUCT(&allgather.results);
        OBJ_CONSTRUCT(&allgather.results, opal_buffer_t);
        /* seed with my data */
        opal_dss.copy_payload(&allgather.results, sbuf);
        OPAL_THREAD_UNLOCK(&allgather.lock);

        /* wait to receive their data. Be sure to do this in
         * a manner that allows us to return without being in a recv!
         */
        allgather_num_recvd = 0;
        rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER,
                                     ORTE_RML_NON_PERSISTENT, allgather_recv, NULL);
                                     ORTE_RML_PERSISTENT, allgather_recv, &allgather);
        if (rc != ORTE_SUCCESS) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }

        ORTE_PROGRESSED_WAIT(false, allgather_num_recvd, num_local_peers);
        /* wait to complete */
        OPAL_THREAD_LOCK(&allgather.lock);
        while (allgather.recvd < num_local_peers) {
            opal_condition_wait(&allgather.cond, &allgather.lock);
        }
        /* xfer to the tmp buf in case another allgather comes along */
        OBJ_CONSTRUCT(&tmp_buf, opal_buffer_t);
        opal_dss.copy_payload(&tmp_buf, &allgather.results);
        OPAL_THREAD_UNLOCK(&allgather.lock);

        /* cancel the lingering recv */
        orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER);
@ -415,16 +367,14 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)
         * manner - the exact collective will depend upon the number of
         * nodes in the job
         */
        OBJ_CONSTRUCT(&final_buf, opal_buffer_t);
        if (ORTE_SUCCESS != (rc = orte_grpcomm_base_allgather(&allgather_buf, rbuf, num_local_peers + 1,
        if (ORTE_SUCCESS != (rc = orte_grpcomm_base_allgather(&tmp_buf, rbuf, num_local_peers + 1,
                                                              ORTE_PROC_MY_NAME->jobid,
                                                              cpeers, my_coll_peers))) {
            ORTE_ERROR_LOG(rc);
            OBJ_DESTRUCT(&allgather_buf);
            OBJ_DESTRUCT(&final_buf);
            OBJ_DESTRUCT(&tmp_buf);
            return rc;
        }
        OBJ_DESTRUCT(&allgather_buf);  /* done with this */
        OBJ_DESTRUCT(&tmp_buf);  /* done with this */

        /* distribute the results to our local peers */
        for (item = opal_list_get_first(&my_local_peers);
@ -438,7 +388,7 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)
        }
    }

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:hier allgather completed",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

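The allgather above is easier to follow as a two-level fan-in/fan-out: non-zero local ranks hand their data to the local rank-0 process, rank 0 aggregates its node's contributions, runs the inter-node exchange among the rank-0 procs, and fans the global result back out. A compressed control-flow sketch with print stubs in place of the actual RML buffer traffic (send_to/recv_from are hypothetical, not the ORTE API):

#include <stdio.h>

static void send_to(const char *who, const char *what)   { printf("send %s -> %s\n", what, who); }
static void recv_from(const char *who, const char *what) { printf("recv %s <- %s\n", what, who); }

void hier_allgather_sketch(int my_local_rank, int num_local_peers)
{
    if (0 != my_local_rank) {
        /* leaf: hand my data to local rank 0, then wait for the final result */
        send_to("local rank 0", "my contribution");
        recv_from("local rank 0", "final result");
        return;
    }
    /* local rank 0: collect a contribution from everyone sharing my node */
    for (int i = 0; i < num_local_peers; i++) {
        recv_from("local peer", "contribution");
    }
    /* run the inter-node allgather among the rank-0 procs only */
    send_to("rank-0 peers", "node contribution");
    recv_from("rank-0 peers", "global result");
    /* fan the global result back out to my local peers */
    for (int i = 0; i < num_local_peers; i++) {
        send_to("local peer", "final result");
    }
}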
@ -458,7 +408,7 @@ static int modex(opal_list_t *procs)
    orte_attr_t *attrdata;
    opal_buffer_t bobuf;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:hier: modex entered",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

@ -511,7 +461,7 @@ static int modex(opal_list_t *procs)
         * no mixing of the two. In this case, we include the info in the modex
         */
        if (orte_hetero_apps || !orte_homogeneous_nodes) {
            OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
            OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                                 "%s grpcomm:hier: modex is required",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

@ -527,7 +477,7 @@ static int modex(opal_list_t *procs)
        /* if we don't have any other way to do this, then let's default to doing the
         * modex so we at least can function, even if it isn't as fast as we might like
         */
        OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
        OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                             "%s grpcomm:hier: modex is required",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        if (ORTE_SUCCESS != (rc = orte_grpcomm_base_peer_modex(false))) {
@ -542,7 +492,7 @@ static int modex(opal_list_t *procs)
        return ORTE_ERR_NOT_FOUND;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:hier:modex reading %s file",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), opal_profile_file));

@ -606,7 +556,7 @@ static int modex(opal_list_t *procs)
        OBJ_DESTRUCT(&bobuf);
    }

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:hier: modex completed",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

@ -616,7 +566,7 @@ static int modex(opal_list_t *procs)
/* the HNP will -never- execute the following as it is NOT an MPI process */
static int set_proc_attr(const char *attr_name, const void *data, size_t size)
{
    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:hier:set_proc_attr for attribute %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), attr_name));

@ -637,7 +587,7 @@ static int get_proc_attr(const orte_process_name_t proc,
    /* find this proc's node in the nidmap */
    if (NULL == (nid = orte_util_lookup_nid((orte_process_name_t*)&proc))) {
        /* proc wasn't found - return error */
        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                             "%s grpcomm:hier:get_proc_attr: no modex entry for proc %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&proc)));
@ -660,7 +610,7 @@ static int get_proc_attr(const orte_process_name_t proc,
            memcpy(copy, attr->bytes, attr->size);
            *val = copy;
            *size = attr->size;
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                                 "%s grpcomm:hier:get_proc_attr: found %d bytes for attr %s on proc %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)attr->size,
                                 attribute_name, ORTE_NAME_PRINT(&proc)));
@ -669,7 +619,7 @@ static int get_proc_attr(const orte_process_name_t proc,
    }

    /* get here if attribute isn't found */
    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                         "%s grpcomm:hier:get_proc_attr: no attr avail or zero byte size for proc %s attribute %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(&proc), attribute_name));
orte/mca/grpcomm/mcast/Makefile.am (new file, 36 lines)
@ -0,0 +1,36 @@
#
# Copyright (c) 2010 Cisco Systems, Inc.  All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

EXTRA_DIST = .windows

sources = \
        grpcomm_mcast.h \
        grpcomm_mcast.c \
        grpcomm_mcast_component.c

# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).

if OMPI_BUILD_grpcomm_mcast_DSO
component_noinst =
component_install = mca_grpcomm_mcast.la
else
component_noinst = libmca_grpcomm_mcast.la
component_install =
endif

mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_grpcomm_mcast_la_SOURCES = $(sources)
mca_grpcomm_mcast_la_LDFLAGS = -module -avoid-version

noinst_LTLIBRARIES = $(component_noinst)
libmca_grpcomm_mcast_la_SOURCES = $(sources)
libmca_grpcomm_mcast_la_LDFLAGS = -module -avoid-version
orte/mca/grpcomm/mcast/configure.m4 (new file, 18 lines)
@ -0,0 +1,18 @@
# -*- shell-script -*-
#
# Copyright (c) 2010 Cisco Systems, Inc.  All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

# MCA_grpcomm_mcast_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_grpcomm_mcast_CONFIG], [
    # if we don't want reliable multicast, don't compile
    # this component
    AS_IF([test "$orte_want_multicast" = "1"],
          [$1], [$2])
])dnl
orte/mca/grpcomm/mcast/configure.params (new file, 19 lines)
@ -0,0 +1,19 @@
# -*- shell-script -*-
#
# Copyright (c) 2010 Cisco Systems, Inc.  All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

# Specific to this module

PARAM_CONFIG_FILES="Makefile"
#
# Set the config priority so that this
# component will build for all environs -except-
# those special ones that do not support it

PARAM_CONFIG_PRIORITY=10
orte/mca/grpcomm/mcast/grpcomm_mcast.c (new file, 625 lines)
@ -0,0 +1,625 @@
/*
 * Copyright (c) 2010 Cisco Systems, Inc.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"

#include <string.h>
#include <fcntl.h>

#include "opal/dss/dss.h"
#include "opal/runtime/opal.h"
#include "opal/threads/mutex.h"
#include "opal/threads/condition.h"

#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/odls/base/base.h"
#include "orte/mca/odls/odls_types.h"
#include "orte/mca/rmcast/rmcast.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/routed/routed.h"
#include "orte/util/name_fns.h"
#include "orte/util/show_help.h"
#include "orte/util/proc_info.h"
#include "orte/util/nidmap.h"
#include "orte/orted/orted.h"
#include "orte/runtime/orte_wait.h"
#include "orte/runtime/orte_globals.h"

#include "orte/mca/grpcomm/base/base.h"
#include "grpcomm_mcast.h"


/* Static API's */
static int init(void);
static void finalize(void);
static int xcast(orte_jobid_t job,
                 opal_buffer_t *buffer,
                 orte_rml_tag_t tag);
static int mcast_allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf);
static int mcast_barrier(void);
static int mcast_onesided_barrier(void);
static int modex(opal_list_t *procs);
static int get_proc_attr(const orte_process_name_t proc,
                         const char * attribute_name, void **val,
                         size_t *size);

/* Module def */
orte_grpcomm_base_module_t orte_grpcomm_mcast_module = {
    init,
    finalize,
    xcast,
    mcast_allgather,
    orte_grpcomm_base_allgather_list,
    mcast_barrier,
    mcast_onesided_barrier,
    orte_grpcomm_base_set_proc_attr,
    get_proc_attr,
    modex,
    orte_grpcomm_base_purge_proc_attrs
};

/* Local functions */
static void daemon_recv(int status,
                        orte_rmcast_channel_t channel,
                        orte_rmcast_tag_t tag,
                        orte_process_name_t *sender,
                        opal_buffer_t *buf, void* cbdata);

/* Local variables */
static orte_grpcomm_collective_t barrier, allgather, onesided_barrier;

/**
 * Initialize the module
 */
static int init(void)
{
    int rc;

    if (ORTE_SUCCESS != (rc = orte_grpcomm_base_modex_init())) {
        ORTE_ERROR_LOG(rc);
    }

    /* setup global variables */
    OBJ_CONSTRUCT(&barrier, orte_grpcomm_collective_t);
    OBJ_CONSTRUCT(&allgather, orte_grpcomm_collective_t);
    OBJ_CONSTRUCT(&onesided_barrier, orte_grpcomm_collective_t);

    /* point to our collective function */
    orte_grpcomm_base.daemon_coll = orte_grpcomm_mcast_daemon_coll;

    /* if we are a daemon or the hnp, we need to post a
     * recv to catch any collective operations or cmds
     */
    if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
        if (ORTE_SUCCESS != (rc = orte_rmcast.recv_buffer_nb(ORTE_RMCAST_SYS_CHANNEL,
                                                             ORTE_RMCAST_TAG_WILDCARD,
                                                             ORTE_RMCAST_PERSISTENT,
                                                             daemon_recv,
                                                             NULL))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }

    return ORTE_SUCCESS;
}

/**
 * Finalize the module
 */
static void finalize(void)
{
    orte_grpcomm_base_modex_finalize();

    /* cancel the recv we posted */
    if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
        orte_rmcast.cancel_recv(ORTE_RMCAST_SYS_CHANNEL, ORTE_RMCAST_TAG_WILDCARD);
    }

    /* destruct the globals */
    OBJ_DESTRUCT(&barrier);
    OBJ_DESTRUCT(&allgather);
    OBJ_DESTRUCT(&onesided_barrier);
}

/**
 * A "broadcast-like" function to a job's processes.
 * @param jobid The job whose processes are to receive the message
 * @param buffer The data to broadcast
 */
static int xcast(orte_jobid_t job,
                 opal_buffer_t *buffer,
                 orte_rml_tag_t tag)
{
    int rc = ORTE_SUCCESS;
    int32_t n;
    opal_buffer_t buf;
    orte_rml_tag_t rmltag;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:xcast sent to job %s tag %ld",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(job), (long)tag));

    /* if there is no message to send, then just return ok */
    if (NULL == buffer) {
        return ORTE_SUCCESS;
    }

    /* setup a buffer to handle the xcast command to an app */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);

    /* insert the target tag */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &tag, 1, ORTE_RML_TAG_T))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }

    /* load the std data */
    if (ORTE_SUCCESS != (rc = orte_grpcomm_base_app_pack_xcast(ORTE_DAEMON_PROCESS_CMD,
                                                               job, &buf, buffer, tag))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }

    /* send it */
    if (ORTE_SUCCESS != (rc = orte_rmcast.send_buffer(ORTE_RMCAST_SYS_CHANNEL,
                                                      ORTE_RMCAST_TAG_MSG, &buf))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }
    /* unpack the rml tag so the buffer is in the right place
     * for processing
     */
    n=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &rmltag, &n, ORTE_RML_TAG_T))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }
    /* multicast will not deliver it to myself, so do it manually */
    ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &buf, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor);
    goto CLEANUP;

CLEANUP:
    OBJ_DESTRUCT(&buf);
    return rc;
}
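The xcast above relies on a tag-first envelope: the RML tag is packed ahead of the command and payload so every receiver, including the sender's own loopback path, can route the message before touching its contents, and unpacking the tag leaves the read cursor exactly where the command processor expects the buffer to start. A self-contained sketch of the idea (buffer_t and the pack/unpack helpers are hypothetical stand-ins for opal_buffer_t and opal_dss):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

typedef struct { uint8_t data[256]; size_t put; size_t get; } buffer_t;

static void pack_u32(buffer_t *b, uint32_t v) {
    memcpy(b->data + b->put, &v, sizeof v);  b->put += sizeof v;
}
static uint32_t unpack_u32(buffer_t *b) {
    uint32_t v;
    memcpy(&v, b->data + b->get, sizeof v);  b->get += sizeof v;
    return v;
}

int main(void) {
    buffer_t b = {{0}, 0, 0};
    pack_u32(&b, 42);   /* target RML tag, always packed first */
    pack_u32(&b, 26);   /* stand-in for the daemon command value */
    pack_u32(&b, 7);    /* target jobid; payload would follow */

    /* each receiver unpacks the tag first; the cursor now sits at the
     * start of the command, which is where the processor expects it */
    uint32_t tag = unpack_u32(&b);
    printf("route to tag %u, %zu bytes remain\n", tag, b.put - b.get);
    return 0;
}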

static void barrier_recv(int status, orte_process_name_t* sender,
                         opal_buffer_t *buffer,
                         orte_rml_tag_t tag, void *cbdata)
{
    orte_grpcomm_collective_t *coll = (orte_grpcomm_collective_t*)cbdata;

    OPAL_THREAD_LOCK(&coll->lock);
    /* flag as recvd */
    coll->recvd = 1;
    opal_condition_broadcast(&coll->cond);
    OPAL_THREAD_UNLOCK(&coll->lock);
}

static int mcast_barrier(void)
{
    int rc;
    opal_buffer_t buf;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:mcast entering barrier",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* if I am alone, just return */
    if (1 == orte_process_info.num_procs) {
        return ORTE_SUCCESS;
    }

    /* if I am a daemon, then multicast the barrier to
     * all other daemons and wait to hear them all
     */
    if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
        OPAL_THREAD_LOCK(&barrier.lock);
        barrier.recvd += 1;  /* account for me */
        OPAL_THREAD_UNLOCK(&barrier.lock);

        OBJ_CONSTRUCT(&buf, opal_buffer_t);
        /* send to everyone in my job */
        if (ORTE_SUCCESS != (rc = xcast(ORTE_PROC_MY_NAME->jobid, &buf, ORTE_RML_TAG_XCAST_BARRIER))) {
            ORTE_ERROR_LOG(rc);
            OBJ_DESTRUCT(&buf);
            return rc;
        }
        OBJ_DESTRUCT(&buf);
        /* wait to complete */
        OPAL_THREAD_LOCK(&barrier.lock);
        while (barrier.recvd < orte_process_info.num_procs) {
            opal_condition_wait(&barrier.cond, &barrier.lock);
        }
        barrier.recvd = 0;  /* reset for next time */
        OPAL_THREAD_UNLOCK(&barrier.lock);

        OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
                             "%s grpcomm:mcast received barrier release",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

        return ORTE_SUCCESS;
    }

    /* if I am an application process, then I must start by sending an RML
     * message to my local daemon. I cannot just multicast to all other procs
     * in my job as this barrier might be occurring during startup - and the
     * other procs might not have started yet, and so will miss my message
     */

    /* setup the recv to get the response */
    rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_BARRIER,
                                 ORTE_RML_NON_PERSISTENT, barrier_recv, &barrier);
    if (rc != ORTE_SUCCESS) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* send it and wait for the response */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);
    if (ORTE_SUCCESS != (rc = orte_grpcomm_base_app_barrier(ORTE_PROC_MY_DAEMON, &barrier))) {
        ORTE_ERROR_LOG(rc);
    }
    OBJ_DESTRUCT(&buf);

    /* don't need to cancel the recv as it only fires once */

    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
                         "%s grpcomm:mcast received barrier release",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    return rc;
}


/* quick timeout loop */
static bool timer_fired;

static void quicktime_cb(int fd, short event, void *cbdata)
{
    /* declare it fired */
    timer_fired = true;
}

static int mcast_onesided_barrier(void)
{
    opal_event_t *quicktime=NULL;
    struct timeval quicktimeval;
    int rc;

    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                         "%s grpcomm:mcast: onesided barrier called",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* if I am alone, just return */
    if (1 == orte_process_info.num_procs) {
        return ORTE_SUCCESS;
    }

    /* if we are not to use the barrier, then just return */
    if (!orte_orted_exit_with_barrier) {
        if (ORTE_PROC_IS_HNP) {
            /* if we are the HNP, we need to do a little delay to give
             * the orteds a chance to exit before we leave
             */
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                                 "%s grpcomm:mcast: onesided barrier adding delay timer",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            quicktimeval.tv_sec = 0;
            quicktimeval.tv_usec = 100;
            timer_fired = false;
            ORTE_DETECT_TIMEOUT(&quicktime, orte_process_info.num_procs, 1000, 10000, quicktime_cb);
            ORTE_PROGRESSED_WAIT(timer_fired, 0, 1);
        }
        return ORTE_SUCCESS;
    }

    /* if we are not the HNP, just send and leave */
    if (!ORTE_PROC_IS_HNP) {
        if (ORTE_SUCCESS != (rc = xcast(ORTE_PROC_MY_NAME->jobid, NULL, ORTE_RML_TAG_ONESIDED_BARRIER))) {
            ORTE_ERROR_LOG(rc);
        }
        return rc;
    }

    /* initialize things */
    OPAL_THREAD_LOCK(&onesided_barrier.lock);
    onesided_barrier.recvd += 1;  /* account for me */
    OPAL_THREAD_UNLOCK(&onesided_barrier.lock);

    /* wait to complete */
    OPAL_THREAD_LOCK(&onesided_barrier.lock);
    while (onesided_barrier.recvd < orte_process_info.num_procs) {
        opal_condition_wait(&onesided_barrier.cond, &onesided_barrier.lock);
    }
    /* reset the collective */
    onesided_barrier.recvd = 0;
    OPAL_THREAD_UNLOCK(&onesided_barrier.lock);
    return ORTE_SUCCESS;
}

static void allgather_recv(int status, orte_process_name_t* sender,
                           opal_buffer_t *buffer,
                           orte_rml_tag_t tag, void *cbdata)
{
    orte_grpcomm_collective_t *coll = (orte_grpcomm_collective_t*)cbdata;
    int rc;

    OPAL_THREAD_LOCK(&coll->lock);
    /* xfer the data */
    if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(&coll->results, buffer))) {
        ORTE_ERROR_LOG(rc);
    }
    /* the daemon returns ALL of our recipients in a single message */
    coll->recvd = orte_process_info.num_procs;
    opal_condition_broadcast(&coll->cond);
    OPAL_THREAD_UNLOCK(&coll->lock);
}

static int mcast_allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)
{
    int rc;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:mcast entering allgather",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* setup to receive results */
    rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER,
                                 ORTE_RML_NON_PERSISTENT, allgather_recv, &allgather);
    if (rc != ORTE_SUCCESS) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* everyone sends data to their local daemon and waits for response */
    if (ORTE_SUCCESS != (rc = orte_grpcomm_base_app_allgather(ORTE_PROC_MY_DAEMON,
                                                              &allgather, sbuf, rbuf))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* don't need to cancel the recv as it only fires once */

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:mcast allgather completed",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    return rc;
}

/*** MODEX SECTION ***/
static int modex(opal_list_t *procs)
{
    int rc=ORTE_SUCCESS;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:mcast: modex entered",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* if we were given a list of procs to modex with, then this is happening
     * as part of a connect/accept operation. In this case, we -must- do the
     * modex for two reasons:
     *
     * (a) the modex could involve procs from different mpiruns. In this case,
     *     there is no way for the two sets of procs to know which node the
     *     other procs are on, so we cannot use the profile_file to determine
     *     their contact info
     *
     * (b) in a comm_spawn, the parent job does not have a pidmap for the
     *     child job. Thus, it cannot know where the child procs are located,
     *     and cannot use the profile_file to determine their contact info
     *
     */
    if (NULL != procs) {
        if (ORTE_SUCCESS != (rc = orte_grpcomm_base_full_modex(procs, false))) {
            ORTE_ERROR_LOG(rc);
        }
        return rc;
    }

    if (OPAL_ENABLE_HETEROGENEOUS_SUPPORT) {
        /* decide if we need to add the architecture to the modex. Check
         * first to see if hetero is enabled - if not, then we clearly
         * don't need to exchange arch's as they are all identical
         */
        /* Case 1: If different apps in this job were built differently - e.g., some
         * are built 32-bit while others are built 64-bit - then we need to modex
         * regardless of any other consideration. The user is reqd to tell us via a
         * cmd line option if this situation exists, which will result in an mca param
         * being set for us, so all we need to do is check for the global boolean
         * that corresponds to that param
         *
         * Case 2: the nodes are hetero, but the app binaries were built
         * the same - i.e., either they are both 32-bit, or they are both 64-bit, but
         * no mixing of the two. In this case, we include the info in the modex
         */
        if (orte_hetero_apps || !orte_homogeneous_nodes) {
            OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                                 "%s grpcomm:mcast: modex is required",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

            if (ORTE_SUCCESS != (rc = orte_grpcomm_base_peer_modex(false))) {
                ORTE_ERROR_LOG(rc);
            }
            return rc;
        }
    }

    /* no modex is required - see if the data was included in the launch message */
    if (orte_send_profile) {
        /* the info was provided in the nidmap - there is nothing more we have to do */
        OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                             "%s grpcomm:mcast:modex using nidmap",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        return ORTE_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:mcast: modex completed",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    return rc;
}

static int get_proc_attr(const orte_process_name_t proc,
                         const char * attribute_name, void **val,
                         size_t *size)
{
    orte_nid_t *nid;
    opal_list_item_t *item;
    orte_attr_t *attr;

    /* find this proc's node in the nidmap */
    if (NULL == (nid = orte_util_lookup_nid((orte_process_name_t*)&proc))) {
        /* proc wasn't found - return error */
        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                             "%s grpcomm:mcast:get_proc_attr: no modex entry for proc %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&proc)));
        return ORTE_ERR_NOT_FOUND;

    }

    /* look for this attribute */
    for (item = opal_list_get_first(&nid->attrs);
         item != opal_list_get_end(&nid->attrs);
         item = opal_list_get_next(item)) {
        attr = (orte_attr_t*)item;
        if (0 == strcmp(attr->name, attribute_name)) {
            /* copy the data to the caller */
            void *copy = malloc(attr->size);

            if (copy == NULL) {
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            memcpy(copy, attr->bytes, attr->size);
            *val = copy;
            *size = attr->size;
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                                 "%s grpcomm:mcast:get_proc_attr: found %d bytes for attr %s on proc %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)attr->size,
                                 attribute_name, ORTE_NAME_PRINT(&proc)));
            return ORTE_SUCCESS;
        }
    }

    /* get here if attribute isn't found */
    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                         "%s grpcomm:mcast:get_proc_attr: no attr avail or zero byte size for proc %s attribute %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(&proc), attribute_name));
    *val = NULL;
    *size = 0;

    return ORTE_SUCCESS;
}

static void daemon_recv(int status,
                        orte_rmcast_channel_t channel,
                        orte_rmcast_tag_t tag,
                        orte_process_name_t *sender,
                        opal_buffer_t *buf, void* cbdata)
{
    int32_t n;
    orte_rml_tag_t rmltag;
    int rc;

    /* unpack the rml tag */
    n=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buf, &rmltag, &n, ORTE_RML_TAG_T))) {
        ORTE_ERROR_LOG(rc);
        return;
    }

    switch (rmltag) {
        case ORTE_RML_TAG_DAEMON:
            /* this is a cmd, so deliver it */
            ORTE_MESSAGE_EVENT(sender, buf, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor);
            break;

        case ORTE_RML_TAG_ONESIDED_BARRIER:
            OPAL_THREAD_LOCK(&onesided_barrier.lock);
            onesided_barrier.recvd += 1;
            /* check for completion */
            if (orte_process_info.num_procs <= onesided_barrier.recvd) {
                opal_condition_broadcast(&onesided_barrier.cond);
            }
            OPAL_THREAD_UNLOCK(&onesided_barrier.lock);
            break;

        case ORTE_RML_TAG_BARRIER:
            OPAL_THREAD_LOCK(&barrier.lock);
            /* the recv is the trigger */
            barrier.recvd = 1;
            opal_condition_broadcast(&barrier.cond);
            OPAL_THREAD_UNLOCK(&barrier.lock);
            break;

        case ORTE_RML_TAG_ALLGATHER:
            OPAL_THREAD_LOCK(&allgather.lock);
            allgather.recvd += 1;
            /* xfer the data */
            opal_dss.copy_payload(&allgather.results, buf);
            /* check for completion */
            if (orte_process_info.num_procs <= allgather.recvd) {
                opal_condition_broadcast(&allgather.cond);
            }
            OPAL_THREAD_UNLOCK(&allgather.lock);
            break;

        default:
            break;
    }
}

/* this function gets called when the daemon has received input from all
 * of its local procs
 */
void orte_grpcomm_mcast_daemon_coll(orte_process_name_t* sender, opal_buffer_t* buffer)
{
    orte_jobid_t jobid;
    orte_rml_tag_t rmltag;
    opal_buffer_t buf;
    int32_t n;
    int rc;

    /* we have to partially unpack the provided buffer so it can be
     * reconstructed properly for use here
     */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);

    /* unpack the jobid */
    n = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &jobid, &n, ORTE_JOBID))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }

    /* unpack the target tag */
    n = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &rmltag, &n, ORTE_RML_TAG))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }

    /* pack things in the proper order */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &rmltag, 1, ORTE_RML_TAG))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &jobid, 1, ORTE_JOBID))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }

CLEANUP:
    OBJ_DESTRUCT(&buf);
}
orte/mca/grpcomm/mcast/grpcomm_mcast.h (new file, 39 lines)
@ -0,0 +1,39 @@
/* -*- C -*-
 *
 * Copyright (c) 2010 Cisco Systems, Inc.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 */
#ifndef GRPCOMM_MCAST_H
#define GRPCOMM_MCAST_H

#include "orte_config.h"

#include "orte/mca/grpcomm/grpcomm.h"

BEGIN_C_DECLS

/*
 * Component open / close
 */
int orte_grpcomm_mcast_open(void);
int orte_grpcomm_mcast_close(void);
int orte_grpcomm_mcast_component_query(mca_base_module_t **module, int *priority);
void orte_grpcomm_mcast_daemon_coll(orte_process_name_t* sender, opal_buffer_t* buffer);


/*
 * Grpcomm interfaces
 */

ORTE_MODULE_DECLSPEC extern orte_grpcomm_base_component_t mca_grpcomm_mcast_component;
extern orte_grpcomm_base_module_t orte_grpcomm_mcast_module;

END_C_DECLS

#endif
orte/mca/grpcomm/mcast/grpcomm_mcast_component.c (new file, 76 lines)
@ -0,0 +1,76 @@
/* -*- C -*-
 *
 * Copyright (c) 2010 Cisco Systems, Inc.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 */

/*
 * includes
 */
#include "orte_config.h"
#include "orte/constants.h"


#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"

#include "orte/mca/grpcomm/base/base.h"
#include "grpcomm_mcast.h"

/*
 * Struct of function pointers that need to be initialized
 */
orte_grpcomm_base_component_t mca_grpcomm_mcast_component = {
    {
        ORTE_GRPCOMM_BASE_VERSION_2_0_0,

        "mcast",                /* MCA module name */
        ORTE_MAJOR_VERSION,     /* MCA module major version */
        ORTE_MINOR_VERSION,     /* MCA module minor version */
        ORTE_RELEASE_VERSION,   /* MCA module release version */
        orte_grpcomm_mcast_open,   /* module open */
        orte_grpcomm_mcast_close,  /* module close */
        orte_grpcomm_mcast_component_query  /* module query */
    },
    {
        /* The component is checkpoint ready */
        MCA_BASE_METADATA_PARAM_CHECKPOINT
    }
};

/* Open the component */
int orte_grpcomm_mcast_open(void)
{
    return ORTE_SUCCESS;
}

int orte_grpcomm_mcast_close(void)
{
    return ORTE_SUCCESS;
}

int orte_grpcomm_mcast_component_query(mca_base_module_t **module, int *priority)
{
    /* This component is selected only when requested */
    bool is_required = false;

    mca_base_is_component_required(&orte_grpcomm_base.components_available,
                                   &mca_grpcomm_mcast_component.base_version,
                                   true,
                                   &is_required);

    if( !is_required ) {
        *priority = 0;
        *module = NULL;
        return ORTE_ERROR;
    }

    *priority = 1000;
    *module = (mca_base_module_t *)&orte_grpcomm_mcast_module;
    return ORTE_SUCCESS;
}
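Worth noting: because the query above declines selection unless the component was explicitly required, the mcast module never wins arbitration by default. Assuming the usual MCA selection syntax, it would be requested on the command line along the lines of "mpirun -mca grpcomm mcast ...".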
@ -970,7 +970,10 @@ find_my_procs:
    }

    /* flag that the launch msg has been processed so daemon collectives can proceed */
    OPAL_THREAD_LOCK(&jobdat->lock);
    jobdat->launch_msg_processed = true;
    opal_condition_broadcast(&jobdat->cond);
    OPAL_THREAD_UNLOCK(&jobdat->lock);

    if (NULL != app_idx) {
        free(app_idx);
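The broadcast above pairs with a consumer that parks on the same condition until the launch message has been digested, rather than failing when a daemon collective arrives early. A hedged sketch of that waiting side in plain pthreads (jobdat_t stands in for orte_odls_job_t; the real daemon collective does considerably more):

#include <pthread.h>
#include <stdbool.h>

typedef struct {
    pthread_mutex_t lock;
    pthread_cond_t  cond;
    bool            launch_msg_processed;
} jobdat_t;

/* block until the launch message for this job has been fully processed */
void wait_for_launch_msg(jobdat_t *jobdat)
{
    pthread_mutex_lock(&jobdat->lock);
    while (!jobdat->launch_msg_processed) {
        pthread_cond_wait(&jobdat->cond, &jobdat->lock);
    }
    pthread_mutex_unlock(&jobdat->lock);
}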
@ -32,6 +32,8 @@
#include "opal/util/output.h"
#include "opal/util/path.h"
#include "opal/util/argv.h"
#include "opal/threads/mutex.h"
#include "opal/threads/condition.h"

#include "orte/mca/plm/plm_types.h"
#include "orte/util/name_fns.h"
@ -107,6 +109,8 @@ OBJ_CLASS_INSTANCE(orte_odls_child_t,

static void orte_odls_job_constructor(orte_odls_job_t *ptr)
{
    OBJ_CONSTRUCT(&ptr->lock, opal_mutex_t);
    OBJ_CONSTRUCT(&ptr->cond, opal_condition_t);
    ptr->jobid = ORTE_JOBID_INVALID;
    ptr->state = ORTE_JOB_STATE_UNDEF;
    ptr->launch_msg_processed = false;
@ -133,6 +137,8 @@ static void orte_odls_job_destructor(orte_odls_job_t *ptr)
{
    orte_std_cntr_t i;

    OBJ_DESTRUCT(&ptr->lock);
    OBJ_DESTRUCT(&ptr->cond);
    if (NULL != ptr->apps) {
        for (i=0; i < ptr->num_apps; i++) {
            OBJ_RELEASE(ptr->apps[i]);
@ -29,6 +29,8 @@

#include "opal/class/opal_list.h"
#include "opal/dss/dss_types.h"
#include "opal/threads/mutex.h"
#include "opal/threads/condition.h"

#include "orte/mca/plm/plm_types.h"
#include "orte/mca/grpcomm/grpcomm_types.h"
@ -77,6 +79,10 @@ typedef uint8_t orte_daemon_cmd_flag_t;
#define ORTE_DAEMON_CHECKIN_CMD          (orte_daemon_cmd_flag_t) 24
#define ORTE_TOOL_CHECKIN_CMD            (orte_daemon_cmd_flag_t) 25

/* process msg command */
#define ORTE_DAEMON_PROCESS_CMD          (orte_daemon_cmd_flag_t) 26


/*
 * List object to locally store the process names and pids of
 * our children. This can subsequently be used to order termination
@ -110,6 +116,8 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_odls_child_t);
 */
typedef struct orte_odls_job_t {
    opal_list_item_t super;     /* required to place this on a list */
    opal_mutex_t lock;
    opal_condition_t cond;
    orte_job_state_t state;     /* state of the job */
    orte_jobid_t jobid;         /* jobid for this data */
    bool launch_msg_processed;  /* launch msg has been fully processed */
@ -51,6 +51,7 @@ typedef int32_t orte_rmcast_tag_t;
#define ORTE_RMCAST_TAG_ANNOUNCE    3
#define ORTE_RMCAST_TAG_OUTPUT      4
#define ORTE_RMCAST_TAG_PS          5
#define ORTE_RMCAST_TAG_MSG         6

/* starting value for dynamically assignable tags */
#define ORTE_RMCAST_TAG_DYNAMIC     100
orte/mca/rmcast/tcp/configure.m4 (new file, 19 lines)
@ -0,0 +1,19 @@
# -*- shell-script -*-
#
# Copyright (c) 2010 Cisco Systems, Inc.  All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

# MCA_rmcast_tcp_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_rmcast_tcp_CONFIG], [
    # if we don't want reliable multicast, don't compile
    # this component
    AS_IF([test "$orte_want_multicast" = "1"],
          [$1], [$2])
])dnl
orte/mca/rmcast/udp/configure.m4 (new file, 19 lines)
@ -0,0 +1,19 @@
# -*- shell-script -*-
#
# Copyright (c) 2010 Cisco Systems, Inc.  All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

# MCA_rmcast_udp_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_rmcast_udp_CONFIG], [
    # if we don't want reliable multicast, don't compile
    # this component
    AS_IF([test "$orte_want_multicast" = "1"],
          [$1], [$2])
])dnl
@ -65,6 +65,7 @@
#include "orte/mca/plm/base/plm_private.h"
#include "orte/mca/routed/routed.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/rmcast/rmcast.h"

#include "orte/mca/odls/base/odls_private.h"

@ -178,7 +179,7 @@ void orte_daemon_cmd_processor(int fd, short event, void *data)
    int ret;
    ptrdiff_t unpack_rel, save_rel;
    orte_std_cntr_t n;
    orte_daemon_cmd_flag_t command;
    orte_daemon_cmd_flag_t command, cmd;

    /* check to see if we are in a progress recursion */
    if (ORTE_PROC_IS_DAEMON && 1 < (ret = opal_progress_recursion_depth())) {
@ -248,8 +249,9 @@ void orte_daemon_cmd_processor(int fd, short event, void *data)
        goto CLEANUP;
    }

    /* see if this is a "process-and-relay" command - i.e., an xcast is underway */
    if (ORTE_DAEMON_PROCESS_AND_RELAY_CMD == command) {
    /* see if this is a "process-and-relay" or "process" command - i.e., an xcast is underway */
    if (ORTE_DAEMON_PROCESS_AND_RELAY_CMD == command ||
        ORTE_DAEMON_PROCESS_CMD == command) {
        /* get the target jobid and tag */
        n = 1;
        if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job, &n, ORTE_JOBID))) {
@ -265,12 +267,12 @@ void orte_daemon_cmd_processor(int fd, short event, void *data)
        save_rel = buffer->unpack_ptr - buffer->base_ptr;
        /* unpack the command that will actually be executed */
        n = 1;
        if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &command, &n, ORTE_DAEMON_CMD))) {
        if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &cmd, &n, ORTE_DAEMON_CMD))) {
            ORTE_ERROR_LOG(ret);
            goto CLEANUP;
        }
        /* is this an add-procs cmd? */
        if (ORTE_DAEMON_ADD_LOCAL_PROCS == command) {
        if (ORTE_DAEMON_ADD_LOCAL_PROCS == cmd) {
            /* store the time the cmd was recvd */
            if (orte_timing) {
                orte_daemon_msg_recvd.tv_sec = mesg_recvd.tv_sec;
@ -285,16 +287,19 @@ void orte_daemon_cmd_processor(int fd, short event, void *data)
            save_rel = buffer->unpack_ptr - buffer->base_ptr;
        }

        /* setup the relay buffer */
        OBJ_CONSTRUCT(&relay_buf, opal_buffer_t);
        /* rewind the buffer to the beginning */
        buffer->unpack_ptr = buffer->base_ptr + unpack_rel;
        /* copy everything to the relay buffer */
        opal_dss.copy_payload(&relay_buf, buffer);
        /* do the relay */
        send_relay(&relay_buf);
        /* cleanup */
        OBJ_DESTRUCT(&relay_buf);
        if (ORTE_DAEMON_PROCESS_AND_RELAY_CMD == command) {
            /* need to relay it */
            /* setup the relay buffer */
            OBJ_CONSTRUCT(&relay_buf, opal_buffer_t);
            /* rewind the buffer to the beginning */
            buffer->unpack_ptr = buffer->base_ptr + unpack_rel;
            /* copy everything to the relay buffer */
            opal_dss.copy_payload(&relay_buf, buffer);
            /* do the relay */
            send_relay(&relay_buf);
            /* cleanup */
            OBJ_DESTRUCT(&relay_buf);
        }

        /* rewind the buffer to the right place for processing the cmd */
        buffer->unpack_ptr = buffer->base_ptr + save_rel;
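The unpack_rel/save_rel bookkeeping above saves offsets rather than raw pointers, so the buffer can be rewound once to feed the relay and then fast-forwarded past the routing header for local processing. A minimal sketch of the same cursor discipline (buffer_t and the helpers are hypothetical stand-ins for opal_buffer_t):

#include <stddef.h>
#include <stdint.h>

typedef struct { uint8_t *base; uint8_t *cur; } buffer_t;

static ptrdiff_t buf_tell(const buffer_t *b)          { return b->cur - b->base; }
static void      buf_seek(buffer_t *b, ptrdiff_t off) { b->cur = b->base + off; }

void process_and_relay_sketch(buffer_t *buf, int needs_relay)
{
    ptrdiff_t unpack_rel = buf_tell(buf);  /* start of the relayable region */
    /* ... unpack the jobid, tag, and embedded command here,
     * advancing buf->cur past the routing header ... */
    ptrdiff_t save_rel = buf_tell(buf);    /* start of the local-processing region */

    if (needs_relay) {
        buf_seek(buf, unpack_rel);         /* rewind: relay everything from here on */
        /* send_relay(buf); */
    }
    buf_seek(buf, save_rel);               /* fast-forward: execute the command locally */
    /* execute_cmd(buf); */
}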
@ -73,10 +73,8 @@
#include "orte/mca/ras/base/ras_private.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/rmaps/base/base.h"
#if ORTE_ENABLE_MULTICAST
#include "orte/mca/rmcast/rmcast.h"
#include "orte/mca/rmcast/base/base.h"
#endif
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/routed/routed.h"
@ -312,7 +310,7 @@ void orte_info_open_components(void)
    }
    map = OBJ_NEW(orte_info_component_map_t);
    map->type = strdup("grpcomm");
    map->components = &mca_grpcomm_base_components_available;
    map->components = &orte_grpcomm_base.components_available;
    opal_pointer_array_add(&component_map, map);

    if (ORTE_SUCCESS != orte_ess_base_open()) {
@ -372,7 +370,6 @@ void orte_info_open_components(void)
    map->components = &orte_rmaps_base.available_components;
    opal_pointer_array_add(&component_map, map);

#if ORTE_ENABLE_MULTICAST
    if (ORTE_SUCCESS != orte_rmcast_base_open()) {
        goto error;
    }
@ -380,7 +377,6 @@ void orte_info_open_components(void)
    map->type = strdup("rmcast");
    map->components = &orte_rmcast_base.rmcast_opened;
    opal_pointer_array_add(&component_map, map);
#endif

    if (ORTE_SUCCESS != orte_rml_base_open()) {
        goto error;
@ -488,10 +484,7 @@ void orte_info_close_components()
    (void) orte_rml_base_close();
    (void) orte_routed_base_close();
    (void) mca_oob_base_close();
#if ORTE_ENABLE_MULTICAST

    (void) orte_rmcast_base_close();
#endif
#endif
    (void) orte_errmgr_base_close();


@ -202,9 +202,7 @@ int main(int argc, char *argv[])
    opal_pointer_array_add(&mca_types, "odls");
    opal_pointer_array_add(&mca_types, "ras");
    opal_pointer_array_add(&mca_types, "rmaps");
#if ORTE_ENABLE_MULTICAST
    opal_pointer_array_add(&mca_types, "rmcast");
#endif
    opal_pointer_array_add(&mca_types, "rml");
    opal_pointer_array_add(&mca_types, "routed");
    opal_pointer_array_add(&mca_types, "plm");