Mostly adds diagnostic messages (enabled only when specific MCA parameters are set) to help debug the problems being observed in mpirun2. Also fixes a couple of minor issues, which now seems to enable the system to run on the Mac. I believe these are all the changes involved; I will verify with another checkout.
This commit was SVN r2467.
Этот коммит содержится в:
родитель
ae1a20a354
Коммит
7fc8600483
@ -8,6 +8,9 @@
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "util/output.h"
|
||||
|
||||
#include "mca/mca.h"
|
||||
#include "mca/gpr/base/base.h"
|
||||
#include "gpr_proxy.h"
|
||||
@ -78,10 +81,14 @@ int gpr_proxy_put(ompi_registry_mode_t mode, char *segment,
|
||||
ompi_buffer_t answer;
|
||||
mca_gpr_cmd_flag_t command;
|
||||
char **tokptr;
|
||||
int recv_tag, i;
|
||||
int recv_tag, i, ret;
|
||||
int32_t num_tokens, object_size;
|
||||
int16_t response;
|
||||
|
||||
if (mca_gpr_proxy_debug) {
|
||||
ompi_output(0, "gpr_proxy_put: entered for segment %s 1st token %s", segment, *tokens);
|
||||
}
|
||||
|
||||
command = MCA_GPR_PUT_CMD;
|
||||
recv_tag = MCA_OOB_TAG_GPR;
|
||||
|
||||
@ -131,7 +138,14 @@ int gpr_proxy_put(ompi_registry_mode_t mode, char *segment,
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) {
|
||||
if (mca_gpr_proxy_debug) {
|
||||
ompi_output(0, "gpr_proxy_put: initiating send");
|
||||
}
|
||||
|
||||
if (0 > (ret = mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0))) {
|
||||
if (mca_gpr_proxy_debug) {
|
||||
ompi_output(0, "gpr_proxy_put: send failed with return %d", ret);
|
||||
}
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
|
@ -101,10 +101,18 @@ mca_gpr_base_module_t* mca_gpr_proxy_init(bool *allow_multi_user_threads, bool *
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (mca_gpr_proxy_debug) {
|
||||
ompi_output(0, "gpr_proxy_init called");
|
||||
}
|
||||
|
||||
/* If we're NOT the seed, then we want to be selected, so do all
|
||||
the setup and return the module */
|
||||
if (!ompi_process_info.seed) {
|
||||
|
||||
if (mca_gpr_proxy_debug) {
|
||||
ompi_output(0, "gpr_proxy_init: proxy selected");
|
||||
}
|
||||
|
||||
/* Return a module (choose an arbitrary, positive priority --
|
||||
it's only relevant compared to other ns components). If
|
||||
we're not the seed, then we don't want to be selected, so
|
||||
@ -146,6 +154,10 @@ mca_gpr_base_module_t* mca_gpr_proxy_init(bool *allow_multi_user_threads, bool *
|
||||
int mca_gpr_proxy_finalize(void)
|
||||
{
|
||||
|
||||
if (mca_gpr_proxy_debug) {
|
||||
ompi_output(0, "finalizing gpr proxy");
|
||||
}
|
||||
|
||||
if (initialized) {
|
||||
initialized = false;
|
||||
}
|
||||
@ -171,6 +183,10 @@ void mca_gpr_proxy_notify_recv(int status, ompi_process_name_t* sender,
|
||||
bool found;
|
||||
mca_gpr_notify_request_tracker_t *trackptr;
|
||||
|
||||
if (mca_gpr_proxy_debug) {
|
||||
ompi_output(0, "gpr proxy: received trigger message");
|
||||
}
|
||||
|
||||
message = OBJ_NEW(ompi_registry_notify_message_t);
|
||||
|
||||
if ((OMPI_SUCCESS != ompi_unpack(buffer, &command, 1, MCA_GPR_OOB_PACK_CMD)) ||
|
||||
|
@ -65,7 +65,7 @@ int gpr_replica_put(ompi_registry_mode_t addr_mode, char *segment,
|
||||
|
||||
|
||||
if (mca_gpr_replica_debug) {
|
||||
ompi_output(0, "gpr replica: put entered");
|
||||
ompi_output(0, "gpr replica: put entered on segment %s 1st token %s", segment, *tokens);
|
||||
}
|
||||
|
||||
/* protect ourselves against errors */
|
||||
@ -441,7 +441,7 @@ int gpr_replica_synchro(ompi_registry_synchro_mode_t synchro_mode,
|
||||
ompi_registry_notify_message_t *notify_msg;
|
||||
|
||||
if (mca_gpr_replica_debug) {
|
||||
ompi_output(0, "gpr replica: synchro entered");
|
||||
ompi_output(0, "gpr replica: synchro entered on segment %s trigger %d", segment, trigger);
|
||||
}
|
||||
|
||||
/* protect against errors */
|
||||
|
@ -91,24 +91,6 @@ static void mca_gpr_replica_keytable_construct(mca_gpr_replica_keytable_t* keyta
|
||||
/* destructor - used to free any resources held by instance */
|
||||
/*
 * Destructor for a keytable instance.
 *
 * As written, this routine releases nothing: its only effect is to emit
 * an entry trace and an exit trace through ompi_output() when
 * mca_gpr_replica_debug is non-zero.
 *
 * NOTE(review): the loop that used to drain the keytable and free each
 * entry's token was already disabled (commented out) here, which left the
 * local "keyptr" unused.  Both the dead code and the unused local have
 * been dropped; confirm that skipping the cleanup is intentional.
 *
 * keytable - the instance being destroyed (not touched by this routine).
 */
static void mca_gpr_replica_keytable_destructor(mca_gpr_replica_keytable_t* keytable)
{
    if (mca_gpr_replica_debug) {
        ompi_output(0, "entered keytable destructor");
    }

    if (mca_gpr_replica_debug) {
        ompi_output(0, "exiting keytable destructor");
    }
}
|
||||
|
||||
/* define instance of ompi_class_t */
|
||||
@ -368,14 +350,12 @@ mca_gpr_base_module_t *mca_gpr_replica_init(bool *allow_multi_user_threads, bool
|
||||
}
|
||||
|
||||
/* issue the non-blocking receive */
|
||||
if (!mca_gpr_replica_debug) {
|
||||
rc = mca_oob_recv_packed_nb(MCA_OOB_NAME_ANY, MCA_OOB_TAG_GPR, 0, mca_gpr_replica_recv, NULL);
|
||||
if(rc != OMPI_SUCCESS && rc != OMPI_ERR_NOT_IMPLEMENTED) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (!mca_gpr_replica_debug) {
|
||||
if (mca_gpr_replica_debug) {
|
||||
ompi_output(0, "nb receive setup");
|
||||
}
|
||||
|
||||
@ -393,45 +373,49 @@ mca_gpr_base_module_t *mca_gpr_replica_init(bool *allow_multi_user_threads, bool
|
||||
*/
|
||||
int mca_gpr_replica_finalize(void)
|
||||
{
|
||||
mca_gpr_replica_segment_t *seg;
|
||||
mca_gpr_replica_keytable_t *kt;
|
||||
mca_gpr_replica_keylist_t *kl;
|
||||
mca_gpr_notify_request_tracker_t *tk;
|
||||
mca_gpr_idtag_list_t *id;
|
||||
|
||||
/* free all storage, but only if this component was initialized */
|
||||
|
||||
if (initialized) {
|
||||
|
||||
while (NULL != (seg = (mca_gpr_replica_segment_t*)ompi_list_remove_first(&mca_gpr_replica_head.registry))) {
|
||||
OBJ_RELEASE(seg);
|
||||
}
|
||||
OBJ_DESTRUCT(&mca_gpr_replica_head.registry);
|
||||
|
||||
while (NULL != (kt = (mca_gpr_replica_keytable_t*)ompi_list_remove_first(&mca_gpr_replica_head.segment_dict))) {
|
||||
OBJ_RELEASE(kt);
|
||||
}
|
||||
OBJ_DESTRUCT(&mca_gpr_replica_head.segment_dict);
|
||||
|
||||
while (NULL != (kl = (mca_gpr_replica_keylist_t*)ompi_list_remove_first(&mca_gpr_replica_head.freekeys))) {
|
||||
OBJ_RELEASE(kl);
|
||||
}
|
||||
OBJ_DESTRUCT(&mca_gpr_replica_head.freekeys);
|
||||
|
||||
|
||||
while (NULL != (tk = (mca_gpr_notify_request_tracker_t*)ompi_list_remove_first(&mca_gpr_replica_notify_request_tracker))) {
|
||||
OBJ_RELEASE(tk);
|
||||
}
|
||||
OBJ_DESTRUCT(&mca_gpr_replica_notify_request_tracker);
|
||||
|
||||
|
||||
while (NULL != (id = (mca_gpr_idtag_list_t*)ompi_list_remove_first(&mca_gpr_replica_free_notify_id_tags))) {
|
||||
OBJ_RELEASE(id);
|
||||
}
|
||||
OBJ_DESTRUCT(&mca_gpr_replica_free_notify_id_tags);
|
||||
initialized = false;
|
||||
if (mca_gpr_replica_debug) {
|
||||
ompi_output(0, "finalizing gpr replica");
|
||||
}
|
||||
|
||||
/* mca_gpr_replica_segment_t *seg; */
|
||||
/* mca_gpr_replica_keytable_t *kt; */
|
||||
/* mca_gpr_replica_keylist_t *kl; */
|
||||
/* mca_gpr_notify_request_tracker_t *tk; */
|
||||
/* mca_gpr_idtag_list_t *id; */
|
||||
|
||||
/* /\* free all storage, but only if this component was initialized *\/ */
|
||||
|
||||
/* if (initialized) { */
|
||||
|
||||
/* while (NULL != (seg = (mca_gpr_replica_segment_t*)ompi_list_remove_first(&mca_gpr_replica_head.registry))) { */
|
||||
/* OBJ_RELEASE(seg); */
|
||||
/* } */
|
||||
/* OBJ_DESTRUCT(&mca_gpr_replica_head.registry); */
|
||||
|
||||
/* while (NULL != (kt = (mca_gpr_replica_keytable_t*)ompi_list_remove_first(&mca_gpr_replica_head.segment_dict))) { */
|
||||
/* OBJ_RELEASE(kt); */
|
||||
/* } */
|
||||
/* OBJ_DESTRUCT(&mca_gpr_replica_head.segment_dict); */
|
||||
|
||||
/* while (NULL != (kl = (mca_gpr_replica_keylist_t*)ompi_list_remove_first(&mca_gpr_replica_head.freekeys))) { */
|
||||
/* OBJ_RELEASE(kl); */
|
||||
/* } */
|
||||
/* OBJ_DESTRUCT(&mca_gpr_replica_head.freekeys); */
|
||||
|
||||
|
||||
/* while (NULL != (tk = (mca_gpr_notify_request_tracker_t*)ompi_list_remove_first(&mca_gpr_replica_notify_request_tracker))) { */
|
||||
/* OBJ_RELEASE(tk); */
|
||||
/* } */
|
||||
/* OBJ_DESTRUCT(&mca_gpr_replica_notify_request_tracker); */
|
||||
|
||||
|
||||
/* while (NULL != (id = (mca_gpr_idtag_list_t*)ompi_list_remove_first(&mca_gpr_replica_free_notify_id_tags))) { */
|
||||
/* OBJ_RELEASE(id); */
|
||||
/* } */
|
||||
/* OBJ_DESTRUCT(&mca_gpr_replica_free_notify_id_tags); */
|
||||
/* initialized = false; */
|
||||
/* } */
|
||||
|
||||
/* All done */
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
@ -1098,6 +1082,10 @@ void gpr_replica_remote_notify(ompi_process_name_t *recipient, int recipient_tag
|
||||
char **tokptr;
|
||||
int recv_tag;
|
||||
|
||||
if (mca_gpr_replica_debug) {
|
||||
ompi_output(0, "sending trigger message");
|
||||
}
|
||||
|
||||
command = MCA_GPR_NOTIFY_CMD;
|
||||
recv_tag = MCA_OOB_TAG_GPR_NOTIFY;
|
||||
|
||||
|
@ -610,6 +610,10 @@ ompi_registry_notify_message_t *gpr_replica_construct_notify_message(ompi_regist
|
||||
char **tokptr, **tokptr2;
|
||||
int num_tokens, i;
|
||||
|
||||
if (mca_gpr_replica_debug) {
|
||||
ompi_output(0, "trigger fired on segment %s", segment);
|
||||
}
|
||||
|
||||
/* protect against errors */
|
||||
if (NULL == segment) {
|
||||
return NULL;
|
||||
|
@ -22,8 +22,7 @@ libmca_ns_base_la_SOURCES = \
|
||||
ns_base_close.c \
|
||||
ns_base_select.c \
|
||||
ns_base_open.c \
|
||||
ns_base_local_fns.c \
|
||||
ns_base_remote_fns.c
|
||||
ns_base_local_fns.c
|
||||
|
||||
|
||||
# Conditionally install the header files
|
||||
|
@ -78,7 +78,6 @@ extern "C" {
|
||||
|
||||
extern int mca_ns_base_output;
|
||||
extern mca_ns_base_module_t ompi_name_server; /* holds selected module's function pointers */
|
||||
extern ompi_process_name_t *mca_ns_my_replica; /* the name of the replica for this process */
|
||||
extern bool mca_ns_base_selected;
|
||||
extern ompi_list_t mca_ns_base_components_available;
|
||||
extern mca_ns_base_component_t mca_ns_base_selected_component;
|
||||
|
@ -27,7 +27,6 @@
|
||||
*/
|
||||
int mca_ns_base_output = -1;
|
||||
mca_ns_base_module_t ompi_name_server;
|
||||
ompi_process_name_t *mca_ns_my_replica;
|
||||
bool mca_ns_base_selected = false;
|
||||
ompi_list_t mca_ns_base_components_available;
|
||||
mca_ns_base_component_t mca_ns_base_selected_component;
|
||||
|
@ -7,4 +7,5 @@ include $(top_ompi_srcdir)/config/Makefile.options
|
||||
noinst_LTLIBRARIES = libmca_ns_proxy.la
|
||||
libmca_ns_proxy_la_SOURCES = \
|
||||
ns_proxy.h \
|
||||
ns_proxy.c \
|
||||
ns_proxy_component.c
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "util/pack.h"
|
||||
#include "mca/oob/base/base.h"
|
||||
#include "mca/ns/base/base.h"
|
||||
#include "ns_proxy.h"
|
||||
|
||||
/**
|
||||
* globals
|
||||
@ -20,7 +21,7 @@
|
||||
* functions
|
||||
*/
|
||||
|
||||
mca_ns_base_cellid_t ns_base_create_cellid(void)
|
||||
mca_ns_base_cellid_t ns_proxy_create_cellid(void)
|
||||
{
|
||||
ompi_buffer_t cmd;
|
||||
mca_ns_base_cellid_t cell;
|
||||
@ -63,7 +64,7 @@ mca_ns_base_cellid_t ns_base_create_cellid(void)
|
||||
}
|
||||
|
||||
|
||||
mca_ns_base_jobid_t ns_base_create_jobid(void)
|
||||
mca_ns_base_jobid_t ns_proxy_create_jobid(void)
|
||||
{
|
||||
ompi_buffer_t cmd;
|
||||
mca_ns_base_jobid_t job;
|
||||
@ -106,7 +107,7 @@ mca_ns_base_jobid_t ns_base_create_jobid(void)
|
||||
}
|
||||
|
||||
|
||||
mca_ns_base_vpid_t ns_base_reserve_range(mca_ns_base_jobid_t job, mca_ns_base_vpid_t range)
|
||||
mca_ns_base_vpid_t ns_proxy_reserve_range(mca_ns_base_jobid_t job, mca_ns_base_vpid_t range)
|
||||
{
|
||||
ompi_buffer_t cmd;
|
||||
mca_ns_base_vpid_t starting_vpid;
|
@ -30,4 +30,18 @@ int mca_ns_proxy_finalize(void);
|
||||
* globals used within proxy component
|
||||
*/
|
||||
|
||||
extern ompi_process_name_t *mca_ns_my_replica;
|
||||
extern int mca_ns_proxy_debug;
|
||||
|
||||
/*
|
||||
* proxy function prototypes
|
||||
*/
|
||||
mca_ns_base_cellid_t ns_proxy_create_cellid(void);
|
||||
|
||||
mca_ns_base_jobid_t ns_proxy_create_jobid(void);
|
||||
|
||||
mca_ns_base_vpid_t ns_proxy_reserve_range(mca_ns_base_jobid_t job, mca_ns_base_vpid_t range);
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -51,13 +51,13 @@ mca_ns_base_component_t mca_ns_proxy_component = {
|
||||
* setup the function pointers for the module
|
||||
*/
|
||||
static mca_ns_base_module_t mca_ns_proxy = {
|
||||
ns_base_create_cellid,
|
||||
ns_proxy_create_cellid,
|
||||
ns_base_assign_cellid_to_process,
|
||||
ns_base_create_jobid,
|
||||
ns_proxy_create_jobid,
|
||||
ns_base_create_process_name,
|
||||
ns_base_copy_process_name,
|
||||
ns_base_convert_string_to_process_name,
|
||||
ns_base_reserve_range,
|
||||
ns_proxy_reserve_range,
|
||||
ns_base_free_name,
|
||||
ns_base_get_proc_name_string,
|
||||
ns_base_get_vpid_string,
|
||||
@ -79,12 +79,18 @@ static bool initialized = false;
|
||||
*/
|
||||
|
||||
ompi_process_name_t *mca_ns_my_replica;
|
||||
int mca_ns_proxy_debug;
|
||||
|
||||
/*
|
||||
* Open the proxy component and obtain the name of my replica.
|
||||
*/
|
||||
int mca_ns_proxy_open(void)
|
||||
{
|
||||
int id;
|
||||
|
||||
id = mca_base_param_register_int("ns", "proxy", "debug", NULL, 0);
|
||||
mca_base_param_lookup_int(id, &mca_ns_proxy_debug);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -136,6 +142,10 @@ mca_ns_base_module_t* mca_ns_proxy_init(bool *allow_multi_user_threads, bool *ha
|
||||
*/
|
||||
int mca_ns_proxy_finalize(void)
|
||||
{
|
||||
if (mca_ns_proxy_debug) {
|
||||
ompi_output(0, "finalizing ns proxy");
|
||||
}
|
||||
|
||||
/* free all tracking storage, but only if this component was initialized */
|
||||
|
||||
if (initialized) {
|
||||
|
@ -33,6 +33,7 @@ OBJ_CLASS_DECLARATION(mca_ns_replica_name_tracker_t);
|
||||
extern mca_ns_base_cellid_t mca_ns_replica_last_used_cellid;
|
||||
extern mca_ns_base_jobid_t mca_ns_replica_last_used_jobid;
|
||||
extern ompi_list_t mca_ns_replica_name_tracker;
|
||||
extern int mca_ns_replica_debug;
|
||||
|
||||
/*
|
||||
* Module open / close
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "util/proc_info.h"
|
||||
#include "util/output.h"
|
||||
#include "mca/mca.h"
|
||||
#include "mca/base/mca_base_param.h"
|
||||
#include "mca/oob/base/base.h"
|
||||
#include "mca/ns/base/base.h"
|
||||
#include "ns_replica.h"
|
||||
@ -100,6 +101,7 @@ OBJ_CLASS_INSTANCE(
|
||||
mca_ns_base_cellid_t mca_ns_replica_last_used_cellid;
|
||||
mca_ns_base_jobid_t mca_ns_replica_last_used_jobid;
|
||||
ompi_list_t mca_ns_replica_name_tracker;
|
||||
int mca_ns_replica_debug;
|
||||
|
||||
/*
|
||||
* don't really need this function - could just put NULL in the above structure
|
||||
@ -107,6 +109,11 @@ ompi_list_t mca_ns_replica_name_tracker;
|
||||
*/
|
||||
int mca_ns_replica_open(void)
|
||||
{
|
||||
int id;
|
||||
|
||||
id = mca_base_param_register_int("ns", "replica", "debug", NULL, 0);
|
||||
mca_base_param_lookup_int(id, &mca_ns_replica_debug);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -145,11 +152,7 @@ mca_ns_base_module_t* mca_ns_replica_init(bool *allow_multi_user_threads, bool *
|
||||
|
||||
OBJ_CONSTRUCT(&mca_ns_replica_name_tracker, ompi_list_t);
|
||||
|
||||
/* set my_replica to point to myself */
|
||||
|
||||
/* mca_ns_my_replica = mca_ns_replica.copy_process_name(ompi_process_info.name); */
|
||||
|
||||
/* Return the module */
|
||||
/* Return the module */
|
||||
|
||||
initialized = true;
|
||||
|
||||
@ -171,13 +174,17 @@ mca_ns_base_module_t* mca_ns_replica_init(bool *allow_multi_user_threads, bool *
|
||||
*/
|
||||
int mca_ns_replica_finalize(void)
|
||||
{
|
||||
if (mca_ns_replica_debug) {
|
||||
ompi_output(0, "finalizing ns replica");
|
||||
}
|
||||
|
||||
/* free all tracking storage, but only if this component was initialized */
|
||||
|
||||
if (initialized) {
|
||||
OBJ_DESTRUCT(&mca_ns_replica_name_tracker);
|
||||
/* if (initialized) { */
|
||||
/* OBJ_DESTRUCT(&mca_ns_replica_name_tracker); */
|
||||
|
||||
initialized = false;
|
||||
}
|
||||
/* } */
|
||||
|
||||
/* All done */
|
||||
|
||||
|
@ -50,7 +50,7 @@ extern ompi_process_name_t mca_oob_name_self;
|
||||
#define MCA_OOB_TAG_GPR_NOTIFY 3
|
||||
#define MCA_OOB_TAG_RTE 4
|
||||
#define MCA_OOB_TAG_EXEC 5
|
||||
|
||||
#define MCA_OOB_TAG_DAEMON 6
|
||||
|
||||
/*
|
||||
* OOB API
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user