1
1

Mostly added diagnostic messages (enabled only when specific MCA parameters are set) to help debug the problems being observed in mpirun2. Also found a couple of minor things that now seem to enable the system to run on the Mac. I believe these are all the changes involved - will check with another checkout.

This commit was SVN r2467.
Этот коммит содержится в:
Ralph Castain 2004-09-03 13:54:34 +00:00
родитель ae1a20a354
Коммит 7fc8600483
15 изменённых файлов: 134 добавлений и 81 удалений

Просмотреть файл

@ -8,6 +8,9 @@
#include "ompi_config.h"
#include <string.h>
#include "util/output.h"
#include "mca/mca.h"
#include "mca/gpr/base/base.h"
#include "gpr_proxy.h"
@ -78,10 +81,14 @@ int gpr_proxy_put(ompi_registry_mode_t mode, char *segment,
ompi_buffer_t answer;
mca_gpr_cmd_flag_t command;
char **tokptr;
int recv_tag, i;
int recv_tag, i, ret;
int32_t num_tokens, object_size;
int16_t response;
if (mca_gpr_proxy_debug) {
ompi_output(0, "gpr_proxy_put: entered for segment %s 1st token %s", segment, *tokens);
}
command = MCA_GPR_PUT_CMD;
recv_tag = MCA_OOB_TAG_GPR;
@ -131,7 +138,14 @@ int gpr_proxy_put(ompi_registry_mode_t mode, char *segment,
return OMPI_ERROR;
}
if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) {
if (mca_gpr_proxy_debug) {
ompi_output(0, "gpr_proxy_put: initiating send");
}
if (0 > (ret = mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0))) {
if (mca_gpr_proxy_debug) {
ompi_output(0, "gpr_proxy_put: send failed with return %d", ret);
}
return OMPI_ERROR;
}

Просмотреть файл

@ -101,10 +101,18 @@ mca_gpr_base_module_t* mca_gpr_proxy_init(bool *allow_multi_user_threads, bool *
{
int rc;
if (mca_gpr_proxy_debug) {
ompi_output(0, "gpr_proxy_init called");
}
/* If we're NOT the seed, then we want to be selected, so do all
the setup and return the module */
if (!ompi_process_info.seed) {
if (mca_gpr_proxy_debug) {
ompi_output(0, "gpr_proxy_init: proxy selected");
}
/* Return a module (choose an arbitrary, positive priority --
it's only relevant compared to other gpr components). If
we're not the seed, then we don't want to be selected, so
@ -146,6 +154,10 @@ mca_gpr_base_module_t* mca_gpr_proxy_init(bool *allow_multi_user_threads, bool *
int mca_gpr_proxy_finalize(void)
{
if (mca_gpr_proxy_debug) {
ompi_output(0, "finalizing gpr proxy");
}
if (initialized) {
initialized = false;
}
@ -171,6 +183,10 @@ void mca_gpr_proxy_notify_recv(int status, ompi_process_name_t* sender,
bool found;
mca_gpr_notify_request_tracker_t *trackptr;
if (mca_gpr_proxy_debug) {
ompi_output(0, "gpr proxy: received trigger message");
}
message = OBJ_NEW(ompi_registry_notify_message_t);
if ((OMPI_SUCCESS != ompi_unpack(buffer, &command, 1, MCA_GPR_OOB_PACK_CMD)) ||

Просмотреть файл

@ -65,7 +65,7 @@ int gpr_replica_put(ompi_registry_mode_t addr_mode, char *segment,
if (mca_gpr_replica_debug) {
ompi_output(0, "gpr replica: put entered");
ompi_output(0, "gpr replica: put entered on segment %s 1st token %s", segment, *tokens);
}
/* protect ourselves against errors */
@ -441,7 +441,7 @@ int gpr_replica_synchro(ompi_registry_synchro_mode_t synchro_mode,
ompi_registry_notify_message_t *notify_msg;
if (mca_gpr_replica_debug) {
ompi_output(0, "gpr replica: synchro entered");
ompi_output(0, "gpr replica: synchro entered on segment %s trigger %d", segment, trigger);
}
/* protect against errors */

Просмотреть файл

@ -91,24 +91,6 @@ static void mca_gpr_replica_keytable_construct(mca_gpr_replica_keytable_t* keyta
/*
 * Destructor for mca_gpr_replica_keytable_t instances.
 *
 * Emits an entry and an exit trace message through ompi_output() when
 * mca_gpr_replica_debug is non-zero; otherwise it does nothing.
 *
 * NOTE(review): the loop that drained the keytable (freeing each entry's
 * token and releasing the entry) had been commented out, so this destructor
 * releases nothing held by the instance.  The dead code and the local it
 * needed have been dropped; confirm where the tokens are freed before
 * relying on this function for cleanup.
 */
static void mca_gpr_replica_keytable_destructor(mca_gpr_replica_keytable_t* keytable)
{
    if (mca_gpr_replica_debug) {
        ompi_output(0, "entered keytable destructor");
    }

    if (mca_gpr_replica_debug) {
        ompi_output(0, "exiting keytable destructor");
    }
}
/* define instance of ompi_class_t */
@ -368,14 +350,12 @@ mca_gpr_base_module_t *mca_gpr_replica_init(bool *allow_multi_user_threads, bool
}
/* issue the non-blocking receive */
if (!mca_gpr_replica_debug) {
rc = mca_oob_recv_packed_nb(MCA_OOB_NAME_ANY, MCA_OOB_TAG_GPR, 0, mca_gpr_replica_recv, NULL);
if(rc != OMPI_SUCCESS && rc != OMPI_ERR_NOT_IMPLEMENTED) {
return NULL;
}
}
if (!mca_gpr_replica_debug) {
if (mca_gpr_replica_debug) {
ompi_output(0, "nb receive setup");
}
@ -393,45 +373,49 @@ mca_gpr_base_module_t *mca_gpr_replica_init(bool *allow_multi_user_threads, bool
*/
int mca_gpr_replica_finalize(void)
{
mca_gpr_replica_segment_t *seg;
mca_gpr_replica_keytable_t *kt;
mca_gpr_replica_keylist_t *kl;
mca_gpr_notify_request_tracker_t *tk;
mca_gpr_idtag_list_t *id;
/* free all storage, but only if this component was initialized */
if (initialized) {
while (NULL != (seg = (mca_gpr_replica_segment_t*)ompi_list_remove_first(&mca_gpr_replica_head.registry))) {
OBJ_RELEASE(seg);
}
OBJ_DESTRUCT(&mca_gpr_replica_head.registry);
while (NULL != (kt = (mca_gpr_replica_keytable_t*)ompi_list_remove_first(&mca_gpr_replica_head.segment_dict))) {
OBJ_RELEASE(kt);
}
OBJ_DESTRUCT(&mca_gpr_replica_head.segment_dict);
while (NULL != (kl = (mca_gpr_replica_keylist_t*)ompi_list_remove_first(&mca_gpr_replica_head.freekeys))) {
OBJ_RELEASE(kl);
}
OBJ_DESTRUCT(&mca_gpr_replica_head.freekeys);
while (NULL != (tk = (mca_gpr_notify_request_tracker_t*)ompi_list_remove_first(&mca_gpr_replica_notify_request_tracker))) {
OBJ_RELEASE(tk);
}
OBJ_DESTRUCT(&mca_gpr_replica_notify_request_tracker);
while (NULL != (id = (mca_gpr_idtag_list_t*)ompi_list_remove_first(&mca_gpr_replica_free_notify_id_tags))) {
OBJ_RELEASE(id);
}
OBJ_DESTRUCT(&mca_gpr_replica_free_notify_id_tags);
initialized = false;
if (mca_gpr_replica_debug) {
ompi_output(0, "finalizing gpr replica");
}
/* mca_gpr_replica_segment_t *seg; */
/* mca_gpr_replica_keytable_t *kt; */
/* mca_gpr_replica_keylist_t *kl; */
/* mca_gpr_notify_request_tracker_t *tk; */
/* mca_gpr_idtag_list_t *id; */
/* /\* free all storage, but only if this component was initialized *\/ */
/* if (initialized) { */
/* while (NULL != (seg = (mca_gpr_replica_segment_t*)ompi_list_remove_first(&mca_gpr_replica_head.registry))) { */
/* OBJ_RELEASE(seg); */
/* } */
/* OBJ_DESTRUCT(&mca_gpr_replica_head.registry); */
/* while (NULL != (kt = (mca_gpr_replica_keytable_t*)ompi_list_remove_first(&mca_gpr_replica_head.segment_dict))) { */
/* OBJ_RELEASE(kt); */
/* } */
/* OBJ_DESTRUCT(&mca_gpr_replica_head.segment_dict); */
/* while (NULL != (kl = (mca_gpr_replica_keylist_t*)ompi_list_remove_first(&mca_gpr_replica_head.freekeys))) { */
/* OBJ_RELEASE(kl); */
/* } */
/* OBJ_DESTRUCT(&mca_gpr_replica_head.freekeys); */
/* while (NULL != (tk = (mca_gpr_notify_request_tracker_t*)ompi_list_remove_first(&mca_gpr_replica_notify_request_tracker))) { */
/* OBJ_RELEASE(tk); */
/* } */
/* OBJ_DESTRUCT(&mca_gpr_replica_notify_request_tracker); */
/* while (NULL != (id = (mca_gpr_idtag_list_t*)ompi_list_remove_first(&mca_gpr_replica_free_notify_id_tags))) { */
/* OBJ_RELEASE(id); */
/* } */
/* OBJ_DESTRUCT(&mca_gpr_replica_free_notify_id_tags); */
/* initialized = false; */
/* } */
/* All done */
return OMPI_SUCCESS;
@ -1098,6 +1082,10 @@ void gpr_replica_remote_notify(ompi_process_name_t *recipient, int recipient_tag
char **tokptr;
int recv_tag;
if (mca_gpr_replica_debug) {
ompi_output(0, "sending trigger message");
}
command = MCA_GPR_NOTIFY_CMD;
recv_tag = MCA_OOB_TAG_GPR_NOTIFY;

Просмотреть файл

@ -610,6 +610,10 @@ ompi_registry_notify_message_t *gpr_replica_construct_notify_message(ompi_regist
char **tokptr, **tokptr2;
int num_tokens, i;
if (mca_gpr_replica_debug) {
ompi_output(0, "trigger fired on segment %s", segment);
}
/* protect against errors */
if (NULL == segment) {
return NULL;

Просмотреть файл

@ -22,8 +22,7 @@ libmca_ns_base_la_SOURCES = \
ns_base_close.c \
ns_base_select.c \
ns_base_open.c \
ns_base_local_fns.c \
ns_base_remote_fns.c
ns_base_local_fns.c
# Conditionally install the header files

Просмотреть файл

@ -78,7 +78,6 @@ extern "C" {
extern int mca_ns_base_output;
extern mca_ns_base_module_t ompi_name_server; /* holds selected module's function pointers */
extern ompi_process_name_t *mca_ns_my_replica; /* the name of the replica for this process */
extern bool mca_ns_base_selected;
extern ompi_list_t mca_ns_base_components_available;
extern mca_ns_base_component_t mca_ns_base_selected_component;

Просмотреть файл

@ -27,7 +27,6 @@
*/
int mca_ns_base_output = -1;
mca_ns_base_module_t ompi_name_server;
ompi_process_name_t *mca_ns_my_replica;
bool mca_ns_base_selected = false;
ompi_list_t mca_ns_base_components_available;
mca_ns_base_component_t mca_ns_base_selected_component;

Просмотреть файл

@ -7,4 +7,5 @@ include $(top_ompi_srcdir)/config/Makefile.options
noinst_LTLIBRARIES = libmca_ns_proxy.la
libmca_ns_proxy_la_SOURCES = \
ns_proxy.h \
ns_proxy.c \
ns_proxy_component.c

Просмотреть файл

@ -11,6 +11,7 @@
#include "util/pack.h"
#include "mca/oob/base/base.h"
#include "mca/ns/base/base.h"
#include "ns_proxy.h"
/**
* globals
@ -20,7 +21,7 @@
* functions
*/
mca_ns_base_cellid_t ns_base_create_cellid(void)
mca_ns_base_cellid_t ns_proxy_create_cellid(void)
{
ompi_buffer_t cmd;
mca_ns_base_cellid_t cell;
@ -63,7 +64,7 @@ mca_ns_base_cellid_t ns_base_create_cellid(void)
}
mca_ns_base_jobid_t ns_base_create_jobid(void)
mca_ns_base_jobid_t ns_proxy_create_jobid(void)
{
ompi_buffer_t cmd;
mca_ns_base_jobid_t job;
@ -106,7 +107,7 @@ mca_ns_base_jobid_t ns_base_create_jobid(void)
}
mca_ns_base_vpid_t ns_base_reserve_range(mca_ns_base_jobid_t job, mca_ns_base_vpid_t range)
mca_ns_base_vpid_t ns_proxy_reserve_range(mca_ns_base_jobid_t job, mca_ns_base_vpid_t range)
{
ompi_buffer_t cmd;
mca_ns_base_vpid_t starting_vpid;

Просмотреть файл

@ -30,4 +30,18 @@ int mca_ns_proxy_finalize(void);
* globals used within proxy component
*/
extern ompi_process_name_t *mca_ns_my_replica;
extern int mca_ns_proxy_debug;
/*
* proxy function prototypes
*/
mca_ns_base_cellid_t ns_proxy_create_cellid(void);
mca_ns_base_jobid_t ns_proxy_create_jobid(void);
mca_ns_base_vpid_t ns_proxy_reserve_range(mca_ns_base_jobid_t job, mca_ns_base_vpid_t range);
#endif

Просмотреть файл

@ -51,13 +51,13 @@ mca_ns_base_component_t mca_ns_proxy_component = {
* setup the function pointers for the module
*/
static mca_ns_base_module_t mca_ns_proxy = {
ns_base_create_cellid,
ns_proxy_create_cellid,
ns_base_assign_cellid_to_process,
ns_base_create_jobid,
ns_proxy_create_jobid,
ns_base_create_process_name,
ns_base_copy_process_name,
ns_base_convert_string_to_process_name,
ns_base_reserve_range,
ns_proxy_reserve_range,
ns_base_free_name,
ns_base_get_proc_name_string,
ns_base_get_vpid_string,
@ -79,12 +79,18 @@ static bool initialized = false;
*/
ompi_process_name_t *mca_ns_my_replica;
int mca_ns_proxy_debug;
/*
 * Open the proxy component.
 *
 * Registers an integer MCA parameter under the names "ns", "proxy", "debug"
 * (last argument 0 -- presumably the default value, confirm against
 * mca_base_param_register_int) and looks its value up into
 * mca_ns_proxy_debug.  The results of both calls are not checked; the
 * function always returns OMPI_SUCCESS.
 */
int mca_ns_proxy_open(void)
{
    int debug_param_index = mca_base_param_register_int("ns", "proxy", "debug", NULL, 0);

    mca_base_param_lookup_int(debug_param_index, &mca_ns_proxy_debug);

    return OMPI_SUCCESS;
}
@ -136,6 +142,10 @@ mca_ns_base_module_t* mca_ns_proxy_init(bool *allow_multi_user_threads, bool *ha
*/
int mca_ns_proxy_finalize(void)
{
if (mca_ns_proxy_debug) {
ompi_output(0, "finalizing ns proxy");
}
/* free all tracking storage, but only if this component was initialized */
if (initialized) {

Просмотреть файл

@ -33,6 +33,7 @@ OBJ_CLASS_DECLARATION(mca_ns_replica_name_tracker_t);
extern mca_ns_base_cellid_t mca_ns_replica_last_used_cellid;
extern mca_ns_base_jobid_t mca_ns_replica_last_used_jobid;
extern ompi_list_t mca_ns_replica_name_tracker;
extern int mca_ns_replica_debug;
/*
* Module open / close

Просмотреть файл

@ -21,6 +21,7 @@
#include "util/proc_info.h"
#include "util/output.h"
#include "mca/mca.h"
#include "mca/base/mca_base_param.h"
#include "mca/oob/base/base.h"
#include "mca/ns/base/base.h"
#include "ns_replica.h"
@ -100,6 +101,7 @@ OBJ_CLASS_INSTANCE(
mca_ns_base_cellid_t mca_ns_replica_last_used_cellid;
mca_ns_base_jobid_t mca_ns_replica_last_used_jobid;
ompi_list_t mca_ns_replica_name_tracker;
int mca_ns_replica_debug;
/*
* don't really need this function - could just put NULL in the above structure
@ -107,6 +109,11 @@ ompi_list_t mca_ns_replica_name_tracker;
*/
/*
 * Open the replica component.
 *
 * Registers an integer MCA parameter under the names "ns", "replica", "debug"
 * (last argument 0 -- presumably the default value, confirm against
 * mca_base_param_register_int) and looks its value up into
 * mca_ns_replica_debug.  The results of both calls are not checked; the
 * function always returns OMPI_SUCCESS.
 */
int mca_ns_replica_open(void)
{
    int debug_param_index = mca_base_param_register_int("ns", "replica", "debug", NULL, 0);

    mca_base_param_lookup_int(debug_param_index, &mca_ns_replica_debug);

    return OMPI_SUCCESS;
}
@ -145,11 +152,7 @@ mca_ns_base_module_t* mca_ns_replica_init(bool *allow_multi_user_threads, bool *
OBJ_CONSTRUCT(&mca_ns_replica_name_tracker, ompi_list_t);
/* set my_replica to point to myself */
/* mca_ns_my_replica = mca_ns_replica.copy_process_name(ompi_process_info.name); */
/* Return the module */
/* Return the module */
initialized = true;
@ -171,13 +174,17 @@ mca_ns_base_module_t* mca_ns_replica_init(bool *allow_multi_user_threads, bool *
*/
int mca_ns_replica_finalize(void)
{
if (mca_ns_replica_debug) {
ompi_output(0, "finalizing ns replica");
}
/* free all tracking storage, but only if this component was initialized */
if (initialized) {
OBJ_DESTRUCT(&mca_ns_replica_name_tracker);
/* if (initialized) { */
/* OBJ_DESTRUCT(&mca_ns_replica_name_tracker); */
initialized = false;
}
/* } */
/* All done */

Просмотреть файл

@ -50,7 +50,7 @@ extern ompi_process_name_t mca_oob_name_self;
#define MCA_OOB_TAG_GPR_NOTIFY 3
#define MCA_OOB_TAG_RTE 4
#define MCA_OOB_TAG_EXEC 5
#define MCA_OOB_TAG_DAEMON 6
/*
* OOB API