2006-09-18 21:32:04 +04:00
|
|
|
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
|
|
|
/**********************************************************************
|
|
|
|
* Copyright (C) 2000-2004 by Etnus, LLC.
|
|
|
|
* Copyright (C) 1999 by Etnus, Inc.
|
|
|
|
* Copyright (C) 1997-1998 Dolphin Interconnect Solutions Inc.
|
|
|
|
*
|
|
|
|
* Permission is hereby granted to use, reproduce, prepare derivative
|
|
|
|
* works, and to redistribute to others.
|
|
|
|
*
|
|
|
|
* DISCLAIMER
|
|
|
|
*
|
|
|
|
* Neither Dolphin Interconnect Solutions, Etnus LLC, nor any of their
|
|
|
|
* employees, makes any warranty express or implied, or assumes any
|
|
|
|
* legal liability or responsibility for the accuracy, completeness,
|
|
|
|
* or usefulness of any information, apparatus, product, or process
|
|
|
|
* disclosed, or represents that its use would not infringe privately
|
|
|
|
* owned rights.
|
|
|
|
*
|
|
|
|
* This code was written by
|
|
|
|
* James Cownie: Dolphin Interconnect Solutions. <jcownie@dolphinics.com>
|
|
|
|
* Etnus LLC <jcownie@etnus.com>
|
|
|
|
**********************************************************************/
|
|
|
|
|
|
|
|
/* Update log
|
|
|
|
*
|
|
|
|
* Jul 12 2001 FNW: Add a meaningful ID to the communicator name, and switch
|
|
|
|
* to using the recv_context as the unique_id field.
|
|
|
|
* Mar 6 2001 JHC: Add mqs_get_comm_group to allow a debugger to acquire
|
|
|
|
* processes less eagerly.
|
|
|
|
* Dec 13 2000 JHC: totalview/2514: Modify image_has_queues to return
|
|
|
|
* a silent FALSE if none of the expected data is
|
|
|
|
* present. This way you won't get complaints when
|
|
|
|
* you try this on non MPICH processes.
|
|
|
|
* Sep 8 2000 JVD: #include <string.h> to silence Linux Alpha compiler warnings.
|
|
|
|
* Mar 21 2000 JHC: Add the new entrypoint mqs_dll_taddr_width
|
|
|
|
* Nov 26 1998 JHC: Fix the problem that we weren't handling
|
|
|
|
* MPIR_Ignore_queues properly.
|
|
|
|
* Oct 22 1998 JHC: Fix a zero allocation problem
|
|
|
|
* Aug 19 1998 JHC: Fix some problems in our use of target_to_host on
|
|
|
|
* big endian machines.
|
|
|
|
* May 28 1998 JHC: Use the extra information we can return to say
|
|
|
|
* explicitly that sends are only showing non-blocking ops
|
|
|
|
* May 19 1998 JHC: Changed the names of the structs and added casts
|
|
|
|
* where needed to reflect the change to the way we handle
|
|
|
|
* type safety across the interface.
|
|
|
|
* Oct 27 1997 JHC: Created by exploding db_message_state_mpich.cxx
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Right now there is no MPI2 support
|
|
|
|
*/
|
|
|
|
#define FOR_MPI2 0
|
|
|
|
|
|
|
|
/*
|
|
|
|
The following was added by William Gropp to improve the portability
|
|
|
|
to systems with non-ANSI C compilers
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "ompi_config.h"
|
|
|
|
|
|
|
|
#ifdef HAVE_NO_C_CONST
|
|
|
|
#define const
|
|
|
|
#endif
|
|
|
|
#if defined(HAVE_STRING_H)
|
|
|
|
#include <string.h>
|
|
|
|
#endif /* defined(HAVE_STRING_H) */
|
|
|
|
#if defined(HAVE_STDLIB_H)
|
|
|
|
#include <stdlib.h>
|
|
|
|
#endif /* defined(HAVE_STDLIB_H) */
|
|
|
|
|
2006-09-20 11:51:30 +04:00
|
|
|
#include "ompi/mca/pml/base/pml_base_request.h"
|
2006-09-19 10:31:42 +04:00
|
|
|
|
2006-09-18 21:32:04 +04:00
|
|
|
/*
|
|
|
|
End of inclusion
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "mpi_interface.h"
|
|
|
|
#include "ompi_dll_defs.h"
|
|
|
|
|
|
|
|
/* Essential macros for C */
|
|
|
|
#ifndef NULL
|
|
|
|
#define NULL ((void *)0)
|
|
|
|
#endif
|
|
|
|
#ifndef TRUE
|
|
|
|
#define TRUE (0==0)
|
|
|
|
#endif
|
|
|
|
#ifndef FALSE
|
|
|
|
#define FALSE (0==1)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef OLD_STYLE_CPP_CONCAT
|
|
|
|
#define concat(a,b) a/**/b
|
|
|
|
#define stringize(a) "a"
|
|
|
|
#else
|
|
|
|
#define concat(a,b) a##b
|
|
|
|
#define stringize(a) #a
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/**********************************************************************/
|
|
|
|
/* Set up the basic callbacks into the debugger, also work out
|
|
|
|
* one crucial piece of info about the machine we're running on.
|
|
|
|
*/
|
|
|
|
static const mqs_basic_callbacks *mqs_basic_entrypoints;
|
|
|
|
static int host_is_big_endian;
|
|
|
|
|
2007-08-09 17:52:23 +04:00
|
|
|
void mqs_setup_basic_callbacks (const mqs_basic_callbacks * cb)
|
2006-09-18 21:32:04 +04:00
|
|
|
{
|
|
|
|
int t = 1;
|
|
|
|
|
|
|
|
host_is_big_endian = (*(char *)&t) != 1;
|
|
|
|
mqs_basic_entrypoints = cb;
|
|
|
|
} /* mqs_setup_callbacks */
|
|
|
|
|
|
|
|
/**********************************************************************/
|
|
|
|
/* Macros to make it transparent that we're calling the TV functions
|
|
|
|
* through function pointers.
|
|
|
|
*/
|
|
|
|
#define mqs_malloc (mqs_basic_entrypoints->mqs_malloc_fp)
|
|
|
|
#define mqs_free (mqs_basic_entrypoints->mqs_free_fp)
|
|
|
|
#define mqs_prints (mqs_basic_entrypoints->mqs_dprints_fp)
|
|
|
|
#define mqs_put_image_info (mqs_basic_entrypoints->mqs_put_image_info_fp)
|
|
|
|
#define mqs_get_image_info (mqs_basic_entrypoints->mqs_get_image_info_fp)
|
|
|
|
#define mqs_put_process_info (mqs_basic_entrypoints->mqs_put_process_info_fp)
|
|
|
|
#define mqs_get_process_info (mqs_basic_entrypoints->mqs_get_process_info_fp)
|
|
|
|
|
|
|
|
/* These macros *RELY* on the function already having set up the conventional
|
|
|
|
* local variables i_info or p_info.
|
|
|
|
*/
|
|
|
|
#define mqs_find_type (i_info->image_callbacks->mqs_find_type_fp)
|
|
|
|
#define mqs_field_offset (i_info->image_callbacks->mqs_field_offset_fp)
|
|
|
|
#define mqs_sizeof (i_info->image_callbacks->mqs_sizeof_fp)
|
|
|
|
#define mqs_get_type_sizes (i_info->image_callbacks->mqs_get_type_sizes_fp)
|
|
|
|
#define mqs_find_function (i_info->image_callbacks->mqs_find_function_fp)
|
|
|
|
#define mqs_find_symbol (i_info->image_callbacks->mqs_find_symbol_fp)
|
|
|
|
|
|
|
|
#define mqs_get_image (p_info->process_callbacks->mqs_get_image_fp)
|
|
|
|
#define mqs_get_global_rank (p_info->process_callbacks->mqs_get_global_rank_fp)
|
|
|
|
#define mqs_fetch_data (p_info->process_callbacks->mqs_fetch_data_fp)
|
|
|
|
#define mqs_target_to_host (p_info->process_callbacks->mqs_target_to_host_fp)
|
|
|
|
|
|
|
|
/**********************************************************************/
|
|
|
|
/* Version handling functions.
|
|
|
|
* This one should never be changed.
|
|
|
|
*/
|
2007-08-09 17:52:23 +04:00
|
|
|
int mqs_version_compatibility (void)
|
2006-09-18 21:32:04 +04:00
|
|
|
{
|
|
|
|
return MQS_INTERFACE_COMPATIBILITY;
|
|
|
|
} /* mqs_version_compatibility */
|
|
|
|
|
|
|
|
/* This one can say what you like */
|
2007-08-09 17:52:23 +04:00
|
|
|
char *mqs_version_string (void)
|
2006-09-18 21:32:04 +04:00
|
|
|
{
|
|
|
|
return "Open MPI message queue support for parallel debuggers compiled on " __DATE__;
|
|
|
|
} /* mqs_version_string */
|
|
|
|
|
|
|
|
/* So the debugger can tell what interface width the library was compiled with */
|
2007-08-09 17:52:23 +04:00
|
|
|
int mqs_dll_taddr_width (void)
|
2006-09-18 21:32:04 +04:00
|
|
|
{
|
|
|
|
return sizeof (mqs_taddr_t);
|
|
|
|
} /* mqs_dll_taddr_width */
|
|
|
|
|
|
|
|
/**********************************************************************/
|
|
|
|
/* Additional error codes and error string conversion.
|
|
|
|
*/
|
|
|
|
enum {
|
|
|
|
err_silent_failure = mqs_first_user_code,
|
|
|
|
|
|
|
|
err_no_current_communicator,
|
|
|
|
err_bad_request,
|
|
|
|
err_no_store,
|
|
|
|
|
|
|
|
err_failed_qhdr,
|
|
|
|
err_unexpected,
|
|
|
|
err_posted,
|
|
|
|
|
|
|
|
err_failed_queue,
|
|
|
|
err_first,
|
|
|
|
|
|
|
|
err_context_id,
|
|
|
|
err_tag,
|
|
|
|
err_tagmask,
|
|
|
|
err_lsrc,
|
|
|
|
err_srcmask,
|
|
|
|
err_next,
|
|
|
|
err_ptr,
|
|
|
|
|
|
|
|
err_missing_type,
|
|
|
|
err_missing_symbol,
|
|
|
|
|
|
|
|
err_db_shandle,
|
|
|
|
err_db_comm,
|
|
|
|
err_db_target,
|
|
|
|
err_db_tag,
|
|
|
|
err_db_data,
|
|
|
|
err_db_byte_length,
|
|
|
|
err_db_next,
|
|
|
|
|
|
|
|
err_failed_rhandle,
|
|
|
|
err_is_complete,
|
|
|
|
err_buf,
|
|
|
|
err_len,
|
|
|
|
err_s,
|
|
|
|
|
|
|
|
err_failed_status,
|
|
|
|
err_count,
|
|
|
|
err_MPI_SOURCE,
|
|
|
|
err_MPI_TAG,
|
|
|
|
|
|
|
|
err_failed_commlist,
|
|
|
|
err_sequence_number,
|
|
|
|
err_comm_first,
|
|
|
|
|
|
|
|
err_failed_communicator,
|
|
|
|
err_lrank_to_grank,
|
|
|
|
err_send_context,
|
|
|
|
err_recv_context,
|
|
|
|
err_comm_next,
|
|
|
|
err_comm_name,
|
|
|
|
|
|
|
|
err_all_communicators,
|
2007-06-20 02:34:20 +04:00
|
|
|
err_mpid_sends,
|
2006-09-18 21:32:04 +04:00
|
|
|
err_mpid_recvs,
|
|
|
|
err_group_corrupt
|
|
|
|
};
|
|
|
|
|
|
|
|
/**********************************************************************/
|
|
|
|
/* Forward declarations
|
|
|
|
*/
|
|
|
|
static mqs_taddr_t fetch_pointer (mqs_process * proc, mqs_taddr_t addr, mpi_process_info *p_info);
|
|
|
|
static mqs_tword_t fetch_int (mqs_process * proc, mqs_taddr_t addr, mpi_process_info *p_info);
|
2006-09-20 11:51:30 +04:00
|
|
|
static mqs_tword_t fetch_bool(mqs_process * proc, mqs_taddr_t addr, mpi_process_info *p_info);
|
2006-09-18 21:32:04 +04:00
|
|
|
|
|
|
|
/* Internal structure we hold for each communicator */
|
|
|
|
typedef struct communicator_t
|
|
|
|
{
|
|
|
|
struct communicator_t * next;
|
|
|
|
group_t * group; /* Translations */
|
2006-09-19 10:31:42 +04:00
|
|
|
int recv_context; /* Unique ID for the communicator */
|
|
|
|
mqs_taddr_t comm_ptr;
|
2006-09-18 21:32:04 +04:00
|
|
|
int present;
|
|
|
|
mqs_communicator comm_info; /* Info needed at the higher level */
|
|
|
|
} communicator_t;
|
|
|
|
|
2006-10-11 21:29:29 +04:00
|
|
|
#if defined(CODE_NOT_USED)
|
2006-09-18 21:32:04 +04:00
|
|
|
/**********************************************************************/
|
|
|
|
/* Functions to handle translation groups.
|
|
|
|
* We have a list of these on the process info, so that we can
|
|
|
|
* share the group between multiple communicators.
|
|
|
|
*/
|
|
|
|
/**********************************************************************/
|
|
|
|
/* Translate a process number */
|
|
|
|
static int translate (group_t *this, int index)
|
|
|
|
{
|
|
|
|
if (index == MQS_INVALID_PROCESS ||
|
|
|
|
((unsigned int)index) >= ((unsigned int) this->entries))
|
|
|
|
return MQS_INVALID_PROCESS;
|
|
|
|
else
|
|
|
|
return this->local_to_global[index];
|
|
|
|
} /* translate */
|
|
|
|
|
|
|
|
/**********************************************************************/
|
|
|
|
/* Reverse translate a process number i.e. global to local*/
|
|
|
|
static int reverse_translate (group_t * this, int index)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for ( i = 0; i < this->entries; i++ )
|
|
|
|
if( this->local_to_global[i] == index )
|
|
|
|
return i;
|
|
|
|
|
|
|
|
return MQS_INVALID_PROCESS;
|
|
|
|
} /* reverse_translate */
|
2006-10-11 21:29:29 +04:00
|
|
|
#endif /* CODE_NOT_USED */
|
2006-09-18 21:32:04 +04:00
|
|
|
|
|
|
|
/**********************************************************************/
|
|
|
|
/* Search the group list for this group, if not found create it.
|
|
|
|
*/
|
|
|
|
static group_t * find_or_create_group (mqs_process *proc,
|
|
|
|
mqs_taddr_t table)
|
|
|
|
{
|
|
|
|
mpi_process_info *p_info = (mpi_process_info *)mqs_get_process_info (proc);
|
|
|
|
mqs_image * image = mqs_get_image (proc);
|
|
|
|
mpi_image_info *i_info = (mpi_image_info *)mqs_get_image_info (image);
|
|
|
|
int intsize = p_info->sizes.int_size;
|
|
|
|
communicator_t *comm = p_info->communicator_list;
|
|
|
|
int *tr;
|
|
|
|
char *trbuffer;
|
|
|
|
int i;
|
|
|
|
group_t *g;
|
|
|
|
int np;
|
|
|
|
|
|
|
|
np = fetch_int( proc,
|
|
|
|
table + i_info->ompi_group_t.offset.grp_proc_count,
|
|
|
|
p_info );
|
2006-09-20 11:51:30 +04:00
|
|
|
if( np < 0 ) {
|
2006-09-18 21:32:04 +04:00
|
|
|
printf( "Get a size for the communicator = %d\n", np );
|
|
|
|
return NULL; /* Makes no sense ! */
|
|
|
|
}
|
|
|
|
/* Iterate over each communicator seeing if we can find this group */
|
|
|
|
for (;comm; comm = comm->next) {
|
|
|
|
g = comm->group;
|
|
|
|
if (g && g->table_base == table) {
|
|
|
|
g->ref_count++; /* Someone else is interested */
|
|
|
|
return g;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Hmm, couldn't find one, so fetch it */
|
|
|
|
g = (group_t *)mqs_malloc (sizeof (group_t));
|
|
|
|
tr = (int *)mqs_malloc (np*sizeof(int));
|
|
|
|
trbuffer = (char *)mqs_malloc (np*intsize);
|
|
|
|
g->local_to_global = tr;
|
|
|
|
|
|
|
|
if (mqs_ok != mqs_fetch_data (proc, table, np*intsize, trbuffer) ) {
|
|
|
|
mqs_free (g);
|
|
|
|
mqs_free (tr);
|
|
|
|
mqs_free (trbuffer);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* This code is assuming that sizeof(int) is the same on target and host...
|
|
|
|
* that's a bit flaky, but probably actually OK.
|
|
|
|
*/
|
|
|
|
for( i = 0; i < np; i++ )
|
|
|
|
mqs_target_to_host( proc, trbuffer+intsize*i, &tr[i], intsize );
|
|
|
|
|
|
|
|
mqs_free(trbuffer);
|
|
|
|
|
|
|
|
g->entries = np;
|
|
|
|
g->ref_count = 1;
|
|
|
|
return g;
|
|
|
|
} /* find_or_create_group */
|
|
|
|
|
|
|
|
/***********************************************************************/
|
|
|
|
static void group_decref (group_t * group)
|
|
|
|
{
|
|
|
|
if (--(group->ref_count) == 0) {
|
|
|
|
mqs_free (group->local_to_global);
|
|
|
|
mqs_free (group);
|
|
|
|
}
|
|
|
|
} /* group_decref */
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
* Perform basic setup for the image, we just allocate and clear
|
|
|
|
* our info.
|
|
|
|
*/
|
2007-08-09 17:52:23 +04:00
|
|
|
int mqs_setup_image (mqs_image *image, const mqs_image_callbacks *icb)
|
2006-09-18 21:32:04 +04:00
|
|
|
{
|
|
|
|
mpi_image_info *i_info = (mpi_image_info *)mqs_malloc (sizeof (mpi_image_info));
|
|
|
|
|
|
|
|
if (!i_info)
|
|
|
|
return err_no_store;
|
|
|
|
|
|
|
|
memset ((void *)i_info, 0, sizeof (mpi_image_info));
|
|
|
|
i_info->image_callbacks = icb; /* Before we do *ANYTHING* */
|
|
|
|
|
|
|
|
mqs_put_image_info (image, (mqs_image_info *)i_info);
|
|
|
|
|
|
|
|
return mqs_ok;
|
|
|
|
} /* mqs_setup_image */
|
|
|
|
|
|
|
|
|
|
|
|
/***********************************************************************
|
2007-08-04 04:34:42 +04:00
|
|
|
* Check for all the information we require to access the Open MPI message queues.
|
2006-09-18 21:32:04 +04:00
|
|
|
* Stash it into our structure on the image if we're succesful.
|
|
|
|
*/
|
|
|
|
|
2007-08-09 17:52:23 +04:00
|
|
|
int mqs_image_has_queues (mqs_image *image, char **message)
|
2006-09-18 21:32:04 +04:00
|
|
|
{
|
|
|
|
mpi_image_info * i_info = (mpi_image_info *)mqs_get_image_info (image);
|
|
|
|
char* missing_in_action;
|
|
|
|
|
|
|
|
/* Default failure message ! */
|
|
|
|
*message = "The symbols and types in the Open MPI library used by TotalView\n"
|
|
|
|
"to extract the message queues are not as expected in\n"
|
|
|
|
"the image '%s'\n"
|
|
|
|
"No message queue display is possible.\n"
|
2007-08-04 04:34:42 +04:00
|
|
|
"This is probably an Open MPI version or configuration problem.";
|
2006-09-18 21:32:04 +04:00
|
|
|
|
|
|
|
/* Force in the file containing our breakpoint function, to ensure that
|
|
|
|
* types have been read from there before we try to look them up.
|
|
|
|
*/
|
|
|
|
mqs_find_function (image, "MPIR_Breakpoint", mqs_lang_c, NULL);
|
|
|
|
|
|
|
|
/* Are we supposed to ignore this ? (e.g. it's really an HPF runtime using the
|
2007-08-04 04:34:42 +04:00
|
|
|
* Open MPI process acquisition, but not wanting queue display)
|
2006-09-18 21:32:04 +04:00
|
|
|
*/
|
|
|
|
if (mqs_find_symbol (image, "MPIR_Ignore_queues", NULL) == mqs_ok) {
|
|
|
|
*message = NULL; /* Fail silently */
|
|
|
|
return err_silent_failure;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Open MPI use a bunch of lists in order to kep track of the internal
|
|
|
|
* objects. We have to make sure we're able to find all of them in the image
|
|
|
|
* and compute their ofset in order to be able to parse them later.
|
|
|
|
* We need to find the opal_list_item_t, the opal_list_t, the ompi_free_list_item_t,
|
|
|
|
* and the ompi_free_list_t.
|
|
|
|
*
|
|
|
|
* Once we have these offsets, we should make sure that we have access to all
|
|
|
|
* requests lists and types. We're looking here only at the basic type for the
|
|
|
|
* requests as they hold all the information we need to export to the debugger.
|
|
|
|
*/
|
|
|
|
{
|
|
|
|
mqs_type* qh_type = mqs_find_type( image, "opal_list_item_t", mqs_lang_c );
|
|
|
|
if( !qh_type ) {
|
|
|
|
missing_in_action = "opal_list_item_t";
|
|
|
|
goto type_missing;
|
|
|
|
}
|
|
|
|
i_info->opal_list_item_t.size = mqs_sizeof(qh_type);
|
|
|
|
i_info->opal_list_item_t.offset.opal_list_next = mqs_field_offset(qh_type, "opal_list_next");
|
|
|
|
}
|
|
|
|
{
|
|
|
|
mqs_type* qh_type = mqs_find_type( image, "opal_list_t", mqs_lang_c );
|
|
|
|
if( !qh_type ) {
|
|
|
|
missing_in_action = "opal_list_t";
|
|
|
|
goto type_missing;
|
|
|
|
}
|
|
|
|
i_info->opal_list_t.size = mqs_sizeof(qh_type);
|
|
|
|
i_info->opal_list_t.offset.opal_list_sentinel = mqs_field_offset(qh_type, "opal_list_sentinel");
|
|
|
|
}
|
|
|
|
{
|
|
|
|
mqs_type* qh_type = mqs_find_type( image, "ompi_free_list_item_t", mqs_lang_c );
|
|
|
|
if( !qh_type ) {
|
|
|
|
missing_in_action = "ompi_free_list_item_t";
|
|
|
|
goto type_missing;
|
|
|
|
}
|
|
|
|
/* This is just an overloaded opal_list_item_t */
|
2006-09-19 06:07:06 +04:00
|
|
|
i_info->ompi_free_list_item_t.size = mqs_sizeof(qh_type);
|
|
|
|
}
|
|
|
|
{
|
|
|
|
mqs_type* qh_type = mqs_find_type( image, "ompi_free_list_memory_t", mqs_lang_c );
|
|
|
|
if( !qh_type ) {
|
|
|
|
missing_in_action = "ompi_free_list_memory_t";
|
|
|
|
goto type_missing;
|
|
|
|
}
|
|
|
|
/* This is just an overloaded opal_list_item_t */
|
|
|
|
i_info->ompi_free_list_memory_t.size = mqs_sizeof(qh_type);
|
2006-09-18 21:32:04 +04:00
|
|
|
}
|
|
|
|
{
|
|
|
|
mqs_type* qh_type = mqs_find_type( image, "ompi_free_list_t", mqs_lang_c );
|
|
|
|
if( !qh_type ) {
|
|
|
|
missing_in_action = "ompi_free_list_t";
|
|
|
|
goto type_missing;
|
|
|
|
}
|
|
|
|
i_info->ompi_free_list_t.size = mqs_sizeof(qh_type);
|
|
|
|
i_info->ompi_free_list_t.offset.fl_elem_size = mqs_field_offset(qh_type, "fl_elem_size");
|
|
|
|
i_info->ompi_free_list_t.offset.fl_header_space = mqs_field_offset(qh_type, "fl_header_space");
|
|
|
|
i_info->ompi_free_list_t.offset.fl_alignment = mqs_field_offset(qh_type, "fl_alignment");
|
|
|
|
i_info->ompi_free_list_t.offset.fl_allocations = mqs_field_offset(qh_type, "fl_allocations");
|
|
|
|
i_info->ompi_free_list_t.offset.fl_max_to_alloc = mqs_field_offset(qh_type, "fl_max_to_alloc");
|
2006-09-19 10:31:42 +04:00
|
|
|
i_info->ompi_free_list_t.offset.fl_num_per_alloc = mqs_field_offset(qh_type, "fl_num_per_alloc");
|
|
|
|
i_info->ompi_free_list_t.offset.fl_num_allocated = mqs_field_offset(qh_type, "fl_num_allocated");
|
2006-09-18 21:32:04 +04:00
|
|
|
}
|
|
|
|
/**
|
|
|
|
* Now let's look for all types required for reading the requests.
|
|
|
|
*/
|
|
|
|
{
|
|
|
|
mqs_type* qh_type = mqs_find_type( image, "ompi_request_t", mqs_lang_c );
|
|
|
|
if( !qh_type ) {
|
|
|
|
missing_in_action = "ompi_request_t";
|
|
|
|
goto type_missing;
|
|
|
|
}
|
2006-09-19 06:07:06 +04:00
|
|
|
i_info->ompi_request_t.size = mqs_sizeof(qh_type);
|
|
|
|
i_info->ompi_request_t.offset.req_type = mqs_field_offset(qh_type, "req_type");
|
|
|
|
i_info->ompi_request_t.offset.req_status = mqs_field_offset(qh_type, "req_status");
|
|
|
|
i_info->ompi_request_t.offset.req_complete = mqs_field_offset(qh_type, "req_complete");
|
|
|
|
i_info->ompi_request_t.offset.req_state = mqs_field_offset(qh_type, "req_state");
|
|
|
|
i_info->ompi_request_t.offset.req_f_to_c_index = mqs_field_offset(qh_type, "req_f_to_c_index");
|
2006-09-18 21:32:04 +04:00
|
|
|
}
|
|
|
|
{
|
|
|
|
mqs_type* qh_type = mqs_find_type( image, "mca_pml_base_request_t", mqs_lang_c );
|
|
|
|
if( !qh_type ) {
|
|
|
|
missing_in_action = "mca_pml_base_request_t";
|
|
|
|
goto type_missing;
|
|
|
|
}
|
2006-09-19 10:31:42 +04:00
|
|
|
i_info->mca_pml_base_request_t.size = mqs_sizeof(qh_type);
|
|
|
|
i_info->mca_pml_base_request_t.offset.req_addr = mqs_field_offset(qh_type, "req_addr");
|
|
|
|
i_info->mca_pml_base_request_t.offset.req_count = mqs_field_offset(qh_type, "req_count");
|
|
|
|
i_info->mca_pml_base_request_t.offset.req_peer = mqs_field_offset(qh_type, "req_peer");
|
|
|
|
i_info->mca_pml_base_request_t.offset.req_tag = mqs_field_offset(qh_type, "req_tag");
|
|
|
|
i_info->mca_pml_base_request_t.offset.req_comm = mqs_field_offset(qh_type, "req_comm");
|
|
|
|
i_info->mca_pml_base_request_t.offset.req_proc = mqs_field_offset(qh_type, "req_proc");
|
|
|
|
i_info->mca_pml_base_request_t.offset.req_sequence = mqs_field_offset(qh_type, "req_sequence");
|
2006-09-20 11:51:30 +04:00
|
|
|
i_info->mca_pml_base_request_t.offset.req_type = mqs_field_offset(qh_type, "req_type");
|
2006-09-19 10:31:42 +04:00
|
|
|
}
|
|
|
|
{
|
|
|
|
mqs_type* qh_type = mqs_find_type( image, "mca_pml_base_send_request_t", mqs_lang_c );
|
|
|
|
if( !qh_type ) {
|
|
|
|
missing_in_action = "mca_pml_base_send_request_t";
|
|
|
|
goto type_missing;
|
|
|
|
}
|
|
|
|
i_info->mca_pml_base_send_request_t.size = mqs_sizeof(qh_type);
|
|
|
|
i_info->mca_pml_base_send_request_t.offset.req_addr = mqs_field_offset(qh_type, "req_addr");
|
|
|
|
i_info->mca_pml_base_send_request_t.offset.req_bytes_packed = mqs_field_offset(qh_type, "req_bytes_packed");
|
|
|
|
i_info->mca_pml_base_send_request_t.offset.req_send_mode = mqs_field_offset(qh_type, "req_send_mode");
|
|
|
|
}
|
|
|
|
{
|
|
|
|
mqs_type* qh_type = mqs_find_type( image, "mca_pml_base_recv_request_t", mqs_lang_c );
|
|
|
|
if( !qh_type ) {
|
|
|
|
missing_in_action = "mca_pml_base_recv_request_t";
|
|
|
|
goto type_missing;
|
|
|
|
}
|
|
|
|
i_info->mca_pml_base_recv_request_t.size = mqs_sizeof(qh_type);
|
|
|
|
i_info->mca_pml_base_recv_request_t.offset.req_bytes_packed = mqs_field_offset(qh_type, "req_bytes_packed");
|
2006-09-18 21:32:04 +04:00
|
|
|
}
|
2006-10-05 00:01:33 +04:00
|
|
|
/**
|
2007-08-04 04:34:42 +04:00
|
|
|
* Gather information about the received fragments and theirs headers.
|
2006-10-05 00:01:33 +04:00
|
|
|
*/
|
|
|
|
{
|
|
|
|
mqs_type* qh_type = mqs_find_type( image, "mca_pml_ob1_common_hdr_t", mqs_lang_c );
|
|
|
|
if( !qh_type ) {
|
|
|
|
missing_in_action = "mca_pml_ob1_common_hdr_t";
|
|
|
|
goto type_missing;
|
|
|
|
}
|
|
|
|
i_info->mca_pml_ob1_common_hdr_t.size = mqs_sizeof(qh_type);
|
|
|
|
i_info->mca_pml_ob1_common_hdr_t.offset.hdr_type = mqs_field_offset(qh_type, "hdr_type");
|
|
|
|
i_info->mca_pml_ob1_common_hdr_t.offset.hdr_flags = mqs_field_offset(qh_type, "hdr_flags");
|
|
|
|
}
|
|
|
|
{
|
|
|
|
mqs_type* qh_type = mqs_find_type( image, "mca_pml_ob1_match_hdr_t", mqs_lang_c );
|
|
|
|
if( !qh_type ) {
|
|
|
|
missing_in_action = "mca_pml_ob1_match_hdr_t";
|
|
|
|
goto type_missing;
|
|
|
|
}
|
|
|
|
i_info->mca_pml_ob1_match_hdr_t.size = mqs_sizeof(qh_type);
|
|
|
|
i_info->mca_pml_ob1_match_hdr_t.offset.hdr_common = mqs_field_offset(qh_type, "hdr_common");
|
|
|
|
i_info->mca_pml_ob1_match_hdr_t.offset.hdr_ctx = mqs_field_offset(qh_type, "hdr_ctx");
|
|
|
|
i_info->mca_pml_ob1_match_hdr_t.offset.hdr_src = mqs_field_offset(qh_type, "hdr_src");
|
|
|
|
i_info->mca_pml_ob1_match_hdr_t.offset.hdr_tag = mqs_field_offset(qh_type, "hdr_tag");
|
|
|
|
i_info->mca_pml_ob1_match_hdr_t.offset.hdr_seq = mqs_field_offset(qh_type, "hdr_seq");
|
|
|
|
}
|
|
|
|
{
|
|
|
|
mqs_type* qh_type = mqs_find_type( image, "mca_pml_ob1_recv_frag_t", mqs_lang_c );
|
|
|
|
if( !qh_type ) {
|
|
|
|
missing_in_action = "mca_pml_ob1_recv_frag_t";
|
|
|
|
goto type_missing;
|
|
|
|
}
|
|
|
|
i_info->mca_pml_ob1_recv_frag_t.size = mqs_sizeof(qh_type);
|
|
|
|
i_info->mca_pml_ob1_recv_frag_t.offset.hdr = mqs_field_offset(qh_type, "hdr");
|
|
|
|
i_info->mca_pml_ob1_recv_frag_t.offset.request = mqs_field_offset(qh_type, "request");
|
|
|
|
}
|
|
|
|
|
2006-09-18 21:32:04 +04:00
|
|
|
/**
|
|
|
|
* And now let's look at the communicator and group structures.
|
|
|
|
*/
|
|
|
|
{
|
|
|
|
mqs_type* qh_type = mqs_find_type( image, "ompi_pointer_array_t", mqs_lang_c );
|
|
|
|
if( !qh_type ) {
|
|
|
|
missing_in_action = "ompi_pointer_array_t";
|
|
|
|
goto type_missing;
|
|
|
|
}
|
|
|
|
i_info->ompi_pointer_array_t.size = mqs_sizeof(qh_type);
|
|
|
|
i_info->ompi_pointer_array_t.offset.lowest_free = mqs_field_offset(qh_type, "lowest_free");
|
|
|
|
i_info->ompi_pointer_array_t.offset.size = mqs_field_offset(qh_type, "size");
|
|
|
|
i_info->ompi_pointer_array_t.offset.addr = mqs_field_offset(qh_type, "addr");
|
|
|
|
}
|
|
|
|
{
|
|
|
|
mqs_type* qh_type = mqs_find_type( image, "ompi_communicator_t", mqs_lang_c );
|
|
|
|
if( !qh_type ) {
|
|
|
|
missing_in_action = "ompi_communicator_t";
|
|
|
|
goto type_missing;
|
|
|
|
}
|
|
|
|
i_info->ompi_communicator_t.size = mqs_sizeof(qh_type);
|
|
|
|
i_info->ompi_communicator_t.offset.c_name = mqs_field_offset(qh_type, "c_name");
|
|
|
|
i_info->ompi_communicator_t.offset.c_contextid = mqs_field_offset(qh_type, "c_contextid");
|
|
|
|
i_info->ompi_communicator_t.offset.c_my_rank = mqs_field_offset(qh_type, "c_my_rank" );
|
|
|
|
i_info->ompi_communicator_t.offset.c_local_group = mqs_field_offset(qh_type, "c_local_group" );
|
|
|
|
}
|
|
|
|
{
|
|
|
|
mqs_type* qh_type = mqs_find_type( image, "ompi_group_t", mqs_lang_c );
|
|
|
|
if( !qh_type ) {
|
|
|
|
missing_in_action = "ompi_group_t";
|
|
|
|
goto type_missing;
|
|
|
|
}
|
|
|
|
i_info->ompi_group_t.size = mqs_sizeof(qh_type);
|
|
|
|
i_info->ompi_group_t.offset.grp_proc_count = mqs_field_offset(qh_type, "grp_proc_count");
|
|
|
|
i_info->ompi_group_t.offset.grp_my_rank = mqs_field_offset(qh_type, "grp_my_rank");
|
|
|
|
i_info->ompi_group_t.offset.grp_flags = mqs_field_offset(qh_type, "grp_flags" );
|
|
|
|
}
|
2006-09-20 11:51:30 +04:00
|
|
|
{
|
|
|
|
mqs_type* qh_type = mqs_find_type( image, "ompi_status_public_t", mqs_lang_c );
|
|
|
|
if( !qh_type ) {
|
|
|
|
missing_in_action = "ompi_status_public_t";
|
|
|
|
goto type_missing;
|
|
|
|
}
|
|
|
|
i_info->ompi_status_public_t.size = mqs_sizeof(qh_type);
|
|
|
|
i_info->ompi_status_public_t.offset.MPI_SOURCE = mqs_field_offset(qh_type, "MPI_SOURCE");
|
|
|
|
i_info->ompi_status_public_t.offset.MPI_TAG = mqs_field_offset(qh_type, "MPI_TAG");
|
|
|
|
i_info->ompi_status_public_t.offset.MPI_ERROR = mqs_field_offset(qh_type, "MPI_ERROR" );
|
|
|
|
i_info->ompi_status_public_t.offset._count = mqs_field_offset(qh_type, "_count" );
|
|
|
|
i_info->ompi_status_public_t.offset._cancelled = mqs_field_offset(qh_type, "_cancelled" );
|
|
|
|
}
|
2006-09-18 21:32:04 +04:00
|
|
|
|
|
|
|
/* All the types are here. Let's succesfully return. */
|
|
|
|
return mqs_ok;
|
|
|
|
|
|
|
|
type_missing:
|
|
|
|
/**
|
2007-06-20 02:34:20 +04:00
|
|
|
* One of the required types is missing in the image. We are unable to extract
|
|
|
|
* the information we need from the pointers. We did our best but here
|
|
|
|
* we're at our limit. Give up!
|
2006-09-18 21:32:04 +04:00
|
|
|
*/
|
|
|
|
*message = missing_in_action;
|
|
|
|
return err_missing_type;
|
|
|
|
} /* mqs_image_has_queues */
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
* Setup information needed for a specific process.
|
|
|
|
* TV assumes that this will hang something onto the process,
|
|
|
|
* if nothing is attached to it, then TV will believe that this process
|
|
|
|
* has no message queue information.
|
|
|
|
*/
|
2007-08-09 17:52:23 +04:00
|
|
|
int mqs_setup_process (mqs_process *process, const mqs_process_callbacks *pcb)
|
2006-09-18 21:32:04 +04:00
|
|
|
{
|
|
|
|
/* Extract the addresses of the global variables we need and save them away */
|
|
|
|
mpi_process_info *p_info = (mpi_process_info *)mqs_malloc (sizeof (mpi_process_info));
|
|
|
|
|
|
|
|
if (p_info) {
|
|
|
|
mqs_image *image;
|
|
|
|
mpi_image_info *i_info;
|
|
|
|
|
|
|
|
p_info->process_callbacks = pcb;
|
|
|
|
|
|
|
|
/* Now we can get the rest of the info ! */
|
|
|
|
image = mqs_get_image (process);
|
|
|
|
i_info = (mpi_image_info *)mqs_get_image_info (image);
|
|
|
|
|
|
|
|
/* Library starts at zero, so this ensures we go look to start with */
|
|
|
|
p_info->communicator_sequence = -1;
|
|
|
|
/* We have no communicators yet */
|
|
|
|
p_info->communicator_list = NULL;
|
|
|
|
mqs_get_type_sizes (process, &p_info->sizes);
|
|
|
|
|
|
|
|
mqs_put_process_info (process, (mqs_process_info *)p_info);
|
|
|
|
|
|
|
|
return mqs_ok;
|
|
|
|
}
|
|
|
|
return err_no_store;
|
|
|
|
} /* mqs_setup_process */
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
* Check the process for message queues.
|
|
|
|
*/
|
2007-05-07 17:59:37 +04:00
|
|
|
OMPI_DECLSPEC int mqs_process_has_queues (mqs_process *proc, char **msg)
|
2006-09-18 21:32:04 +04:00
|
|
|
{
|
|
|
|
mpi_process_info *p_info = (mpi_process_info *)mqs_get_process_info (proc);
|
|
|
|
mqs_image * image = mqs_get_image (proc);
|
|
|
|
mpi_image_info *i_info = (mpi_image_info *)mqs_get_image_info (image);
|
|
|
|
|
|
|
|
/* Don't bother with a pop up here, it's unlikely to be helpful */
|
|
|
|
*msg = 0;
|
|
|
|
|
|
|
|
if (mqs_find_symbol (image, "ompi_mpi_communicators", &p_info->commlist_base) != mqs_ok)
|
|
|
|
return err_all_communicators;
|
|
|
|
|
|
|
|
if (mqs_find_symbol (image, "mca_pml_base_send_requests", &p_info->send_queue_base) != mqs_ok)
|
2007-06-20 02:34:20 +04:00
|
|
|
return err_mpid_sends;
|
2006-09-18 21:32:04 +04:00
|
|
|
|
|
|
|
if (mqs_find_symbol (image, "mca_pml_base_recv_requests", &p_info->recv_queue_base) != mqs_ok)
|
|
|
|
return err_mpid_recvs;
|
|
|
|
|
|
|
|
return mqs_ok;
|
|
|
|
} /* mqs_setup_process_info */
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
* Check if the communicators have changed by looking at the
|
|
|
|
* sequence number.
|
|
|
|
*/
|
|
|
|
static int communicators_changed (mqs_process *proc)
|
|
|
|
{
|
2006-09-20 11:51:30 +04:00
|
|
|
#if 0 /* TODO: how do we figure out which communicators have changed ? */
|
2006-09-18 21:32:04 +04:00
|
|
|
mpi_process_info *p_info = (mpi_process_info *)mqs_get_process_info (proc);
|
|
|
|
mqs_image * image = mqs_get_image (proc);
|
|
|
|
mpi_image_info *i_info = (mpi_image_info *)mqs_get_image_info (image);
|
|
|
|
mqs_tword_t new_seq = fetch_int (proc,
|
|
|
|
p_info->commlist_base+i_info->sequence_number_offs,
|
|
|
|
p_info);
|
|
|
|
int res = (new_seq != p_info->communicator_sequence);
|
|
|
|
|
|
|
|
/* Save the sequence number for next time */
|
|
|
|
p_info->communicator_sequence = new_seq;
|
|
|
|
|
|
|
|
return res;
|
2006-09-20 11:51:30 +04:00
|
|
|
#endif
|
|
|
|
return 1;
|
2006-09-18 21:32:04 +04:00
|
|
|
} /* mqs_communicators_changed */
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
* Find a matching communicator on our list. We check the recv context
|
|
|
|
* as well as the address since the communicator structures may be
|
|
|
|
* being re-allocated from a free list, in which case the same
|
|
|
|
* address will be re-used a lot, which could confuse us.
|
|
|
|
*/
|
|
|
|
static communicator_t * find_communicator (mpi_process_info *p_info,
|
|
|
|
int recv_ctx)
|
|
|
|
{
|
|
|
|
communicator_t * comm = p_info->communicator_list;
|
|
|
|
|
|
|
|
for (; comm; comm=comm->next) {
|
|
|
|
if (comm->recv_context == recv_ctx)
|
|
|
|
return comm;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
} /* find_communicator */
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
* Comparison function for sorting communicators.
|
|
|
|
*/
|
|
|
|
static int compare_comms (const void *a, const void *b)
|
|
|
|
{
|
|
|
|
communicator_t * ca = *(communicator_t **)a;
|
|
|
|
communicator_t * cb = *(communicator_t **)b;
|
|
|
|
|
|
|
|
return cb->recv_context - ca->recv_context;
|
|
|
|
} /* compare_comms */
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
* Rebuild our list of communicators because something has changed
|
|
|
|
*/
|
|
|
|
static int rebuild_communicator_list (mqs_process *proc)
|
|
|
|
{
|
|
|
|
mpi_process_info *p_info = (mpi_process_info *)mqs_get_process_info (proc);
|
|
|
|
mqs_image * image = mqs_get_image (proc);
|
|
|
|
mpi_image_info *i_info = (mpi_image_info *)mqs_get_image_info (image);
|
|
|
|
communicator_t **commp, *old;
|
2007-01-19 22:48:06 +03:00
|
|
|
int commcount = 0;
|
|
|
|
int i;
|
2006-09-18 21:32:04 +04:00
|
|
|
mqs_tword_t comm_size;
|
|
|
|
mqs_taddr_t comm_addr_base = p_info->commlist_base + i_info->ompi_pointer_array_t.offset.addr;
|
|
|
|
mqs_taddr_t comm_ptr;
|
|
|
|
mqs_communicator remote_comm;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Start by getting the number of registered communicators in the
|
|
|
|
* global communicator array.
|
|
|
|
*/
|
|
|
|
comm_size = fetch_int( proc,
|
|
|
|
p_info->commlist_base + i_info->ompi_pointer_array_t.offset.size,
|
|
|
|
p_info );
|
|
|
|
/* Now get the pointer to the first communicator pointer */
|
|
|
|
comm_addr_base = fetch_pointer( proc, comm_addr_base, p_info );
|
|
|
|
|
|
|
|
for( i = 0; i < comm_size; i++ ) {
|
|
|
|
/* Get the communicator pointer */
|
|
|
|
comm_ptr =
|
|
|
|
fetch_pointer( proc,
|
|
|
|
comm_addr_base + i * p_info->sizes.pointer_size,
|
|
|
|
p_info );
|
|
|
|
if( 0 == comm_ptr ) continue;
|
|
|
|
|
|
|
|
/* Now let's grab the data we want from inside */
|
|
|
|
remote_comm.unique_id = fetch_int( proc,
|
|
|
|
comm_ptr + i_info->ompi_communicator_t.offset.c_contextid,
|
|
|
|
p_info );
|
|
|
|
remote_comm.local_rank = fetch_int( proc,
|
|
|
|
comm_ptr + i_info->ompi_communicator_t.offset.c_my_rank,
|
|
|
|
p_info );
|
|
|
|
mqs_fetch_data( proc, comm_ptr + i_info->ompi_communicator_t.offset.c_name,
|
|
|
|
64, remote_comm.name );
|
|
|
|
|
|
|
|
/* Do we already have this communicator ? */
|
|
|
|
old = find_communicator(p_info, remote_comm.unique_id);
|
|
|
|
if( NULL == old ) {
|
|
|
|
mqs_taddr_t group_base;
|
|
|
|
|
|
|
|
old = (communicator_t *)mqs_malloc (sizeof (communicator_t));
|
|
|
|
/* Save the results */
|
|
|
|
old->next = p_info->communicator_list;
|
|
|
|
p_info->communicator_list = old;
|
2006-09-20 11:51:30 +04:00
|
|
|
old->comm_ptr = comm_ptr;
|
2006-09-18 21:32:04 +04:00
|
|
|
old->recv_context = remote_comm.unique_id;
|
|
|
|
|
|
|
|
/* Now get the information about the group */
|
|
|
|
group_base =
|
|
|
|
fetch_pointer( proc, comm_ptr + i_info->ompi_communicator_t.offset.c_local_group,
|
|
|
|
p_info );
|
|
|
|
old->group = find_or_create_group( proc, group_base );
|
|
|
|
|
|
|
|
}
|
|
|
|
strncpy(old->comm_info.name, remote_comm.name, 64);
|
|
|
|
old->comm_info.unique_id = remote_comm.unique_id;
|
|
|
|
old->comm_info.local_rank = remote_comm.local_rank;
|
2006-09-20 11:51:30 +04:00
|
|
|
if( NULL != old->group ) {
|
2006-09-18 21:32:04 +04:00
|
|
|
old->comm_info.size = old->group->entries;
|
|
|
|
}
|
|
|
|
old->present = TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now iterate over the list tidying up any communicators which
|
|
|
|
* no longer exist, and cleaning the flags on any which do.
|
|
|
|
*/
|
|
|
|
commp = &p_info->communicator_list;
|
|
|
|
|
|
|
|
for (; *commp; commp = &(*commp)->next) {
|
|
|
|
communicator_t *comm = *commp;
|
|
|
|
if (comm->present) {
|
|
|
|
comm->present = FALSE;
|
|
|
|
commcount++;
|
|
|
|
} else { /* It needs to be deleted */
|
|
|
|
*commp = comm->next; /* Remove from the list */
|
|
|
|
group_decref (comm->group); /* Group is no longer referenced from here */
|
|
|
|
mqs_free (comm);
|
2006-09-20 11:51:30 +04:00
|
|
|
if( *commp == NULL ) break;
|
2006-09-18 21:32:04 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (commcount) {
|
|
|
|
/* Sort the list so that it is displayed in some semi-sane order. */
|
|
|
|
communicator_t ** comm_array =
|
|
|
|
(communicator_t **) mqs_malloc(commcount * sizeof (communicator_t *));
|
|
|
|
communicator_t *comm = p_info->communicator_list;
|
2007-01-19 22:48:06 +03:00
|
|
|
|
2006-09-18 21:32:04 +04:00
|
|
|
for (i=0; i<commcount; i++, comm=comm->next)
|
|
|
|
comm_array [i] = comm;
|
|
|
|
|
|
|
|
/* Do the sort */
|
|
|
|
qsort (comm_array, commcount, sizeof (communicator_t *), compare_comms);
|
|
|
|
|
2006-09-20 11:51:30 +04:00
|
|
|
/* Rebuild the list */
|
2006-09-18 21:32:04 +04:00
|
|
|
p_info->communicator_list = NULL;
|
|
|
|
for (i=0; i<commcount; i++) {
|
|
|
|
comm = comm_array[i];
|
|
|
|
comm->next = p_info->communicator_list;
|
|
|
|
p_info->communicator_list = comm;
|
|
|
|
}
|
|
|
|
|
|
|
|
mqs_free (comm_array);
|
|
|
|
}
|
|
|
|
|
|
|
|
return mqs_ok;
|
|
|
|
} /* rebuild_communicator_list */
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
* Update the list of communicators in the process if it has changed.
|
|
|
|
*/
|
2007-08-09 17:52:23 +04:00
|
|
|
int mqs_update_communicator_list (mqs_process *proc)
|
2006-09-18 21:32:04 +04:00
|
|
|
{
|
|
|
|
if (communicators_changed (proc))
|
|
|
|
return rebuild_communicator_list (proc);
|
|
|
|
else
|
|
|
|
return mqs_ok;
|
|
|
|
} /* mqs_update_communicator_list */
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
* Setup to iterate over communicators.
|
|
|
|
* This is where we check whether our internal communicator list needs
|
|
|
|
* updating and if so do it.
|
|
|
|
*/
|
2007-08-09 17:52:23 +04:00
|
|
|
int mqs_setup_communicator_iterator (mqs_process *proc)
|
2006-09-18 21:32:04 +04:00
|
|
|
{
|
|
|
|
mpi_process_info *p_info = (mpi_process_info *)mqs_get_process_info (proc);
|
|
|
|
|
|
|
|
/* Start at the front of the list again */
|
|
|
|
p_info->current_communicator = p_info->communicator_list;
|
|
|
|
/* Reset the operation iterator too */
|
2006-09-19 06:07:06 +04:00
|
|
|
p_info->next_msg.free_list = 0;
|
|
|
|
p_info->next_msg.current_item = 0;
|
|
|
|
p_info->next_msg.opal_list_t_pos.list = 0;
|
2006-09-18 21:32:04 +04:00
|
|
|
|
|
|
|
return p_info->current_communicator == NULL ? mqs_end_of_list : mqs_ok;
|
|
|
|
} /* mqs_setup_communicator_iterator */
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
* Fetch information about the current communicator.
|
|
|
|
*/
|
2007-08-09 17:52:23 +04:00
|
|
|
int mqs_get_communicator (mqs_process *proc, mqs_communicator *comm)
|
2006-09-18 21:32:04 +04:00
|
|
|
{
|
|
|
|
mpi_process_info *p_info = (mpi_process_info *)mqs_get_process_info (proc);
|
|
|
|
|
|
|
|
if (p_info->current_communicator) {
|
|
|
|
*comm = p_info->current_communicator->comm_info;
|
|
|
|
|
|
|
|
return mqs_ok;
|
|
|
|
}
|
|
|
|
return err_no_current_communicator;
|
|
|
|
} /* mqs_get_communicator */
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
* Get the group information about the current communicator.
|
|
|
|
*/
|
2007-08-09 17:52:23 +04:00
|
|
|
int mqs_get_comm_group (mqs_process *proc, int *group_members)
|
2006-09-18 21:32:04 +04:00
|
|
|
{
|
|
|
|
mpi_process_info *p_info = (mpi_process_info *)mqs_get_process_info (proc);
|
|
|
|
communicator_t *comm = p_info->current_communicator;
|
|
|
|
|
|
|
|
if (comm && comm->group) {
|
|
|
|
group_t * g = comm->group;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i=0; i<g->entries; i++)
|
|
|
|
group_members[i] = g->local_to_global[i];
|
|
|
|
|
|
|
|
return mqs_ok;
|
|
|
|
}
|
|
|
|
return err_no_current_communicator;
|
|
|
|
} /* mqs_get_comm_group */
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
* Step to the next communicator.
|
|
|
|
*/
|
2007-08-09 17:52:23 +04:00
|
|
|
int mqs_next_communicator (mqs_process *proc)
|
2006-09-18 21:32:04 +04:00
|
|
|
{
|
|
|
|
mpi_process_info *p_info = (mpi_process_info *)mqs_get_process_info (proc);
|
|
|
|
|
|
|
|
p_info->current_communicator = p_info->current_communicator->next;
|
|
|
|
|
|
|
|
return (p_info->current_communicator != NULL) ? mqs_ok : mqs_end_of_list;
|
|
|
|
} /* mqs_next_communicator */
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parsing the opal_list_t.
|
|
|
|
*/
|
2006-09-19 10:31:42 +04:00
|
|
|
static int opal_list_t_init_parser( mqs_process *proc, mpi_process_info *p_info,
|
|
|
|
mqs_opal_list_t_pos* position, mqs_taddr_t list )
|
|
|
|
{
|
|
|
|
mqs_image * image = mqs_get_image (proc);
|
|
|
|
mpi_image_info *i_info = (mpi_image_info *)mqs_get_image_info (image);
|
|
|
|
|
|
|
|
position->list = list;
|
|
|
|
position->sentinel = position->list + i_info->opal_list_t.offset.opal_list_sentinel;
|
|
|
|
position->current_item =
|
|
|
|
fetch_pointer( proc, position->sentinel + i_info->opal_list_item_t.offset.opal_list_next,
|
|
|
|
p_info );
|
|
|
|
if( position->current_item == position->sentinel )
|
|
|
|
position->current_item = 0;
|
|
|
|
return mqs_ok;
|
|
|
|
}
|
|
|
|
|
2006-09-18 21:32:04 +04:00
|
|
|
static int next_item_opal_list_t( mqs_process *proc, mpi_process_info *p_info,
|
2006-09-19 06:07:06 +04:00
|
|
|
mqs_opal_list_t_pos* position, mqs_taddr_t* active_item )
|
2006-09-18 21:32:04 +04:00
|
|
|
{
|
2006-09-19 10:31:42 +04:00
|
|
|
mqs_image * image = mqs_get_image (proc);
|
2006-09-18 21:32:04 +04:00
|
|
|
mpi_image_info *i_info = (mpi_image_info *)mqs_get_image_info (image);
|
|
|
|
|
2006-09-19 10:31:42 +04:00
|
|
|
*active_item = position->current_item;
|
|
|
|
if( 0 == position->current_item )
|
|
|
|
return mqs_end_of_list;
|
|
|
|
|
|
|
|
position->current_item =
|
|
|
|
fetch_pointer( proc,
|
|
|
|
position->current_item + i_info->opal_list_item_t.offset.opal_list_next,
|
|
|
|
p_info );
|
|
|
|
if( position->current_item == position->sentinel )
|
|
|
|
position->current_item = 0;
|
2006-09-18 21:32:04 +04:00
|
|
|
return mqs_ok;
|
|
|
|
}
|
|
|
|
|
2006-10-11 21:29:29 +04:00
|
|
|
#if defined(CODE_NOT_USED)
|
2006-09-18 21:32:04 +04:00
|
|
|
/**
|
|
|
|
* Parsing the ompi_free_list lists.
|
|
|
|
*
|
|
|
|
*
|
|
|
|
*
|
|
|
|
*
|
|
|
|
*/
|
2006-09-19 10:31:42 +04:00
|
|
|
static void ompi_free_list_t_dump_position( mqs_ompi_free_list_t_pos* position )
|
|
|
|
{
|
|
|
|
printf( "position->opal_list_t_pos.current_item = 0x%llx\n", (long long)position->opal_list_t_pos.current_item );
|
|
|
|
printf( "position->opal_list_t_pos.list = 0x%llx\n", (long long)position->opal_list_t_pos.list );
|
|
|
|
printf( "position->opal_list_t_pos.sentinel = 0x%llx\n", (long long)position->opal_list_t_pos.sentinel );
|
|
|
|
printf( "position->current_item = 0x%llx\n", (long long)position->current_item );
|
|
|
|
printf( "position->upper_bound = 0x%llx\n", (long long)position->upper_bound );
|
|
|
|
printf( "position->free_list = 0x%llx\n", (long long)position->free_list );
|
|
|
|
printf( "position->fl_elem_size = %ld\n", (long)position->fl_elem_size );
|
|
|
|
printf( "position->fl_header_space = %ld\n", (long)position->fl_header_space );
|
|
|
|
printf( "position->fl_alignment = %ld\n", (long)position->fl_alignment );
|
|
|
|
printf( "position->fl_num_per_alloc = %ld\n", (long)position->fl_num_per_alloc );
|
|
|
|
printf( "position->fl_num_allocated = %ld\n", (long)position->fl_num_allocated );
|
|
|
|
printf( "position->fl_num_initial_alloc = %ld\n", (long)position->fl_num_initial_alloc );
|
|
|
|
}
|
2006-10-11 21:29:29 +04:00
|
|
|
#endif /* CODE_NOT_USED */
|
2006-09-19 10:31:42 +04:00
|
|
|
|
2006-09-19 06:07:06 +04:00
|
|
|
static int ompi_free_list_t_init_parser( mqs_process *proc, mpi_process_info *p_info,
|
|
|
|
mqs_ompi_free_list_t_pos* position, mqs_taddr_t free_list )
|
|
|
|
{
|
|
|
|
mqs_image * image = mqs_get_image (proc);
|
|
|
|
mpi_image_info *i_info = (mpi_image_info *)mqs_get_image_info (image);
|
|
|
|
mqs_taddr_t active_allocation;
|
|
|
|
|
2006-09-19 10:31:42 +04:00
|
|
|
position->free_list = free_list;
|
|
|
|
|
2006-09-19 06:07:06 +04:00
|
|
|
position->fl_elem_size =
|
|
|
|
fetch_int( proc, position->free_list + i_info->ompi_free_list_t.offset.fl_elem_size,
|
|
|
|
p_info );
|
|
|
|
position->fl_header_space =
|
|
|
|
fetch_int( proc, position->free_list + i_info->ompi_free_list_t.offset.fl_header_space,
|
|
|
|
p_info );
|
|
|
|
position->fl_alignment =
|
|
|
|
fetch_int( proc, position->free_list + i_info->ompi_free_list_t.offset.fl_alignment,
|
|
|
|
p_info );
|
|
|
|
position->fl_num_per_alloc =
|
|
|
|
fetch_int( proc, position->free_list + i_info->ompi_free_list_t.offset.fl_num_per_alloc,
|
|
|
|
p_info );
|
|
|
|
position->fl_num_allocated =
|
|
|
|
fetch_int( proc, position->free_list + i_info->ompi_free_list_t.offset.fl_num_allocated,
|
|
|
|
p_info );
|
|
|
|
/**
|
|
|
|
* Work around the strange ompi_free_list_t way to allocate elements. The first chunk is
|
|
|
|
* not required to have the same size as the others.
|
|
|
|
* A similar work around should be set for the last chunk of allocations too !!! But how
|
|
|
|
* can we solve ONE equation with 2 unknowns ?
|
|
|
|
*/
|
|
|
|
if( position->fl_num_allocated <= position->fl_num_per_alloc ) {
|
|
|
|
position->fl_num_initial_alloc = position->fl_num_allocated;
|
|
|
|
} else {
|
|
|
|
position->fl_num_initial_alloc = position->fl_num_allocated % position->fl_num_per_alloc;
|
|
|
|
if( 0 == position->fl_num_initial_alloc )
|
|
|
|
position->fl_num_initial_alloc = position->fl_num_per_alloc;
|
|
|
|
}
|
|
|
|
|
2006-09-19 10:31:42 +04:00
|
|
|
/**
|
|
|
|
* Initialize the pointer to the opal_list_t.
|
|
|
|
*/
|
|
|
|
opal_list_t_init_parser( proc, p_info, &position->opal_list_t_pos,
|
|
|
|
position->free_list + i_info->ompi_free_list_t.offset.fl_allocations );
|
|
|
|
next_item_opal_list_t( proc, p_info, &position->opal_list_t_pos, &active_allocation );
|
|
|
|
|
2006-09-19 06:07:06 +04:00
|
|
|
if( 0 == active_allocation ) { /* the end of the list */
|
|
|
|
position->upper_bound = 0;
|
|
|
|
} else {
|
|
|
|
/**
|
|
|
|
* Handle alignment issues...
|
|
|
|
*/
|
|
|
|
active_allocation += i_info->ompi_free_list_memory_t.size;
|
|
|
|
if( 0 != position->fl_alignment ) {
|
|
|
|
mqs_tword_t modulo;
|
|
|
|
|
|
|
|
active_allocation += position->fl_header_space;
|
|
|
|
modulo = active_allocation % position->fl_alignment;
|
|
|
|
active_allocation += (position->fl_alignment - modulo);
|
|
|
|
active_allocation -= position->fl_header_space;
|
|
|
|
}
|
2006-09-19 10:31:42 +04:00
|
|
|
/**
|
|
|
|
* Now let's try to compute the upper bound ...
|
|
|
|
*/
|
|
|
|
position->upper_bound =
|
|
|
|
position->fl_num_initial_alloc * position->fl_elem_size + active_allocation;
|
2006-09-19 06:07:06 +04:00
|
|
|
}
|
|
|
|
position->current_item = active_allocation;
|
2006-09-19 10:31:42 +04:00
|
|
|
|
2006-09-20 11:51:30 +04:00
|
|
|
/*ompi_free_list_t_dump_position( position );*/
|
2006-09-19 06:07:06 +04:00
|
|
|
return mqs_ok;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Return the current position and move the internal counter to the next element.
|
|
|
|
*/
|
|
|
|
static int ompi_free_list_t_next_item( mqs_process *proc, mpi_process_info *p_info,
|
|
|
|
mqs_ompi_free_list_t_pos* position, mqs_taddr_t* active_item )
|
|
|
|
{
|
|
|
|
mqs_image * image = mqs_get_image (proc);
|
|
|
|
mpi_image_info *i_info = (mpi_image_info *)mqs_get_image_info (image);
|
|
|
|
mqs_taddr_t active_allocation;
|
|
|
|
|
|
|
|
*active_item = position->current_item;
|
|
|
|
if( 0 == position->current_item ) /* the end ... */
|
|
|
|
return mqs_ok;
|
|
|
|
|
|
|
|
position->current_item += position->fl_elem_size;
|
2006-09-19 10:31:42 +04:00
|
|
|
if( position->current_item >= position->upper_bound ) {
|
2006-09-20 11:51:30 +04:00
|
|
|
/*printf( "Reach the end of one of the ompi_free_list_t allocations. Go to the next one\n" );*/
|
2006-09-19 06:07:06 +04:00
|
|
|
/* we should go to the next allocation */
|
|
|
|
next_item_opal_list_t( proc, p_info,
|
|
|
|
&position->opal_list_t_pos, &active_allocation );
|
|
|
|
if( 0 == active_allocation ) { /* we're at the end */
|
|
|
|
position->current_item = 0;
|
|
|
|
return mqs_ok;
|
|
|
|
}
|
|
|
|
/**
|
|
|
|
* Handle alignment issues...
|
|
|
|
*/
|
|
|
|
active_allocation += i_info->ompi_free_list_memory_t.size;
|
|
|
|
if( 0 != position->fl_alignment ) {
|
|
|
|
mqs_tword_t modulo;
|
|
|
|
|
|
|
|
active_allocation += position->fl_header_space;
|
|
|
|
modulo = active_allocation % position->fl_alignment;
|
|
|
|
active_allocation += (position->fl_alignment - modulo);
|
|
|
|
active_allocation -= position->fl_header_space;
|
|
|
|
}
|
2006-09-19 10:31:42 +04:00
|
|
|
/**
|
|
|
|
* Now let's try to compute the upper bound ...
|
|
|
|
*/
|
|
|
|
position->upper_bound =
|
|
|
|
position->fl_num_per_alloc * position->fl_elem_size + active_allocation;
|
2006-09-19 06:07:06 +04:00
|
|
|
position->current_item = active_allocation;
|
2006-09-20 11:51:30 +04:00
|
|
|
/*ompi_free_list_t_dump_position( position );*/
|
2006-09-19 06:07:06 +04:00
|
|
|
}
|
2006-09-20 11:51:30 +04:00
|
|
|
/*printf( "Free list actual position %p next element at %p\n", (void*)*active_item,
|
|
|
|
(void*)position->current_item );*/
|
2006-09-19 06:07:06 +04:00
|
|
|
return mqs_ok;
|
|
|
|
}
|
|
|
|
|
2006-10-11 21:29:29 +04:00
|
|
|
#if defined(CODE_NOT_USED)
|
2006-09-19 10:31:42 +04:00
|
|
|
static void dump_request( mqs_taddr_t current_item, mqs_pending_operation *res )
|
2006-09-19 06:07:06 +04:00
|
|
|
{
|
2006-09-20 11:51:30 +04:00
|
|
|
printf( "\n+===============================================+\n" );
|
|
|
|
printf( "|Request 0x%llx contain \n", (long long)current_item );
|
|
|
|
printf( "| res->status = %d\n", res->status );
|
|
|
|
printf( "| res->desired_local_rank = %ld\n", (long)res->desired_local_rank );
|
|
|
|
printf( "| res->desired_global_rank = %ld\n", (long)res->desired_global_rank );
|
|
|
|
printf( "| res->tag_wild = %ld\n", (long)res->tag_wild );
|
|
|
|
printf( "| res->desired_tag = %ld\n", (long)res->desired_tag );
|
|
|
|
printf( "| res->system_buffer = %s\n", (TRUE == res->system_buffer ? "TRUE" : "FALSE") );
|
|
|
|
printf( "| res->buffer = 0x%llx\n", (long long)res->buffer );
|
|
|
|
printf( "| res->desired_length = %ld\n", (long)res->desired_length );
|
|
|
|
if( res->status != mqs_st_pending ) {
|
|
|
|
printf( "| res->actual_length = %ld\n", (long)res->actual_length );
|
|
|
|
printf( "| res->actual_tag = %ld\n", (long)res->actual_tag );
|
|
|
|
printf( "| res->actual_local_rank = %ld\n", (long)res->actual_local_rank );
|
|
|
|
printf( "| res->actual_global_rank = %ld\n", (long)res->actual_global_rank );
|
|
|
|
}
|
|
|
|
printf( "+===============================================+\n\n" );
|
2006-09-19 10:31:42 +04:00
|
|
|
}
|
2006-10-11 21:29:29 +04:00
|
|
|
#endif /* CODE_NOT_USED */
|
2006-09-19 10:31:42 +04:00
|
|
|
|
2006-09-20 11:51:30 +04:00
|
|
|
/**
|
|
|
|
* TODO: ompi_request_completed can be used to detect any changes in the request handles.
|
|
|
|
*/
|
|
|
|
|
2006-09-19 10:31:42 +04:00
|
|
|
/**
|
|
|
|
* Handle the send queue as well as the receive queue. The unexpected queue
|
|
|
|
* is a whole different story ...
|
|
|
|
*/
|
|
|
|
static int fetch_request( mqs_process *proc, mpi_process_info *p_info,
|
|
|
|
mqs_pending_operation *res, int look_for_user_buffer )
|
|
|
|
{
|
|
|
|
mqs_image * image = mqs_get_image (proc);
|
|
|
|
mpi_image_info *i_info = (mpi_image_info *)mqs_get_image_info (image);
|
|
|
|
mqs_taddr_t current_item;
|
|
|
|
mqs_tword_t req_complete, req_valid, req_type;
|
|
|
|
mqs_taddr_t req_buffer, req_comm;
|
|
|
|
|
|
|
|
while( 1 ) {
|
|
|
|
ompi_free_list_t_next_item( proc, p_info,
|
|
|
|
&p_info->next_msg, ¤t_item );
|
|
|
|
if( 0 == current_item )
|
|
|
|
return mqs_end_of_list;
|
|
|
|
req_valid = fetch_int( proc, current_item + i_info->ompi_request_t.offset.req_state, p_info );
|
|
|
|
if( OMPI_REQUEST_INVALID == req_valid ) continue;
|
|
|
|
req_comm = fetch_pointer( proc, current_item + i_info->mca_pml_base_request_t.offset.req_comm, p_info );
|
|
|
|
if( p_info->current_communicator->comm_ptr == req_comm ) break;
|
|
|
|
}
|
|
|
|
|
2006-09-20 11:51:30 +04:00
|
|
|
res->extra_text[0][0] = 0; res->extra_text[1][0] = 0; res->extra_text[2][0] = 0;
|
|
|
|
res->extra_text[3][0] = 0; res->extra_text[4][0] = 0;
|
|
|
|
|
2006-09-19 10:31:42 +04:00
|
|
|
req_type = fetch_int( proc, current_item + i_info->ompi_request_t.offset.req_type, p_info );
|
|
|
|
if( OMPI_REQUEST_PML == req_type ) {
|
2006-09-20 11:51:30 +04:00
|
|
|
req_type =
|
|
|
|
fetch_int( proc, current_item + i_info->mca_pml_base_request_t.offset.req_type,
|
|
|
|
p_info);
|
|
|
|
req_complete = fetch_bool( proc, current_item + i_info->ompi_request_t.offset.req_complete, p_info );
|
2006-09-19 10:31:42 +04:00
|
|
|
res->status = (req_complete == 0 ? mqs_st_pending : mqs_st_complete);
|
2006-09-20 11:51:30 +04:00
|
|
|
|
2006-09-19 10:31:42 +04:00
|
|
|
res->desired_local_rank =
|
|
|
|
fetch_int( proc, current_item + i_info->mca_pml_base_request_t.offset.req_peer, p_info );
|
|
|
|
res->desired_global_rank = res->desired_local_rank;
|
|
|
|
res->desired_tag =
|
|
|
|
fetch_int( proc, current_item + i_info->mca_pml_base_request_t.offset.req_tag, p_info );
|
2006-09-20 11:51:30 +04:00
|
|
|
res->tag_wild = (MPI_ANY_TAG == res->desired_tag ? TRUE : FALSE);
|
2006-09-19 10:31:42 +04:00
|
|
|
|
2006-09-20 11:51:30 +04:00
|
|
|
res->buffer = fetch_pointer( proc, current_item + i_info->mca_pml_base_request_t.offset.req_addr,
|
2006-09-19 10:31:42 +04:00
|
|
|
p_info );
|
2006-09-20 11:51:30 +04:00
|
|
|
res->system_buffer = FALSE;
|
|
|
|
if( MCA_PML_REQUEST_SEND == req_type ) {
|
|
|
|
snprintf( (char *)res->extra_text[0], 64, "Non-blocking send 0x%llx", (long long)current_item );
|
|
|
|
req_buffer =
|
|
|
|
fetch_pointer( proc, current_item + i_info->mca_pml_base_send_request_t.offset.req_addr,
|
|
|
|
p_info );
|
|
|
|
res->system_buffer = ( req_buffer == res->buffer ? FALSE : TRUE );
|
|
|
|
res->desired_length =
|
|
|
|
fetch_int( proc,
|
|
|
|
current_item + i_info->mca_pml_base_send_request_t.offset.req_bytes_packed, p_info );
|
|
|
|
} else if( MCA_PML_REQUEST_RECV == req_type ) {
|
|
|
|
snprintf( (char *)res->extra_text[0], 64, "Non-blocking recv 0x%llx", (long long)current_item );
|
2006-09-19 10:31:42 +04:00
|
|
|
} else {
|
2006-09-20 11:51:30 +04:00
|
|
|
snprintf( (char *)res->extra_text[0], 64, "Unknown type of request 0x%llx", (long long)current_item );
|
2006-09-19 10:31:42 +04:00
|
|
|
}
|
2006-09-20 11:51:30 +04:00
|
|
|
res->desired_length =
|
|
|
|
fetch_int( proc, current_item + i_info->mca_pml_base_request_t.offset.req_count, p_info );
|
2006-09-19 10:31:42 +04:00
|
|
|
|
2006-09-20 11:51:30 +04:00
|
|
|
if( mqs_st_pending != res->status ) { /* The real data from the status */
|
|
|
|
res->actual_length =
|
|
|
|
fetch_int( proc, current_item + i_info->ompi_request_t.offset.req_status +
|
|
|
|
i_info->ompi_status_public_t.offset._count, p_info );
|
|
|
|
res->actual_tag =
|
|
|
|
fetch_int( proc, current_item + i_info->ompi_request_t.offset.req_status +
|
|
|
|
i_info->ompi_status_public_t.offset.MPI_TAG, p_info );
|
|
|
|
res->actual_local_rank =
|
|
|
|
fetch_int( proc, current_item + i_info->ompi_request_t.offset.req_status +
|
|
|
|
i_info->ompi_status_public_t.offset.MPI_SOURCE, p_info );
|
|
|
|
res->actual_global_rank = res->actual_local_rank; /* TODO: what's the global rank ? */
|
|
|
|
}
|
|
|
|
/*dump_request( current_item, res );*/
|
2006-09-19 10:31:42 +04:00
|
|
|
}
|
2006-09-19 06:07:06 +04:00
|
|
|
return mqs_ok;
|
|
|
|
}
|
2006-09-18 21:32:04 +04:00
|
|
|
|
2006-09-19 06:07:06 +04:00
|
|
|
#if 0
|
2006-09-18 21:32:04 +04:00
|
|
|
/***********************************************************************
|
|
|
|
* Handle the unexpected queue and the pending receive queue.
|
|
|
|
* They're very similar.
|
|
|
|
*/
|
|
|
|
static int fetch_receive (mqs_process *proc, mpi_process_info *p_info,
|
|
|
|
mqs_pending_operation *res, int look_for_user_buffer)
|
|
|
|
{
|
|
|
|
mqs_image * image = mqs_get_image (proc);
|
2006-09-19 06:07:06 +04:00
|
|
|
mpi_image_info *i_info = (mpi_image_info *)mqs_get_image_info (image);
|
|
|
|
communicator_t *comm = p_info->current_communicator;
|
2006-09-18 21:32:04 +04:00
|
|
|
mqs_tword_t wanted_context = comm->recv_context;
|
|
|
|
mqs_taddr_t base = fetch_pointer (proc, p_info->next_msg, p_info);
|
|
|
|
|
|
|
|
while (base != 0) { /* Well, there's a queue, at least ! */
|
|
|
|
mqs_tword_t actual_context = fetch_int (proc, base + i_info->context_id_offs, p_info);
|
|
|
|
|
|
|
|
if (actual_context == wanted_context) { /* Found a good one */
|
|
|
|
mqs_tword_t tag = fetch_int (proc, base + i_info->tag_offs, p_info);
|
|
|
|
mqs_tword_t tagmask = fetch_int (proc, base + i_info->tagmask_offs, p_info);
|
|
|
|
mqs_tword_t lsrc = fetch_int (proc, base + i_info->lsrc_offs, p_info);
|
|
|
|
mqs_tword_t srcmask = fetch_int (proc, base + i_info->srcmask_offs, p_info);
|
|
|
|
mqs_taddr_t ptr = fetch_pointer (proc, base + i_info->ptr_offs, p_info);
|
|
|
|
|
|
|
|
/* Fetch the fields from the MPIR_RHANDLE */
|
|
|
|
int is_complete = fetch_int (proc, ptr + i_info->is_complete_offs, p_info);
|
|
|
|
mqs_taddr_t buf = fetch_pointer (proc, ptr + i_info->buf_offs, p_info);
|
|
|
|
mqs_tword_t len = fetch_int (proc, ptr + i_info->len_offs, p_info);
|
|
|
|
mqs_tword_t count = fetch_int (proc, ptr + i_info->count_offs, p_info);
|
|
|
|
|
|
|
|
/* If we don't have start, then use buf instead... */
|
|
|
|
mqs_taddr_t start;
|
|
|
|
if (i_info->start_offs < 0)
|
|
|
|
start = buf;
|
|
|
|
else
|
|
|
|
start = fetch_pointer (proc, ptr + i_info->start_offs, p_info);
|
|
|
|
|
|
|
|
/* Hurrah, we should now be able to fill in all the necessary fields in the
|
|
|
|
* result !
|
|
|
|
*/
|
|
|
|
res->status = is_complete ? mqs_st_complete : mqs_st_pending; /* We can't discern matched */
|
|
|
|
if (srcmask == 0) {
|
|
|
|
res->desired_local_rank = -1;
|
|
|
|
res->desired_global_rank = -1;
|
|
|
|
} else {
|
|
|
|
res->desired_local_rank = lsrc;
|
|
|
|
res->desired_global_rank = translate (comm->group, lsrc);
|
|
|
|
|
|
|
|
}
|
|
|
|
res->tag_wild = (tagmask == 0);
|
|
|
|
res->desired_tag = tag;
|
|
|
|
|
|
|
|
if (look_for_user_buffer) {
|
|
|
|
res->system_buffer = FALSE;
|
|
|
|
res->buffer = buf;
|
|
|
|
res->desired_length = len;
|
|
|
|
} else {
|
|
|
|
res->system_buffer = TRUE;
|
|
|
|
/* Correct an oddity. If the buffer length is zero then no buffer
|
|
|
|
* is allocated, but the descriptor is left with random data.
|
|
|
|
*/
|
|
|
|
if (count == 0)
|
|
|
|
start = 0;
|
|
|
|
|
|
|
|
res->buffer = start;
|
|
|
|
res->desired_length = count;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is_complete) { /* Fill in the actual results, rather than what we were looking for */
|
|
|
|
mqs_tword_t mpi_source = fetch_int (proc, ptr + i_info->MPI_SOURCE_offs, p_info);
|
|
|
|
mqs_tword_t mpi_tag = fetch_int (proc, ptr + i_info->MPI_TAG_offs, p_info);
|
|
|
|
|
|
|
|
res->actual_length = count;
|
|
|
|
res->actual_tag = mpi_tag;
|
|
|
|
res->actual_local_rank = mpi_source;
|
|
|
|
res->actual_global_rank= translate (comm->group, mpi_source);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Don't forget to step the queue ! */
|
|
|
|
p_info->next_msg = base + i_info->next_offs;
|
|
|
|
return mqs_ok;
|
|
|
|
} else { /* Try the next one */
|
|
|
|
base = fetch_pointer (proc, base + i_info->next_offs, p_info);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
p_info->next_msg = 0;
|
|
|
|
return mqs_end_of_list;
|
|
|
|
} /* fetch_receive */
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
* Handle the send queue, somewhat different.
|
|
|
|
*/
|
|
|
|
static int fetch_send (mqs_process *proc, mpi_process_info *p_info,
|
|
|
|
mqs_pending_operation *res)
|
|
|
|
{
|
|
|
|
mqs_image * image = mqs_get_image (proc);
|
|
|
|
mpi_image_info *i_info = (mpi_image_info *)mqs_get_image_info (image);
|
|
|
|
communicator_t *comm = p_info->current_communicator;
|
|
|
|
mqs_taddr_t base = fetch_pointer (proc, p_info->next_msg, p_info);
|
|
|
|
|
2007-08-04 04:34:42 +04:00
|
|
|
/**
|
|
|
|
* Mark all send requests as non blocking. In Open MPI we are able to make
|
|
|
|
* a difference between blocking and non blocking by looking into the
|
|
|
|
* type field, but I'll keep this for later.
|
2006-09-18 21:32:04 +04:00
|
|
|
*/
|
2007-08-04 04:34:42 +04:00
|
|
|
strcpy( (char *)res->extra_text[0], "Non-blocking send" );
|
2006-09-18 21:32:04 +04:00
|
|
|
res->extra_text[1][0] = 0;
|
|
|
|
|
|
|
|
while (base != 0) { /* Well, there's a queue, at least ! */
|
|
|
|
/* Check if it's one we're interested in ? */
|
|
|
|
mqs_taddr_t commp = fetch_pointer (proc, base+i_info->db_comm_offs, p_info);
|
|
|
|
int recv_ctx = fetch_int (proc, commp+i_info->recv_context_offs, p_info);
|
|
|
|
|
|
|
|
mqs_taddr_t next = base+i_info->db_next_offs;
|
|
|
|
|
|
|
|
if (recv_ctx == comm->recv_context) { /* Found one */
|
|
|
|
mqs_tword_t target = fetch_int (proc, base+i_info->db_target_offs, p_info);
|
|
|
|
mqs_tword_t tag = fetch_int (proc, base+i_info->db_tag_offs, p_info);
|
|
|
|
mqs_tword_t length = fetch_int (proc, base+i_info->db_byte_length_offs, p_info);
|
|
|
|
mqs_taddr_t data = fetch_pointer (proc, base+i_info->db_data_offs, p_info);
|
|
|
|
mqs_taddr_t shandle= fetch_pointer (proc, base+i_info->db_shandle_offs, p_info);
|
|
|
|
mqs_tword_t complete=fetch_int (proc, shandle+i_info->is_complete_offs, p_info);
|
|
|
|
|
|
|
|
/* Ok, fill in the results */
|
|
|
|
res->status = complete ? mqs_st_complete : mqs_st_pending; /* We can't discern matched */
|
|
|
|
res->actual_local_rank = res->desired_local_rank = target;
|
|
|
|
res->actual_global_rank= res->desired_global_rank= translate (comm->group, target);
|
|
|
|
res->tag_wild = FALSE;
|
|
|
|
res->actual_tag = res->desired_tag = tag;
|
|
|
|
res->desired_length = res->actual_length = length;
|
|
|
|
res->system_buffer = FALSE;
|
|
|
|
res->buffer = data;
|
|
|
|
|
|
|
|
p_info->next_msg = next;
|
|
|
|
return mqs_ok;
|
|
|
|
}
|
|
|
|
|
|
|
|
base = fetch_pointer (proc, next, p_info);
|
|
|
|
}
|
|
|
|
|
|
|
|
p_info->next_msg = 0;
|
|
|
|
return mqs_end_of_list;
|
|
|
|
} /* fetch_send */
|
2006-09-19 06:07:06 +04:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
* Setup to iterate over pending operations
|
|
|
|
*/
|
2007-08-09 17:52:23 +04:00
|
|
|
int mqs_setup_operation_iterator (mqs_process *proc, int op)
|
2006-09-19 06:07:06 +04:00
|
|
|
{
|
|
|
|
mpi_process_info *p_info = (mpi_process_info *)mqs_get_process_info (proc);
|
|
|
|
|
|
|
|
p_info->what = (mqs_op_class)op;
|
|
|
|
|
|
|
|
switch (op) {
|
|
|
|
case mqs_pending_sends:
|
|
|
|
ompi_free_list_t_init_parser( proc, p_info, &p_info->next_msg, p_info->send_queue_base );
|
|
|
|
return mqs_ok;
|
|
|
|
|
|
|
|
case mqs_pending_receives:
|
|
|
|
ompi_free_list_t_init_parser( proc, p_info, &p_info->next_msg, p_info->recv_queue_base );
|
|
|
|
return mqs_ok;
|
|
|
|
|
|
|
|
case mqs_unexpected_messages: /* TODO */
|
|
|
|
return mqs_no_information;
|
|
|
|
|
|
|
|
default:
|
|
|
|
return err_bad_request;
|
|
|
|
}
|
|
|
|
} /* mqs_setup_operation_iterator */
|
2006-09-18 21:32:04 +04:00
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
* Fetch the next valid operation.
|
2007-08-04 04:34:42 +04:00
|
|
|
* Since Open MPI only maintains a single queue of each type of operation,
|
2006-09-18 21:32:04 +04:00
|
|
|
* we have to run over it and filter out the operations which
|
|
|
|
* match the active communicator.
|
|
|
|
*/
|
2007-08-09 17:52:23 +04:00
|
|
|
int mqs_next_operation (mqs_process *proc, mqs_pending_operation *op)
|
2006-09-18 21:32:04 +04:00
|
|
|
{
|
|
|
|
mpi_process_info *p_info = (mpi_process_info *)mqs_get_process_info (proc);
|
|
|
|
|
|
|
|
switch (p_info->what) {
|
|
|
|
case mqs_pending_receives:
|
2006-09-19 06:07:06 +04:00
|
|
|
return fetch_request( proc, p_info, op, TRUE );
|
2006-09-18 21:32:04 +04:00
|
|
|
case mqs_unexpected_messages:
|
2006-09-19 06:07:06 +04:00
|
|
|
/* TODO: not handled yet */
|
2006-09-20 11:51:30 +04:00
|
|
|
return err_bad_request;
|
|
|
|
case mqs_pending_sends:
|
|
|
|
return fetch_request( proc, p_info, op, FALSE );
|
2006-09-18 21:32:04 +04:00
|
|
|
default: return err_bad_request;
|
|
|
|
}
|
|
|
|
} /* mqs_next_operation */
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
* Destroy the info.
|
|
|
|
*/
|
2007-08-09 17:52:23 +04:00
|
|
|
void mqs_destroy_process_info (mqs_process_info *mp_info)
|
2006-09-18 21:32:04 +04:00
|
|
|
{
|
|
|
|
mpi_process_info *p_info = (mpi_process_info *)mp_info;
|
|
|
|
/* Need to handle the communicators and groups too */
|
|
|
|
communicator_t *comm = p_info->communicator_list;
|
|
|
|
|
|
|
|
while (comm) {
|
|
|
|
communicator_t *next = comm->next;
|
|
|
|
|
2006-09-20 11:51:30 +04:00
|
|
|
if( NULL != comm->group )
|
|
|
|
group_decref (comm->group); /* Group is no longer referenced from here */
|
2006-09-18 21:32:04 +04:00
|
|
|
mqs_free (comm);
|
|
|
|
|
|
|
|
comm = next;
|
|
|
|
}
|
|
|
|
mqs_free (p_info);
|
|
|
|
} /* mqs_destroy_process_info */
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
* Free off the data we associated with an image. Since we malloced it
|
|
|
|
* we just free it.
|
|
|
|
*/
|
2007-08-09 17:52:23 +04:00
|
|
|
void mqs_destroy_image_info (mqs_image_info *info)
|
2006-09-18 21:32:04 +04:00
|
|
|
{
|
|
|
|
mqs_free (info);
|
|
|
|
} /* mqs_destroy_image_info */
|
|
|
|
|
|
|
|
/***********************************************************************/
|
|
|
|
static mqs_taddr_t fetch_pointer (mqs_process * proc, mqs_taddr_t addr, mpi_process_info *p_info)
|
|
|
|
{
|
|
|
|
int asize = p_info->sizes.pointer_size;
|
|
|
|
char data [8]; /* ASSUME a pointer fits in 8 bytes */
|
|
|
|
mqs_taddr_t res = 0;
|
|
|
|
|
|
|
|
if (mqs_ok == mqs_fetch_data (proc, addr, asize, data))
|
|
|
|
mqs_target_to_host (proc, data,
|
|
|
|
((char *)&res) + (host_is_big_endian ? sizeof(mqs_taddr_t)-asize : 0),
|
|
|
|
asize);
|
|
|
|
|
|
|
|
return res;
|
|
|
|
} /* fetch_pointer */
|
|
|
|
|
|
|
|
/***********************************************************************/
|
|
|
|
static mqs_tword_t fetch_int (mqs_process * proc, mqs_taddr_t addr, mpi_process_info *p_info)
|
|
|
|
{
|
|
|
|
int isize = p_info->sizes.int_size;
|
|
|
|
char buffer[8]; /* ASSUME an integer fits in 8 bytes */
|
|
|
|
mqs_tword_t res = 0;
|
|
|
|
|
|
|
|
if (mqs_ok == mqs_fetch_data (proc, addr, isize, buffer))
|
|
|
|
mqs_target_to_host (proc, buffer,
|
|
|
|
((char *)&res) + (host_is_big_endian ? sizeof(mqs_tword_t)-isize : 0),
|
|
|
|
isize);
|
|
|
|
|
|
|
|
return res;
|
|
|
|
} /* fetch_int */
|
|
|
|
|
2006-09-20 11:51:30 +04:00
|
|
|
/***********************************************************************/
|
|
|
|
static mqs_tword_t fetch_bool(mqs_process * proc, mqs_taddr_t addr, mpi_process_info *p_info)
|
|
|
|
{
|
|
|
|
int isize = 1;
|
|
|
|
char buffer; /* ASSUME an integer fits in 8 bytes */
|
|
|
|
mqs_tword_t res = 0;
|
|
|
|
|
|
|
|
if (mqs_ok == mqs_fetch_data (proc, addr, isize, &buffer))
|
|
|
|
res = (mqs_tword_t)buffer;
|
|
|
|
|
|
|
|
return res;
|
|
|
|
} /* fetch_bool */
|
|
|
|
|
2006-09-18 21:32:04 +04:00
|
|
|
/***********************************************************************/
|
|
|
|
/* Convert an error code into a printable string */
|
2007-08-09 17:52:23 +04:00
|
|
|
char * mqs_dll_error_string (int errcode)
|
2006-09-18 21:32:04 +04:00
|
|
|
{
|
|
|
|
switch (errcode) {
|
|
|
|
case err_silent_failure:
|
|
|
|
return "";
|
|
|
|
case err_no_current_communicator:
|
|
|
|
return "No current communicator in the communicator iterator";
|
|
|
|
case err_bad_request:
|
|
|
|
return "Attempting to setup to iterate over an unknown queue of operations";
|
|
|
|
case err_no_store:
|
|
|
|
return "Unable to allocate store";
|
|
|
|
case err_failed_qhdr:
|
|
|
|
return "Failed to find type MPID_QHDR";
|
|
|
|
case err_unexpected:
|
|
|
|
return "Failed to find field 'unexpected' in MPID_QHDR";
|
|
|
|
case err_posted:
|
|
|
|
return "Failed to find field 'posted' in MPID_QHDR";
|
|
|
|
case err_failed_queue:
|
|
|
|
return "Failed to find type MPID_QUEUE";
|
|
|
|
case err_first:
|
|
|
|
return "Failed to find field 'first' in MPID_QUEUE";
|
|
|
|
case err_context_id:
|
|
|
|
return "Failed to find field 'context_id' in MPID_QEL";
|
|
|
|
case err_tag:
|
|
|
|
return "Failed to find field 'tag' in MPID_QEL";
|
|
|
|
case err_tagmask:
|
|
|
|
return "Failed to find field 'tagmask' in MPID_QEL";
|
|
|
|
case err_lsrc:
|
|
|
|
return "Failed to find field 'lsrc' in MPID_QEL";
|
|
|
|
case err_srcmask:
|
|
|
|
return "Failed to find field 'srcmask' in MPID_QEL";
|
|
|
|
case err_next:
|
|
|
|
return "Failed to find field 'next' in MPID_QEL";
|
|
|
|
case err_ptr:
|
|
|
|
return "Failed to find field 'ptr' in MPID_QEL";
|
|
|
|
case err_missing_type:
|
|
|
|
return "Failed to find some type";
|
|
|
|
case err_missing_symbol:
|
|
|
|
return "Failed to find field the global symbol";
|
|
|
|
case err_db_shandle:
|
|
|
|
return "Failed to find field 'db_shandle' in MPIR_SQEL";
|
|
|
|
case err_db_comm:
|
|
|
|
return "Failed to find field 'db_comm' in MPIR_SQEL";
|
|
|
|
case err_db_target:
|
|
|
|
return "Failed to find field 'db_target' in MPIR_SQEL";
|
|
|
|
case err_db_tag:
|
|
|
|
return "Failed to find field 'db_tag' in MPIR_SQEL";
|
|
|
|
case err_db_data:
|
|
|
|
return "Failed to find field 'db_data' in MPIR_SQEL";
|
|
|
|
case err_db_byte_length:
|
|
|
|
return "Failed to find field 'db_byte_length' in MPIR_SQEL";
|
|
|
|
case err_db_next:
|
|
|
|
return "Failed to find field 'db_next' in MPIR_SQEL";
|
|
|
|
case err_failed_rhandle:
|
|
|
|
return "Failed to find type MPIR_RHANDLE";
|
|
|
|
case err_is_complete:
|
|
|
|
return "Failed to find field 'is_complete' in MPIR_RHANDLE";
|
|
|
|
case err_buf:
|
|
|
|
return "Failed to find field 'buf' in MPIR_RHANDLE";
|
|
|
|
case err_len:
|
|
|
|
return "Failed to find field 'len' in MPIR_RHANDLE";
|
|
|
|
case err_s:
|
|
|
|
return "Failed to find field 's' in MPIR_RHANDLE";
|
|
|
|
case err_failed_status:
|
|
|
|
return "Failed to find type MPI_Status";
|
|
|
|
case err_count:
|
|
|
|
return "Failed to find field 'count' in MPIR_Status";
|
|
|
|
case err_MPI_SOURCE:
|
|
|
|
return "Failed to find field 'MPI_SOURCE' in MPIR_Status";
|
|
|
|
case err_MPI_TAG:
|
|
|
|
return "Failed to find field 'MPI_TAG' in MPIR_Status";
|
|
|
|
case err_failed_commlist:
|
|
|
|
return "Failed to find type MPIR_Comm_list";
|
|
|
|
case err_sequence_number:
|
|
|
|
return "Failed to find field 'sequence_number' in MPIR_Comm_list";
|
|
|
|
case err_comm_first:
|
|
|
|
return "Failed to find field 'comm_first' in MPIR_Comm_list";
|
|
|
|
case err_failed_communicator:
|
|
|
|
return "Failed to find type MPIR_Communicator";
|
|
|
|
case err_lrank_to_grank:
|
|
|
|
return "Failed to find field 'lrank_to_grank' in MPIR_Communicator";
|
|
|
|
case err_send_context:
|
|
|
|
return "Failed to find field 'send_context' in MPIR_Communicator";
|
|
|
|
case err_recv_context:
|
|
|
|
return "Failed to find field 'recv_context' in MPIR_Communicator";
|
|
|
|
case err_comm_next:
|
|
|
|
return "Failed to find field 'comm_next' in MPIR_Communicator";
|
|
|
|
case err_comm_name:
|
|
|
|
return "Failed to find field 'comm_name' in MPIR_Communicator";
|
|
|
|
case err_all_communicators:
|
|
|
|
return "Failed to find the global symbol MPIR_All_communicators";
|
2007-06-20 02:34:20 +04:00
|
|
|
case err_mpid_sends:
|
|
|
|
return "Failed to access the global send requests list";
|
2006-09-18 21:32:04 +04:00
|
|
|
case err_mpid_recvs:
|
2007-06-20 02:34:20 +04:00
|
|
|
return "Failed to access the global receive requests list";
|
2006-09-18 21:32:04 +04:00
|
|
|
case err_group_corrupt:
|
|
|
|
return "Could not read a communicator's group from the process (probably a store corruption)";
|
|
|
|
|
|
|
|
default: return "Unknown error code";
|
|
|
|
}
|
|
|
|
} /* mqs_dll_error_string */
|