1
1
1. Fixed the GPR search engine so that AND-mode key searches work, and so that multiple objects with the same key no longer corrupt the search.

2. Added an orte_bitmap class based on the existing ompi_bitmap one, but without the Fortran "pollution".

3. Added a new name service function called create_my_name to remove the duplicate name creation that was happening with the RML. Basically, the RML has to assign a name when a process makes first contact if the process doesn't already have a name. For processes that get a name passed into them, this was okay - the name was already assigned. For other processes (e.g., singletons), this was not okay - the first message to the seed daemon was to create a name, which caused the RML to assign one, and then the name service to assign another.

4. Changed orted so that it gets its name the way every other process does - during orte_init.

This commit was SVN r5842.
Этот коммит содержится в:
Ralph Castain 2005-05-24 13:39:15 +00:00
родитель 7974c5b6b8
Коммит 2451f3bdc9
24 изменённых файлов: 662 добавлений и 105 удалений

Просмотреть файл

@ -23,8 +23,8 @@ noinst_LTLIBRARIES = liblfc.la
headers = \
ompi_bitmap.h \
ompi_circular_buffer_fifo.h \
ompi_fifo.h \
ompi_circular_buffer_fifo.h \
ompi_fifo.h \
ompi_free_list.h \
ompi_hash_table.h \
ompi_list.h \
@ -34,7 +34,8 @@ headers = \
ompi_value_array.h \
ompi_rb_tree.h \
orte_pointer_array.h \
orte_value_array.h
orte_value_array.h \
orte_bitmap.h
liblfc_la_SOURCES = \
$(headers) \
@ -48,7 +49,8 @@ liblfc_la_SOURCES = \
ompi_value_array.c \
ompi_rb_tree.c \
orte_pointer_array.c \
orte_value_array.c
orte_value_array.c \
orte_bitmap.c
# Conditionally install the header files

243
src/class/orte_bitmap.c Обычный файл
Просмотреть файл

@ -0,0 +1,243 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include <stdio.h>
#include "include/orte_constants.h"
#include "mca/errmgr/errmgr.h"
#include "class/orte_bitmap.h"
/* Number of bits stored in one element of the bitmap array
 * (sizeof(char) is 1, so this evaluates to 8; assumes 8-bit chars). */
#define SIZE_OF_CHAR (sizeof(char) * 8)
/* Constructor / destructor for the orte_bitmap_t class, defined below. */
static void orte_bitmap_construct(orte_bitmap_t *bm);
static void orte_bitmap_destruct(orte_bitmap_t *bm);
/* Class instance for orte_bitmap_t: names ompi_object_t as the parent
 * class and passes the constructor and destructor declared above. */
OBJ_CLASS_INSTANCE(orte_bitmap_t, ompi_object_t,
orte_bitmap_construct, orte_bitmap_destruct);
/*
 * Class constructor: put a freshly created bitmap into the empty state.
 * No storage is attached here; the pointer is NULL and both size
 * fields are zero until orte_bitmap_init() allocates the array.
 */
static void
orte_bitmap_construct(orte_bitmap_t *bm)
{
    bm->bitmap = NULL;
    bm->array_size = 0;
    bm->legal_numbits = 0;
}
/*
 * Class destructor: release the bitmap storage and return the object to
 * the same empty state the constructor produces.
 *
 * The fields are reset after the free so that a stale pointer is never
 * left behind: destructing the same object twice, or inspecting it after
 * destruction, is then harmless instead of a double free / use after free.
 */
static void
orte_bitmap_destruct(orte_bitmap_t *bm)
{
    /* free(NULL) is a no-op, so no guard is needed */
    free(bm->bitmap);
    bm->bitmap = NULL;
    bm->array_size = 0;
    bm->legal_numbits = 0;
}
/*
 * Allocate and zero the storage for a bitmap of "size" bits.
 *
 * @param bm   The bitmap to initialize (IN/OUT)
 * @param size Initial number of legal bits; must be greater than zero (IN)
 * @return ORTE_ERR_BAD_PARAM if bm is NULL or size is 0,
 *         ORTE_ERR_OUT_OF_RESOURCE if the allocation fails,
 *         ORTE_SUCCESS otherwise
 *
 * A size of zero is rejected: it would leave array_size at 0, and the
 * growth computation in orte_bitmap_set_bit() divides by array_size.
 * The bitmap object is only modified once the allocation has succeeded.
 */
int
orte_bitmap_init(orte_bitmap_t *bm, size_t size)
{
    size_t actual_size;
    unsigned char *storage;

    if (NULL == bm || 0 == size) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* round the requested number of bits up to a whole number of bytes */
    actual_size = size / SIZE_OF_CHAR;
    if (0 != (size % SIZE_OF_CHAR)) {
        ++actual_size;
    }

    storage = (unsigned char *) malloc(actual_size);
    if (NULL == storage) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* every bit starts out cleared */
    memset(storage, 0, actual_size);

    bm->bitmap = storage;
    bm->array_size = actual_size;
    bm->legal_numbits = size;
    return ORTE_SUCCESS;
}
/*
 * Set one bit, growing the bitmap if the bit lies beyond the current array.
 *
 * @param bm  The bitmap to modify (IN/OUT)
 * @param bit Zero-based number of the bit to set (IN)
 * @return ORTE_ERR_BAD_PARAM if bm is NULL,
 *         ORTE_ERR_OUT_OF_RESOURCE if the array cannot be grown
 *         (the existing bitmap is left intact in that case),
 *         ORTE_SUCCESS otherwise
 *
 * After a successful call the bit is always within the legal range, so a
 * following orte_bitmap_is_set_bit()/orte_bitmap_clear_bit() on the same
 * bit succeeds even when the bit landed in the padding of the last byte.
 */
int
orte_bitmap_set_bit(orte_bitmap_t *bm, size_t bit)
{
    size_t index, offset, new_size;
    unsigned char *grown;

    if (NULL == bm) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    index = bit / SIZE_OF_CHAR;
    offset = bit % SIZE_OF_CHAR;

    if (index >= bm->array_size) {
        /* Out of range is not an error for a set: expand the array.  The
           new size is the smallest multiple of the current size that
           holds the index; an empty array simply grows to fit. */
        if (0 == bm->array_size) {
            new_size = index + 1;
        } else {
            new_size = (index / bm->array_size + 1) * bm->array_size;
        }

        /* keep the old pointer until realloc is known to have succeeded */
        grown = (unsigned char *) realloc(bm->bitmap, new_size);
        if (NULL == grown) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        /* zero out the new elements */
        memset(grown + bm->array_size, 0, new_size - bm->array_size);

        bm->bitmap = grown;
        bm->array_size = new_size;
    }

    /* the bit just set must be legal, whether or not the array grew */
    if (bit >= bm->legal_numbits) {
        bm->legal_numbits = bit + 1;
    }

    bm->bitmap[index] |= (unsigned char) (1u << offset);
    return ORTE_SUCCESS;
}
/*
 * Clear one bit. Unlike setting, clearing never grows the bitmap.
 *
 * @param bm  The bitmap to modify (IN/OUT)
 * @param bit Zero-based number of the bit to clear (IN)
 * @return ORTE_ERR_BAD_PARAM if bm is NULL or the bit is outside the
 *         legal range, ORTE_SUCCESS otherwise
 */
int
orte_bitmap_clear_bit(orte_bitmap_t *bm, size_t bit)
{
    size_t index, offset;

    /* bm must be tested before it is dereferenced; comparing with >=
       avoids the unsigned wrap of "legal_numbits - 1" when it is 0 */
    if (NULL == bm || bit >= bm->legal_numbits) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    index = bit / SIZE_OF_CHAR;
    offset = bit % SIZE_OF_CHAR;

    /* defensive: never index past the allocated array */
    if (index >= bm->array_size) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    bm->bitmap[index] &= (unsigned char) ~(1u << offset);
    return ORTE_SUCCESS;
}
/*
 * Report whether one bit is set.
 *
 * @param bm  The bitmap to query (IN)
 * @param bit Zero-based number of the bit to test (IN)
 * @return ORTE_ERR_BAD_PARAM if bm is NULL or the bit is outside the
 *         legal range, 1 if the bit is set, 0 if it is not set
 */
int
orte_bitmap_is_set_bit(orte_bitmap_t *bm, size_t bit)
{
    size_t index, offset;

    /* bm must be tested before it is dereferenced; comparing with >=
       avoids the unsigned wrap of "legal_numbits - 1" when it is 0 */
    if (NULL == bm || bit >= bm->legal_numbits) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    index = bit / SIZE_OF_CHAR;
    offset = bit % SIZE_OF_CHAR;

    /* defensive: never index past the allocated array */
    if (index >= bm->array_size) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    return (0 != (bm->bitmap[index] & (1u << offset))) ? 1 : 0;
}
/*
 * Clear every bit held in the bitmap array.
 *
 * @param bm The bitmap to clear (IN/OUT)
 * @return ORTE_ERR_BAD_PARAM if bm is NULL, ORTE_SUCCESS otherwise
 *
 * A bitmap with no storage attached has nothing to clear and is treated
 * as success; memset is skipped because passing it a NULL pointer is
 * undefined even for a length of zero.
 */
int
orte_bitmap_clear_all_bits(orte_bitmap_t *bm)
{
    if (NULL == bm) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    if (NULL != bm->bitmap && 0 != bm->array_size) {
        memset(bm->bitmap, 0, bm->array_size);
    }
    return ORTE_SUCCESS;
}
/*
 * Turn on every bit held in the bitmap array.
 *
 * @param bm The bitmap to fill (IN/OUT)
 * @return ORTE_ERR_BAD_PARAM if bm is NULL, ORTE_SUCCESS otherwise
 *
 * Every byte of the array is filled, so the padding bits of the last
 * byte are set as well; legal_numbits is not changed.
 */
int
orte_bitmap_set_all_bits(orte_bitmap_t *bm)
{
    size_t remaining;

    if (NULL == bm) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* walk the array from the last byte down to the first */
    remaining = bm->array_size;
    while (remaining > 0) {
        --remaining;
        bm->bitmap[remaining] = (unsigned char) ~0;
    }

    return ORTE_SUCCESS;
}
/*
 * Find the lowest-numbered clear bit, set it, and report its number.
 *
 * @param bm       The bitmap to search and modify (IN/OUT)
 * @param position Receives the number of the bit that was set (OUT)
 * @return ORTE_ERR_BAD_PARAM if bm or position is NULL, otherwise
 *         ORTE_SUCCESS or the result of orte_bitmap_set_bit() when the
 *         array is full and the first bit past its end is set instead
 *
 * The bit that is returned is always made legal, so it can be passed to
 * orte_bitmap_is_set_bit()/orte_bitmap_clear_bit() afterwards even when
 * it was found in the padding of the last byte.
 */
int
orte_bitmap_find_and_set_first_unset_bit(orte_bitmap_t *bm, size_t *position)
{
    size_t i = 0;
    size_t bit_in_byte = 0;
    unsigned char temp;
    const unsigned char all_ones = 0xff;

    if (NULL == bm || NULL == position) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    *position = 0;

    /* skip every byte that has no clear bit */
    while ((i < bm->array_size) && (bm->bitmap[i] == all_ones)) {
        ++i;
    }

    if (i == bm->array_size) {
        /* the whole array is full: set the first bit past its end */
        *position = bm->array_size * SIZE_OF_CHAR;
        return orte_bitmap_set_bit(bm, *position);
    }

    /* this byte has a clear bit: count the set bits below it */
    temp = bm->bitmap[i];
    while (temp & 0x1) {
        ++bit_in_byte;
        temp >>= 1;
    }

    bm->bitmap[i] |= (unsigned char) (1u << bit_in_byte);
    *position = i * SIZE_OF_CHAR + bit_in_byte;

    /* keep the bit just handed out inside the legal range */
    if (*position >= bm->legal_numbits) {
        bm->legal_numbits = *position + 1;
    }

    return ORTE_SUCCESS;
}

156
src/class/orte_bitmap.h Обычный файл
Просмотреть файл

@ -0,0 +1,156 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
/** @file
 *
 * A bitmap implementation. Bit numbering starts at 0, so this bitmap
 * has bits numbered bit 0, bit 1, bit 2 and so on. The bitmap has
 * auto-expansion capabilities: once the size is set during init, it
 * is automatically expanded when a bit beyond the current size is
 * set. Note that expansion happens only when a bit is set -- so the
 * functions that can expand the bitmap are orte_bitmap_set_bit and
 * orte_bitmap_find_and_set_first_unset_bit. Other functions, such as
 * orte_bitmap_clear_bit, return an error if they are passed a bit
 * outside the initialized range.
 *
 */
#ifndef ORTE_BITMAP_H
#define ORTE_BITMAP_H
#include "orte_config.h"
#include <string.h>
#include "include/types.h"
#include "class/ompi_object.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
 * Bitmap object: a byte array holding the bits, together with the
 * array size and the number of bits that callers may legally access.
 */
struct orte_bitmap_t {
ompi_object_t super; /**< Subclass of ompi_object_t */
unsigned char *bitmap; /**< The actual bitmap array of characters */
size_t array_size; /**< Size, in bytes, of the array that holds the bitmap */
size_t legal_numbits; /**< The number of bits which are legal (the
                           actual bitmap may contain more bits, since
                           it is rounded up to a whole number of
                           chars) */
};
typedef struct orte_bitmap_t orte_bitmap_t;
/** Class declaration for orte_bitmap_t. */
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(orte_bitmap_t);
/**
 * Initializes the bitmap and sets its size. This must be called
 * before the bitmap can actually be used.
 *
 * @param bm The bitmap to initialize (IN)
 * @param size The initial size of the bitmap in terms of bits (IN)
 * @return ORTE_SUCCESS on success, ORTE_ERR_BAD_PARAM on an invalid
 * argument, ORTE_ERR_OUT_OF_RESOURCE if the storage cannot
 * be allocated
 *
 */
OMPI_DECLSPEC int orte_bitmap_init (orte_bitmap_t *bm, size_t size);
/**
 * Set a bit of the bitmap. If the bit asked for is beyond the current
 * size of the bitmap, then the bitmap is extended to accommodate the
 * bit.
 *
 * @param bm The bitmap to modify (IN)
 * @param bit The bit which is to be set (IN)
 * @return ORTE_SUCCESS on success, ORTE_ERR_BAD_PARAM if bm is NULL,
 * ORTE_ERR_OUT_OF_RESOURCE if the bitmap cannot be extended
 *
 */
OMPI_DECLSPEC int orte_bitmap_set_bit(orte_bitmap_t *bm, size_t bit);
/**
 * Clear/unset a bit of the bitmap. If the bit is beyond the current
 * size of the bitmap, an error is returned.
 *
 * @param bm The bitmap to modify (IN)
 * @param bit The bit which is to be cleared (IN)
 * @return ORTE_ERR_BAD_PARAM if the bit is out of range, else
 * ORTE_SUCCESS
 *
 */
OMPI_DECLSPEC int orte_bitmap_clear_bit(orte_bitmap_t *bm, size_t bit);
/**
 * Find out if a bit is set in the bitmap.
 *
 * @param bm The bitmap to query (IN)
 * @param bit The bit which is to be checked (IN)
 * @return ORTE_ERR_BAD_PARAM if the bit is out of range
 * 1 if the bit is set
 * 0 if the bit is not set
 *
 */
OMPI_DECLSPEC int orte_bitmap_is_set_bit(orte_bitmap_t *bm, size_t bit);
/**
 * Find the first clear bit in the bitmap and set it. If every bit in
 * the bitmap array is already set, the first bit past the end of the
 * array is set through orte_bitmap_set_bit.
 *
 * @param bm The bitmap to search and modify (IN)
 * @param position Position of the first clear bit (OUT)
 * @return ORTE_SUCCESS on success, ORTE_ERR_BAD_PARAM if bm is NULL
 */
OMPI_DECLSPEC int orte_bitmap_find_and_set_first_unset_bit(orte_bitmap_t *bm,
size_t *position);
/**
 * Clear all bits in the bitmap.
 *
 * @param bm The bitmap to clear (IN)
 * @return ORTE_ERR_BAD_PARAM if bm is NULL, else ORTE_SUCCESS
 *
 */
OMPI_DECLSPEC int orte_bitmap_clear_all_bits(orte_bitmap_t *bm);
/**
 * Set all bits in the bitmap. Every byte of the bitmap array is
 * filled, including any bits beyond the legal size.
 *
 * @param bm The bitmap to fill (IN)
 * @return ORTE_ERR_BAD_PARAM if bm is NULL, else ORTE_SUCCESS
 *
 */
OMPI_DECLSPEC int orte_bitmap_set_all_bits(orte_bitmap_t *bm);
/**
 * Report the current size of the bitmap, i.e. the number of legal
 * (accessible) bits.
 *
 * @param bm The bitmap to query (IN)
 * @return 0 if bm is NULL, otherwise the legal number of bits
 *         (the size_t field converted to int)
 *
 */
static inline int orte_bitmap_size(orte_bitmap_t *bm)
{
    if (NULL == bm) {
        return 0;
    }
    return (int) bm->legal_numbits;
}
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -25,6 +25,7 @@
#include "orte_config.h"
#include "class/orte_bitmap.h"
#include "mca/errmgr/errmgr.h"
#include "gpr_replica_fn.h"
@ -37,9 +38,9 @@ bool orte_gpr_replica_check_itag_list(orte_gpr_replica_addr_mode_t addr_mode,
size_t num_itags_entry,
orte_gpr_replica_itag_t *entry_itags)
{
size_t num_found;
bool exclusive, no_match, not_set;
size_t i, j;
bool exclusive, match, found_one, not_set;
int rc;
/* check for trivial case */
if (NULL == itags || 0 == num_itags_search) { /* wildcard case - automatically true */
@ -52,72 +53,68 @@ bool orte_gpr_replica_check_itag_list(orte_gpr_replica_addr_mode_t addr_mode,
not_set = false;
}
/* take care of trivial cases that don't require search */
if ((ORTE_GPR_REPLICA_XAND & addr_mode) &&
(num_itags_search != num_itags_entry)) { /* can't possibly turn out "true" */
if (not_set) return true;
else return false;
if (ORTE_GPR_REPLICA_XAND & addr_mode || ORTE_GPR_REPLICA_XOR & addr_mode) {
exclusive = true;
} else {
exclusive = false;
}
if ((ORTE_GPR_REPLICA_AND & addr_mode) &&
(num_itags_search > num_itags_entry)) { /* can't find enough matches */
if (not_set) return true;
else return false;
if (ORTE_SUCCESS != (rc = orte_bitmap_clear_all_bits(&(orte_gpr_replica_globals.srch_itag)))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* okay, have to search for remaining possibilities */
num_found = 0;
exclusive = true;
for (i=0; i < num_itags_entry; i++) {
no_match = true;
for (j=0; j < num_itags_search; j++) {
/* run the search - check the container's tags to see which search tags are found */
found_one = false;
for (i=0; i < num_itags_entry; i++) { /* for each container tag */
match = false;
for (j=0; j < num_itags_search; j++) { /* check all the search tags and see if it is present */
if (entry_itags[i] == itags[j]) { /* found a match */
num_found++;
no_match = false;
if (ORTE_SUCCESS != (rc = orte_bitmap_set_bit(&(orte_gpr_replica_globals.srch_itag), itags[j]))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_GPR_REPLICA_OR & addr_mode) { /* only need one match */
if (not_set) return false;
else return true;
}
match = true;
found_one = true;
}
}
if (no_match) {
exclusive = false;
}
}
if (ORTE_GPR_REPLICA_XAND & addr_mode) { /* deal with XAND case */
if (num_found == num_itags_entry) { /* found all, and nothing more */
if (not_set) return false;
else return true;
} else { /* found either too many or not enough */
if (not_set) return true;
if (!match && exclusive) {
/* if it was exclusive, then I'm not allowed to have any tags outside
* of those in the search list. Since I checked the search list and
* didn't find a match, this violates the exclusive requirement.
*/
if (not_set) return true;
else return false;
}
}
}
if (ORTE_GPR_REPLICA_XOR & addr_mode) { /* deal with XOR case */
if (num_found > 0 && exclusive) { /* found at least one and nothing not on list */
if (not_set) return false;
else return true;
} else {
if (not_set) return true;
else return false;
}
/* If we get here, then we know we have passed the exclusive test. We also know
* that we would have already returned in the OR case. So, first check the XOR
* case
*/
if (ORTE_GPR_REPLICA_XOR && found_one) {
if (not_set) return false;
else return true;
}
/* Only thing we have left to check is AND */
/* check if all the search tags were found */
for (i=0; i < num_itags_search; i++) {
if (1 != orte_bitmap_is_set_bit(&(orte_gpr_replica_globals.srch_itag), itags[i])) {
/* this tag was NOT found - required to find them all */
if (not_set) return true;
else return false;
}
}
if (ORTE_GPR_REPLICA_AND & addr_mode) { /* deal with AND case */
if (num_found == num_itags_search) { /* found all the required keys */
if (not_set) return false;
else return true;
} else {
if (not_set) return true;
else return false;
}
}
/* should be impossible situation, but just to be safe... */
if (not_set) return true;
else return false;
/* okay, all the tags are there, so we now passed the AND test */
if (not_set) return false;
else return true;
}

Просмотреть файл

@ -26,6 +26,7 @@
#include <time.h>
#include "class/orte_bitmap.h"
#include "class/orte_pointer_array.h"
#include "class/orte_value_array.h"
@ -92,6 +93,7 @@ typedef struct {
int compound_cmd_waiting;
orte_pointer_array_t *srch_cptr;
orte_pointer_array_t *srch_ival;
orte_bitmap_t srch_itag;
} orte_gpr_replica_globals_t;

Просмотреть файл

@ -25,6 +25,7 @@
*/
#include "orte_config.h"
#include "class/orte_bitmap.h"
#include "class/ompi_object.h"
#include "util/output.h"
#include "util/proc_info.h"
@ -520,6 +521,8 @@ int orte_gpr_replica_close(void)
orte_gpr_base_module_t *orte_gpr_replica_init(bool *allow_multi_user_threads, bool *have_hidden_threads, int *priority)
{
int rc;
/* If we are to host a replica, then we want to be selected, so do all the
setup and return the module */
@ -549,17 +552,19 @@ orte_gpr_base_module_t *orte_gpr_replica_init(bool *allow_multi_user_threads, bo
orte_gpr_replica_globals.compound_cmd = NULL;
/* initialize the registry head */
if (ORTE_SUCCESS != orte_pointer_array_init(&(orte_gpr_replica.segments),
if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica.segments),
orte_gpr_replica_globals.block_size,
orte_gpr_replica_globals.max_size,
orte_gpr_replica_globals.block_size)) {
orte_gpr_replica_globals.block_size))) {
ORTE_ERROR_LOG(rc);
return NULL;
}
if (ORTE_SUCCESS != orte_pointer_array_init(&(orte_gpr_replica.triggers),
if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica.triggers),
orte_gpr_replica_globals.block_size,
orte_gpr_replica_globals.max_size,
orte_gpr_replica_globals.block_size)) {
orte_gpr_replica_globals.block_size))) {
ORTE_ERROR_LOG(rc);
return NULL;
}
@ -567,12 +572,19 @@ orte_gpr_base_module_t *orte_gpr_replica_init(bool *allow_multi_user_threads, bo
OBJ_CONSTRUCT(&orte_gpr_replica.callbacks, ompi_list_t);
/* initialize the search arrays for temporarily storing search results */
if (ORTE_SUCCESS != orte_pointer_array_init(&(orte_gpr_replica_globals.srch_cptr),
100, orte_gpr_replica_globals.max_size, 100)) {
if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica_globals.srch_cptr),
100, orte_gpr_replica_globals.max_size, 100))) {
ORTE_ERROR_LOG(rc);
return NULL;
}
if (ORTE_SUCCESS != orte_pointer_array_init(&(orte_gpr_replica_globals.srch_ival),
100, orte_gpr_replica_globals.max_size, 100)) {
if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica_globals.srch_ival),
100, orte_gpr_replica_globals.max_size, 100))) {
ORTE_ERROR_LOG(rc);
return NULL;
}
if (ORTE_SUCCESS != (rc = orte_bitmap_init (&(orte_gpr_replica_globals.srch_itag), 64))) {
ORTE_ERROR_LOG(rc);
return NULL;
}

Просмотреть файл

@ -42,16 +42,6 @@ extern "C" {
/*
* Internal definitions
*/
/*
* define the command names/ids for use in OOB buffers.
* only needed for remotely executed commands.
*/
#define ORTE_NS_CREATE_CELLID 0x01
#define ORTE_NS_CREATE_JOBID 0x02
#define ORTE_NS_RESERVE_RANGE 0x04
#define ORTE_NS_FREE_NAME 0x08
#define ORTE_NS_GET_MY_CELLID 0x10
typedef uint8_t orte_ns_cmd_bitmask_t;
/*
@ -60,7 +50,7 @@ typedef uint8_t orte_ns_cmd_bitmask_t;
/* CAUTION - any changes here must also change corresponding
* typedefs above
*/
#define ORTE_NS_CMD ORTE_INT16
#define ORTE_NS_CMD ORTE_INT8
/*
* define flag values for remote commands - only used internally
@ -70,6 +60,7 @@ typedef uint8_t orte_ns_cmd_bitmask_t;
#define ORTE_NS_RESERVE_RANGE_CMD 0x04
#define ORTE_NS_ASSIGN_OOB_TAG_CMD 0x08
#define ORTE_NS_DEFINE_DATA_TYPE_CMD 0x10
#define ORTE_NS_CREATE_MY_NAME_CMD 0x20
/*
* function definitions
@ -153,6 +144,9 @@ OMPI_DECLSPEC int orte_ns_base_define_data_type_not_available(
const char *name,
orte_data_type_t *type);
OMPI_DECLSPEC int orte_ns_base_create_my_name_not_available(void);
/* Base functions used everywhere */
OMPI_DECLSPEC int orte_ns_base_set_my_name(void);
OMPI_DECLSPEC int orte_ns_base_get_peers(orte_process_name_t **procs,

Просмотреть файл

@ -104,6 +104,14 @@ orte_ns_base_define_data_type_not_available(
return ORTE_ERR_UNREACH;
}
/*
 * Placeholder for the create_my_name entry point: logs
 * ORTE_ERR_UNREACH and returns it to the caller.
 */
int
orte_ns_base_create_my_name_not_available(void)
{
    const int rc = ORTE_ERR_UNREACH;

    ORTE_ERROR_LOG(rc);
    return rc;
}
/*
* functions

Просмотреть файл

@ -38,17 +38,20 @@ int orte_ns_base_set_my_name(void)
{
int rc, id;
char *mode;
orte_jobid_t jobid;
orte_vpid_t vpid;
/* check to see if name has already been set - if so, leave it alone */
/* check to see if name has already been set - if so, THIS IS AN ERROR */
if (NULL != orte_process_info.my_name) {
return ORTE_SUCCESS;
ompi_output(0, "my name was set to [%d,%d,%d]", ORTE_NAME_ARGS(orte_process_info.my_name));
ORTE_ERROR_LOG(ORTE_ERR_FATAL);
return ORTE_ERR_FATAL;
}
/* first check if we are seed or singleton that couldn't
* join an existing universe - if so, name is mandated */
* join an existing universe - if so, name is mandated, and we need
* to set the singleton flag so that our job infrastructure gets built */
if (orte_process_info.seed || NULL == orte_process_info.ns_replica) {
orte_process_info.singleton = true;
return orte_ns_base_create_process_name(
&(orte_process_info.my_name), 0, 0, 0);
}
@ -66,22 +69,26 @@ int orte_ns_base_set_my_name(void)
nds++;
}
}
if (ORTE_SUCCESS != (rc = orte_ns.create_jobid(&jobid))) {
/* if the name wasn't passed to us, and we are not the seed, then
* we must be a singleton. We need to get a name assigned by the seed
* daemon, so we call the name service to do that, and then set the
* singleton flag.
*/
if (ORTE_SUCCESS != (rc = orte_ns.create_my_name())) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_ns.reserve_range(jobid, 1, &vpid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&(orte_process_info.my_name),
0, jobid, vpid))) {
if (ORTE_SUCCESS != (rc = orte_ns.get_vpid(&vpid, orte_process_info.my_name))) {
ORTE_ERROR_LOG(rc);
return rc;
}
orte_process_info.num_procs = 1;
orte_process_info.vpid_start = vpid;
orte_process_info.singleton = true;
return ORTE_SUCCESS;
}

Просмотреть файл

@ -34,11 +34,9 @@ int orte_ns_nds_env_get(void)
int vpid_start;
int num_procs;
char* name_string = NULL;
id = mca_base_param_register_string("ns", "nds", "name", NULL, NULL);
mca_base_param_lookup_string(id, &name_string);
if(name_string != NULL) {
if (ORTE_SUCCESS != (rc = orte_ns_base_convert_string_to_process_name(
&(orte_process_info.my_name),
name_string))) {

Просмотреть файл

@ -54,6 +54,7 @@ OMPI_DECLSPEC mca_ns_base_module_t orte_ns = {
orte_ns_base_assign_cellid_to_process,
orte_ns_base_create_jobid_not_available,
orte_ns_base_create_process_name,
orte_ns_base_create_my_name_not_available,
orte_ns_base_copy_process_name,
orte_ns_base_convert_string_to_process_name,
orte_ns_base_get_vpid_range_not_available,

Просмотреть файл

@ -177,6 +177,18 @@ typedef int (*orte_ns_base_module_create_proc_name_fn_t)(orte_process_name_t **n
orte_jobid_t job,
orte_vpid_t vpid);
/*
* Create my name
* If a process is a singleton, then it needs to create a name for itself. When
* a persistent daemon is present, this requires a communication to that daemon.
* Since the RML uses process names as its index into the RML communicator table,
* the RML automatically assigns a name to each process when it first attempts
* to communicate. This function takes advantage of that behavior to ensure that
* one, and ONLY one, name gets assigned to the process
*/
typedef int (*orte_ns_base_module_create_my_name_fn_t)(void);
/**
* Derive a process vpid.
* Given a base vpid and an offset, return the computed equivalent vpid. This function
@ -594,6 +606,7 @@ struct mca_ns_base_module_1_0_0_t {
orte_ns_base_module_assign_cellid_to_process_fn_t assign_cellid_to_process;
orte_ns_base_module_create_jobid_fn_t create_jobid;
orte_ns_base_module_create_proc_name_fn_t create_process_name;
orte_ns_base_module_create_my_name_fn_t create_my_name;
orte_ns_base_module_copy_proc_name_fn_t copy_process_name;
orte_ns_base_module_convert_string_to_process_name_fn_t convert_string_to_process_name;
orte_ns_base_module_reserve_range_fn_t reserve_range;

Просмотреть файл

@ -538,3 +538,41 @@ int orte_ns_proxy_define_data_type(const char *name,
return rc;
}
/*
 * Obtain a name for this process by sending a message to the name
 * service replica.
 *
 * This takes advantage of the way the RML uses the process name as its
 * index into the RML communicator table. Because the RML needs a name
 * right away, it automatically assigns one when it receives a message
 * and communicates that assignment back to us. Thus, to get a name, all
 * we have to do is send a message; no response from the replica is
 * required, so none is waited for.
 *
 * @return ORTE_ERR_OUT_OF_RESOURCE if the command buffer cannot be
 *         created, the pack error code if packing the command fails,
 *         ORTE_ERR_COMM_FAILURE if the send fails, ORTE_SUCCESS otherwise
 */
int orte_ns_proxy_create_my_name(void)
{
    orte_buffer_t *cmd;
    orte_ns_cmd_flag_t command = ORTE_NS_CREATE_MY_NAME_CMD;
    int rc;

    cmd = OBJ_NEW(orte_buffer_t);
    if (NULL == cmd) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    rc = orte_dps.pack(cmd, &command, 1, ORTE_NS_CMD);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    } else if (0 > orte_rml.send_buffer(orte_ns_my_replica, cmd, MCA_OOB_TAG_NS, 0)) {
        rc = ORTE_ERR_COMM_FAILURE;
        ORTE_ERROR_LOG(rc);
    }

    /* the buffer is ours on every path, so release it exactly once here */
    OBJ_RELEASE(cmd);
    return rc;
}

Просмотреть файл

@ -102,6 +102,8 @@ int orte_ns_proxy_assign_rml_tag(orte_rml_tag_t *tag, char *name);
int orte_ns_proxy_define_data_type(const char *name,
orte_data_type_t *type);
int orte_ns_proxy_create_my_name(void);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

Просмотреть файл

@ -71,6 +71,7 @@ static mca_ns_base_module_t orte_ns_proxy = {
orte_ns_base_assign_cellid_to_process,
orte_ns_proxy_create_jobid,
orte_ns_base_create_process_name,
orte_ns_proxy_create_my_name,
orte_ns_base_copy_process_name,
orte_ns_base_convert_string_to_process_name,
orte_ns_proxy_reserve_range,

Просмотреть файл

@ -258,3 +258,26 @@ int orte_ns_replica_define_data_type(const char *name,
}
/*
 * Create a name for this process: obtain a new jobid, reserve a range
 * of one vpid in that job, and build the process name in
 * orte_process_info.my_name using cell 0.
 *
 * Returns ORTE_SUCCESS, or the error code of the first step that fails
 * (which is also logged).
 */
int orte_ns_replica_create_my_name(void)
{
    orte_jobid_t new_job;
    orte_vpid_t first_vpid;
    int status;

    status = orte_ns.create_jobid(&new_job);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
        return status;
    }

    status = orte_ns.reserve_range(new_job, 1, &first_vpid);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
        return status;
    }

    status = orte_ns.create_process_name(&(orte_process_info.my_name),
                                         0, new_job, first_vpid);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }
    return status;
}

Просмотреть файл

@ -145,6 +145,8 @@ int orte_ns_replica_assign_rml_tag(orte_rml_tag_t *tag,
int orte_ns_replica_define_data_type(const char *name,
orte_data_type_t *type);
int orte_ns_replica_create_my_name(void);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

Просмотреть файл

@ -74,6 +74,7 @@ static mca_ns_base_module_t orte_ns_replica = {
orte_ns_base_assign_cellid_to_process,
orte_ns_replica_create_jobid,
orte_ns_base_create_process_name,
orte_ns_replica_create_my_name,
orte_ns_base_copy_process_name,
orte_ns_base_convert_string_to_process_name,
orte_ns_replica_reserve_range,
@ -352,6 +353,8 @@ int orte_ns_replica_finalize(void)
/*
* handle message from proxies
* NOTE: The incoming buffer "buffer" is OBJ_RELEASED by the calling program.
* DO NOT RELEASE THIS BUFFER IN THIS CODE
*/
void orte_ns_replica_recv(int status, orte_process_name_t* sender,
@ -523,6 +526,11 @@ void orte_ns_replica_recv(int status, orte_process_name_t* sender,
}
break;
case ORTE_NS_CREATE_MY_NAME_CMD:
/* ignore this command */
goto CLEANUP;
break;
default:
goto RETURN_ERROR;
}

Просмотреть файл

@ -316,6 +316,16 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
ompi_argv_append(&argc, &argv, "--name");
proc_name_index = argc;
ompi_argv_append(&argc, &argv, "");
/* tell the daemon how many procs are in the daemon's job */
ompi_argv_append(&argc, &argv, "--num_procs");
asprintf(&param, "%lu", (unsigned long)(vpid + num_nodes));
ompi_argv_append(&argc, &argv, param);
free(param);
/* tell the daemon the starting vpid of the daemon's job */
ompi_argv_append(&argc, &argv, "--vpid_start");
ompi_argv_append(&argc, &argv, "0");
ompi_argv_append(&argc, &argv, "--nodename");
node_name_index2 = argc;
ompi_argv_append(&argc, &argv, "");

Просмотреть файл

@ -62,6 +62,7 @@ int orte_init_stage1(void)
char *contact_path = NULL;
pid_t pid;
orte_universe_t univ;
orte_jobid_t my_jobid;
/* Ensure the system_info structure is instantiated and initialized */
if (ORTE_SUCCESS != (ret = orte_sys_info())) {
@ -340,18 +341,22 @@ int orte_init_stage1(void)
return ret;
}
/* if we are the seed, setup jobid-0 */
/* if we are a singleton, setup the infrastructure for our job */
if(orte_process_info.seed) {
if (ORTE_SUCCESS != (ret = orte_rmgr_base_set_job_slots(0,1))) {
if(orte_process_info.singleton) {
if (ORTE_SUCCESS != (ret = orte_ns.get_jobid(&my_jobid, orte_process_info.my_name))) {
ORTE_ERROR_LOG(ret);
return ret;
}
if (ORTE_SUCCESS != (ret = orte_rmaps_base_set_vpid_range(0,0,1))) {
if (ORTE_SUCCESS != (ret = orte_rmgr_base_set_job_slots(my_jobid,1))) {
ORTE_ERROR_LOG(ret);
return ret;
}
if (ORTE_SUCCESS != (ret = orte_rmgr_base_proc_stage_gate_init(0))) {
if (ORTE_SUCCESS != (ret = orte_rmaps_base_set_vpid_range(my_jobid,0,1))) {
ORTE_ERROR_LOG(ret);
return ret;
}
if (ORTE_SUCCESS != (ret = orte_rmgr_base_proc_stage_gate_init(my_jobid))) {
ORTE_ERROR_LOG(ret);
return ret;
}

Просмотреть файл

@ -36,6 +36,7 @@
#include "threads/condition.h"
#include "dps/dps.h"
#include "util/ompi_environ.h"
#include "util/output.h"
#include "util/show_help.h"
#include "util/sys_info.h"
@ -105,6 +106,14 @@ ompi_cmd_line_init_t orte_cmd_line_opts[] = {
&orted_globals.name, OMPI_CMD_LINE_TYPE_STRING,
"Set the orte process name"},
{ NULL, NULL, NULL, '\0', NULL, "vpid_start", 1,
&orted_globals.vpid_start, OMPI_CMD_LINE_TYPE_STRING,
"Set the starting vpid for this job"},
{ NULL, NULL, NULL, '\0', NULL, "num_procs", 1,
&orted_globals.num_procs, OMPI_CMD_LINE_TYPE_STRING,
"Set the number of process in this job"},
{ NULL, NULL, NULL, '\0', NULL, "nsreplica", 1,
&orte_process_info.ns_replica_uri, OMPI_CMD_LINE_TYPE_STRING,
"Name service contact information."},
@ -146,6 +155,8 @@ ompi_cmd_line_init_t orte_cmd_line_opts[] = {
NULL, OMPI_CMD_LINE_TYPE_NULL, NULL }
};
extern char **environ;
int main(int argc, char *argv[])
{
@ -155,9 +166,8 @@ int main(int argc, char *argv[])
char *log_path = NULL;
char log_file[PATH_MAX];
char *jobidstring;
int i;
fprintf(stderr, "orted\n");
/* setup to check common command line options that just report and die */
memset(&orted_globals, 0, sizeof(orted_globals_t));
cmd_line = OBJ_NEW(ompi_cmd_line_t);
@ -190,14 +200,34 @@ int main(int argc, char *argv[])
orte_process_info.daemon = true;
/*
* Attempt to parse the daemon name and save in proc_info
* If the daemon was given a name on the command line, need to set the
* proper indicators in the environment so the name discovery service
* can find it
*/
if (orted_globals.name) {
ret = orte_ns_base_convert_string_to_process_name(
&orte_process_info.my_name, orted_globals.name);
if(ORTE_SUCCESS != ret) {
fprintf(stderr, "Couldn't convert environmental string to process name\n");
return 1;
if (ORTE_SUCCESS != (ret = ompi_setenv("OMPI_MCA_ns_nds",
"env", true, &environ))) {
fprintf(stderr, "orted: could not set my name in environ\n");
return ret;
}
if (ORTE_SUCCESS != (ret = ompi_setenv("OMPI_MCA_ns_nds_name",
orted_globals.name, true, &environ))) {
fprintf(stderr, "orted: could not set my name in environ\n");
return ret;
}
/* the following values are meaningless to the daemon, but may have
* been passed in anyway. we set them here because the nds_env component
* requires that they be set
*/
if (ORTE_SUCCESS != (ret = ompi_setenv("OMPI_MCA_ns_nds_vpid_start",
orted_globals.vpid_start, true, &environ))) {
fprintf(stderr, "orted: could not set vpid_start in environ\n");
return ret;
}
if (ORTE_SUCCESS != (ret = ompi_setenv("OMPI_MCA_ns_nds_num_procs",
orted_globals.num_procs, true, &environ))) {
fprintf(stderr, "orted: could not set num_procs in environ\n");
return ret;
}
}

Просмотреть файл

@ -54,6 +54,8 @@ typedef struct {
bool debug_daemons;
bool debug_daemons_file;
char* name;
char* vpid_start;
char* num_procs;
char* universe;
int bootproxy;
int uri_pipe;

Просмотреть файл

@ -33,6 +33,7 @@
OMPI_DECLSPEC orte_proc_info_t orte_process_info = {
/* .my_name = */ NULL,
/* .singleton = */ false,
/* .vpid_start = */ 0,
/* .num_procs = */ 1,
/* .pid = */ 0,
@ -148,6 +149,7 @@ int orte_proc_info_finalize(void)
}
orte_process_info.seed = false;
orte_process_info.singleton = false;
orte_process_info.daemon = false;
return ORTE_SUCCESS;

Просмотреть файл

@ -46,6 +46,7 @@ extern "C" {
*/
struct orte_proc_info_t {
orte_process_name_t *my_name; /**< My official process name */
bool singleton; /**< I am a singleton */
orte_vpid_t vpid_start; /**< starting vpid for this job */
size_t num_procs; /**< number of processes in this job */
pid_t pid; /**< Local process ID for this process */