1
1

This commit looks a lot bigger than it is, so relax :-)

Fix the problem observed by multiple people that comm_spawned children were (once again) being mapped onto the same nodes as their parents. This was caused by going through the RAS a second time, thus overwriting the mapper's bookkeeping that told RMAPS where it had left off.

To solve this - and to continue moving forward on the ORTE development - we introduce the concept of attributes to control the behavior of the RM frameworks. I defined the attributes and a list of attributes as new ORTE data types to make it easier for people to pass them around (since they are now fundamental to the system, and therefore we will be packing and unpacking them frequently). Thus, all the functions to manipulate attributes can be implemented and debugged in one place.

I used those capabilities in two places:

1. Added an attribute list to the rmgr.spawn interface.

2. Added an attribute list to the ras.allocate interface. At the moment, the only attribute I modified the various RAS components to recognize is the USE_PARENT_ALLOCATION one (as defined in rmgr_types.h).

So the RAS components now know how to reuse an allocation. I have debugged this under rsh, but it now needs to be tested on a wider set of platforms.

This commit was SVN r12138.
Этот коммит содержится в:
Ralph Castain 2006-10-17 16:06:17 +00:00
родитель 3f88937081
Коммит 13227e36ab
42 изменённых файлов: 1055 добавлений и 96 удалений

Просмотреть файл

@ -354,6 +354,8 @@ ompi_comm_start_processes(int count, char **array_of_commands,
orte_std_cntr_t num_apps, ai;
orte_jobid_t new_jobid=ORTE_JOBID_INVALID;
orte_app_context_t **apps=NULL;
opal_list_t attributes;
/* parse the info object */
@ -371,6 +373,9 @@ ompi_comm_start_processes(int count, char **array_of_commands,
/* make sure the progress engine properly trips the event library */
opal_progress_event_increment();
/* setup to record the attributes */
OBJ_CONSTRUCT(&attributes, opal_list_t);
/* we want to be able to default the prefix to the one used for this job
* so that the ompi executables and libraries can be found. the user can
* later override this value by providing an MPI_Info value. for now, though,
@ -538,8 +543,19 @@ ompi_comm_start_processes(int count, char **array_of_commands,
/* cleanup */
if (NULL != base_prefix) free(base_prefix);
/* tell the RTE that we want the children to run inside of our allocation -
* don't go get one just for them
*/
if (ORTE_SUCCESS != (rc = orte_rmgr.add_attribute(&attributes, ORTE_RMGR_USE_PARENT_ALLOCATION,
ORTE_JOBID, &(orte_process_info.my_name->jobid)))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&attributes);
opal_progress_event_decrement();
return MPI_ERR_SPAWN;
}
/* spawn procs */
if (ORTE_SUCCESS != (rc = orte_rmgr.spawn_job(apps, count, &new_jobid, 0, NULL, NULL, ORTE_PROC_STATE_NONE))) {
if (ORTE_SUCCESS != (rc = orte_rmgr.spawn_job(apps, count, &new_jobid, 0, NULL, NULL, ORTE_PROC_STATE_NONE, &attributes))) {
ORTE_ERROR_LOG(rc);
opal_progress_event_decrement();
return MPI_ERR_SPAWN;

Просмотреть файл

@ -126,11 +126,13 @@ typedef void* orte_iov_base_ptr_t;
#define ORTE_JOB_MAP (orte_data_type_t) 54 /**< map of process locations */
#define ORTE_MAPPED_PROC (orte_data_type_t) 55 /**< process entry on map */
#define ORTE_MAPPED_NODE (orte_data_type_t) 56 /**< node entry on map */
#define ORTE_ATTRIBUTE (orte_data_type_t) 57 /**< attribute used to control framework behavior */
#define ORTE_ATTR_LIST (orte_data_type_t) 58 /**< list of attributes */
/* DAEMON communication type */
#define ORTE_DAEMON_CMD (orte_data_type_t) 57 /**< command flag for communicating with the daemon */
#define ORTE_DAEMON_CMD (orte_data_type_t) 59 /**< command flag for communicating with the daemon */
/* define the starting point for dynamically assigning data types */
#define ORTE_DSS_ID_DYNAMIC 60
#define ORTE_DSS_ID_DYNAMIC 70
#endif

Просмотреть файл

@ -69,7 +69,7 @@ static void orte_gpr_keyval_destructor(orte_gpr_keyval_t* keyval)
/* define instance of opal_class_t */
OBJ_CLASS_INSTANCE(
orte_gpr_keyval_t, /* type name */
opal_object_t, /* parent "class" name */
opal_list_item_t, /* parent "class" name */
orte_gpr_keyval_construct, /* constructor */
orte_gpr_keyval_destructor); /* destructor */

Просмотреть файл

@ -36,9 +36,7 @@ int orte_gpr_base_put_1(orte_gpr_addr_mode_t addr_mode,
{
orte_gpr_value_t *values;
orte_gpr_value_t value = ORTE_GPR_VALUE_EMPTY;
orte_gpr_keyval_t *keyvals;
orte_gpr_keyval_t keyval = ORTE_GPR_KEYVAL_EMPTY;
orte_data_value_t dval = ORTE_DATA_VALUE_EMPTY;
orte_gpr_keyval_t *keyval;
orte_std_cntr_t i;
int rc;
@ -47,12 +45,9 @@ int orte_gpr_base_put_1(orte_gpr_addr_mode_t addr_mode,
value.addr_mode = addr_mode;
value.segment = segment;
value.cnt = 1;
keyvals = &keyval;
value.keyvals = &keyvals;
keyval.key = key;
keyval.value = &dval;
dval.type = data_value->type;
if (ORTE_SUCCESS != (rc = orte_dss.set(&dval, data_value->data, data_value->type))) {
value.keyvals = &keyval;
if (ORTE_SUCCESS != (rc = orte_gpr_base_create_keyval(&keyval, key,
data_value->type, data_value->data))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -70,10 +65,13 @@ int orte_gpr_base_put_1(orte_gpr_addr_mode_t addr_mode,
/* put the value on the registry */
if (ORTE_SUCCESS != (rc = orte_gpr.put(1, &values))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(keyval);
return rc;
}
/* no memory to clean up since we didn't allocate any */
/* cleanup */
OBJ_RELEASE(keyval);
return ORTE_SUCCESS;
}

Просмотреть файл

@ -41,8 +41,7 @@ int orte_gpr_base_subscribe_1(orte_gpr_subscription_id_t *id,
void *user_tag)
{
orte_gpr_value_t *values;
orte_gpr_keyval_t *keyvals;
orte_gpr_keyval_t keyval = ORTE_GPR_KEYVAL_EMPTY;
orte_gpr_keyval_t *keyval;
orte_gpr_value_t value = ORTE_GPR_VALUE_EMPTY;
orte_gpr_subscription_t *subs;
orte_gpr_subscription_t sub = ORTE_GPR_SUBSCRIPTION_EMPTY;
@ -66,8 +65,7 @@ int orte_gpr_base_subscribe_1(orte_gpr_subscription_id_t *id,
value.addr_mode = addr_mode;
value.segment = segment;
value.cnt = 1;
keyvals = &keyval;
value.keyvals = &keyvals;
value.keyvals = &keyval;
value.tokens = tokens;
/* must count the number of tokens */
@ -78,8 +76,12 @@ int orte_gpr_base_subscribe_1(orte_gpr_subscription_id_t *id,
}
}
keyval.key = key;
if (ORTE_SUCCESS != (rc = orte_gpr_base_create_keyval(&keyval, key,
ORTE_UNDEF, NULL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* send the subscription */
if (NULL == trig_name) { /* no trigger provided */
if (ORTE_SUCCESS != (rc = orte_gpr.subscribe(1, &subs, 0, NULL))) {
@ -95,7 +97,8 @@ int orte_gpr_base_subscribe_1(orte_gpr_subscription_id_t *id,
}
/* no memory to cleanup since we didn't allocate anything */
/* cleanup */
OBJ_RELEASE(keyval);
/* return the subscription id */
*id = sub.id;

Просмотреть файл

@ -40,10 +40,9 @@
#include "orte/mca/schema/schema.h"
#include "opal/class/opal_object.h"
#include "orte/class/orte_pointer_array.h"
#include "orte/dss/dss_types.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/rmgr/rmgr_types.h"
#include "orte/mca/smr/smr_types.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
@ -121,13 +120,12 @@ typedef uint16_t orte_gpr_addr_mode_t;
* Key-value pairs for registry operations
*/
typedef struct {
opal_object_t super; /* required for this to be an object */
opal_list_item_t super; /* required for this to be on a list */
char *key; /* string key for this value */
orte_data_value_t *value; /* value */
} orte_gpr_keyval_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_gpr_keyval_t);
#define ORTE_GPR_KEYVAL_EMPTY {{OBJ_CLASS(orte_gpr_keyval_t),0}, NULL, NULL}
/** Return value structure for registry requests.

Просмотреть файл

@ -17,12 +17,16 @@
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/util/output.h"
#include "orte/orte_constants.h"
#include "opal/class/opal_list.h"
#include "orte/dss/dss.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/ras/base/ras_private.h"
@ -30,14 +34,13 @@
* Function for selecting one component from all those that are
* available.
*/
int orte_ras_base_allocate(orte_jobid_t jobid)
int orte_ras_base_allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
int ret;
opal_list_item_t *item;
orte_ras_base_cmp_t *cmp;
/* If the list is empty, return NULL */
/* If no components are available, then return an error */
if (opal_list_is_empty(&orte_ras_base.ras_available)) {
opal_output(orte_ras_base.ras_output,
"orte:ras:base:select: no components available!");
@ -59,7 +62,7 @@ int orte_ras_base_allocate(orte_jobid_t jobid)
cmp->component->ras_version.mca_component_name);
if (NULL != cmp->module->allocate_job) {
ret = cmp->module->allocate_job(jobid);
ret = cmp->module->allocate_job(jobid, attributes);
if (ORTE_SUCCESS == ret) {
bool empty;
@ -96,3 +99,35 @@ int orte_ras_base_deallocate(orte_jobid_t job)
return ORTE_SUCCESS;
}
/**
 * Reallocate nodes so that another job can use them in addition to the
 * job that already holds them.
 *
 * Queries the nodes currently allocated to parent_jobid and assigns that
 * same set of nodes to child_jobid.
 *
 * @param parent_jobid  job whose existing allocation is to be shared
 * @param child_jobid   job that is to be given the same nodes
 * @return ORTE_SUCCESS, or the (logged) error code of whichever of the
 *         query / assign calls failed
 */
int orte_ras_base_reallocate(orte_jobid_t parent_jobid,
                             orte_jobid_t child_jobid)
{
    opal_list_t current_alloc;
    opal_list_item_t *item;
    int rc;

    OBJ_CONSTRUCT(&current_alloc, opal_list_t);

    if (ORTE_SUCCESS != (rc = orte_ras_base_node_query_alloc(&current_alloc, parent_jobid))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    if (ORTE_SUCCESS != (rc = orte_ras_base_node_assign(&current_alloc, child_jobid))) {
        ORTE_ERROR_LOG(rc);
    }

cleanup:
    /* Drain the list on every exit path, including a failed query, so that
     * any entries the query may have appended before failing are released
     * rather than left behind when the list itself is destructed.
     */
    while (NULL != (item = opal_list_remove_first(&current_alloc))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&current_alloc);

    return rc;
}

Просмотреть файл

@ -35,6 +35,7 @@
#include "orte/dss/dss.h"
#include "orte/util/proc_info.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/ras/base/ras_private.h"
@ -93,6 +94,8 @@ void orte_ras_base_recv(int status, orte_process_name_t* sender,
orte_ras_cmd_flag_t command;
orte_std_cntr_t count;
orte_jobid_t job;
opal_list_item_t *item;
opal_list_t attrs;
int rc;
count = 1;
@ -115,9 +118,23 @@ void orte_ras_base_recv(int status, orte_process_name_t* sender,
goto SEND_ANSWER;
}
if (ORTE_SUCCESS != (rc = orte_ras_base_allocate(job))) {
/* get the list of attributes */
OBJ_CONSTRUCT(&attrs, opal_list_t);
count = 1;
if(ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &attrs, &count, ORTE_ATTR_LIST))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP_ALLOC;
}
if (ORTE_SUCCESS != (rc = orte_ras_base_allocate(job, &attrs))) {
ORTE_ERROR_LOG(rc);
}
CLEANUP_ALLOC:
while (NULL != (item = opal_list_remove_first(&attrs))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&attrs);
break;
case ORTE_RAS_DEALLOCATE_CMD:

Просмотреть файл

@ -27,6 +27,8 @@
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "opal/class/opal_list.h"
#include "orte/dss/dss_types.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/rml/rml_types.h"
@ -51,7 +53,7 @@ typedef uint8_t orte_ras_cmd_flag_t;
/*
* API function definitions
*/
ORTE_DECLSPEC int orte_ras_base_allocate(orte_jobid_t job);
ORTE_DECLSPEC int orte_ras_base_allocate(orte_jobid_t job, opal_list_t *attributes);
ORTE_DECLSPEC int orte_ras_base_deallocate(orte_jobid_t job);
/*
@ -75,6 +77,9 @@ orte_ras_node_t* orte_ras_base_node_lookup_no_op(orte_cellid_t, const char* node
ORTE_DECLSPEC int orte_ras_base_allocate_nodes(orte_jobid_t jobid,
opal_list_t* nodes);
ORTE_DECLSPEC int orte_ras_base_reallocate(orte_jobid_t parent_jobid,
orte_jobid_t child_jobid);
/*
* Query the registry for all available nodes
*/

Просмотреть файл

@ -27,6 +27,7 @@
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "orte/dss/dss.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/errmgr/errmgr.h"
@ -236,14 +237,33 @@ cleanup:
*
*/
static int orte_ras_bjs_allocate(orte_jobid_t jobid)
static int orte_ras_bjs_allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
opal_list_t nodes;
opal_list_item_t* item;
int rc;
orte_app_context_t **context = NULL;
orte_std_cntr_t i, num_context;
orte_jobid_t *jptr;
orte_attribute_t *attr;
/* check the attributes to see if we are supposed to use the parent
* jobid's allocation. This can occur if we are doing a dynamic
* process spawn and don't want to go through the allocator again
*/
if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMGR_USE_PARENT_ALLOCATION))) {
/* attribute was given - just reallocate to the new jobid */
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&jptr, attr->value, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_ras_base_reallocate(*jptr, jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
return ORTE_SUCCESS;
}
rc = orte_rmgr.get_app_context(jobid, &context, &num_context);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -23,6 +23,7 @@
#include "opal/util/output.h"
#include "opal/util/argv.h"
#include "orte/dss/dss.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/proc_info.h"
@ -34,7 +35,7 @@
/*
* Local functions
*/
static int orte_ras_dash_host_allocate(orte_jobid_t jobid);
static int orte_ras_dash_host_allocate(orte_jobid_t jobid, opal_list_t *attributes);
static int orte_ras_dash_host_deallocate(orte_jobid_t jobid);
static int orte_ras_dash_host_finalize(void);
@ -65,7 +66,7 @@ orte_ras_base_module_t *orte_ras_dash_host_init(int* priority)
}
static int orte_ras_dash_host_allocate(orte_jobid_t jobid)
static int orte_ras_dash_host_allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
opal_list_t nodes;
opal_list_item_t* item;
@ -75,7 +76,26 @@ static int orte_ras_dash_host_allocate(orte_jobid_t jobid)
char **mapped_nodes = NULL, **mini_map;
orte_ras_node_t *node;
bool empty;
orte_jobid_t *jptr;
orte_attribute_t *attr;
/* check the attributes to see if we are supposed to use the parent
* jobid's allocation. This can occur if we are doing a dynamic
* process spawn and don't want to go through the allocator again
*/
if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMGR_USE_PARENT_ALLOCATION))) {
/* attribute was given - just reallocate to the new jobid */
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&jptr, attr->value, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_ras_base_reallocate(*jptr, jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
return ORTE_SUCCESS;
}
/* If the node segment is not empty, do nothing */
if (ORTE_SUCCESS != (rc = orte_ras_base_node_segment_empty(&empty))) {

Просмотреть файл

@ -32,6 +32,7 @@
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "orte/dss/dss.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ns/ns.h"
@ -43,7 +44,7 @@
/*
* Local functions
*/
static int orte_ras_gridengine_allocate(orte_jobid_t jobid);
static int orte_ras_gridengine_allocate(orte_jobid_t jobid, opal_list_t *attributes);
static int orte_ras_gridengine_discover(opal_list_t* nodelist,
orte_app_context_t** context, orte_std_cntr_t num_context);
static int orte_ras_gridengine_deallocate(orte_jobid_t jobid);
@ -70,14 +71,33 @@ orte_ras_base_module_t orte_ras_gridengine_module = {
* requested number of nodes/process slots to the job.
*
*/
static int orte_ras_gridengine_allocate(orte_jobid_t jobid)
static int orte_ras_gridengine_allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
opal_list_t nodes;
opal_list_item_t* item;
int rc;
orte_app_context_t **context = NULL;
orte_std_cntr_t i, num_context;
orte_jobid_t *jptr;
orte_attribute_t *attr;
/* check the attributes to see if we are supposed to use the parent
* jobid's allocation. This can occur if we are doing a dynamic
* process spawn and don't want to go through the allocator again
*/
if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMGR_USE_PARENT_ALLOCATION))) {
/* attribute was given - just reallocate to the new jobid */
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&jptr, attr->value, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_ras_base_reallocate(*jptr, jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
return ORTE_SUCCESS;
}
/* get the context */
rc = orte_rmgr.get_app_context(jobid, &context, &num_context);
if(ORTE_SUCCESS != rc) {

Просмотреть файл

@ -17,23 +17,25 @@
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/util/output.h"
#include "opal/util/argv.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "orte/mca/ras/base/ras_private.h"
#include "orte/mca/rmgr/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/proc_info.h"
#include "orte/dss/dss.h"
#include "orte/util/proc_info.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/base/ras_private.h"
#include "orte/mca/ras/hostfile/ras_hostfile.h"
/*
* Local functions
*/
static int orte_ras_hostfile_allocate(orte_jobid_t jobid);
static int orte_ras_hostfile_allocate(orte_jobid_t jobid, opal_list_t *attributes);
static int orte_ras_hostfile_deallocate(orte_jobid_t jobid);
static int orte_ras_hostfile_finalize(void);
@ -81,14 +83,33 @@ orte_ras_base_module_t *orte_ras_hostfile_init(int* priority)
* then examine the resources segment and pull out all nodes that came
* from a hostfile and put them on the nodes segment.
*/
static int orte_ras_hostfile_allocate(orte_jobid_t jobid)
static int orte_ras_hostfile_allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
opal_list_t nodes;
opal_list_item_t* item;
int rc;
orte_jobid_t *jptr;
orte_attribute_t *attr;
OBJ_CONSTRUCT(&nodes, opal_list_t);
/* check the attributes to see if we are supposed to use the parent
* jobid's allocation. This can occur if we are doing a dynamic
* process spawn and don't want to go through the allocator again
*/
if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMGR_USE_PARENT_ALLOCATION))) {
/* attribute was given - just reallocate to the new jobid */
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&jptr, attr->value, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_ras_base_reallocate(*jptr, jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
return ORTE_SUCCESS;
}
/* Query for all nodes in the node segment that have been
allocated to this job */
if (ORTE_SUCCESS != (rc = orte_ras_base_node_query_alloc(&nodes, jobid))) {

Просмотреть файл

@ -32,6 +32,8 @@
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include <errno.h>
#include <unistd.h>
@ -41,17 +43,18 @@
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "orte/mca/ras/base/ras_private.h"
#include "orte/dss/dss.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/ras/base/ras_private.h"
#include "ras_loadleveler.h"
/*
* Local functions
*/
static int orte_ras_loadleveler_allocate(orte_jobid_t jobid);
static int orte_ras_loadleveler_allocate(orte_jobid_t jobid, opal_list_t *attributes);
static int orte_ras_loadleveler_deallocate(orte_jobid_t jobid);
static int orte_ras_loadleveler_finalize(void);
static int orte_ras_loadleveler_get_hostlist(int * num_hosts, char*** hostlist);
@ -76,7 +79,7 @@ orte_ras_base_module_t orte_ras_loadleveler_module = {
* Discover available (pre-allocated) nodes. Allocate the
* requested number of nodes/process slots to the job.
*/
static int orte_ras_loadleveler_allocate(orte_jobid_t jobid)
static int orte_ras_loadleveler_allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
int i, rc, ret;
opal_list_t nodes_list;
@ -84,7 +87,26 @@ static int orte_ras_loadleveler_allocate(orte_jobid_t jobid)
orte_ras_node_t* node;
char ** hostlist = NULL;
int num_hosts = 0;
orte_jobid_t *jptr;
orte_attribute_t *attr;
/* check the attributes to see if we are supposed to use the parent
* jobid's allocation. This can occur if we are doing a dynamic
* process spawn and don't want to go through the allocator again
*/
if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMGR_USE_PARENT_ALLOCATION))) {
/* attribute was given - just reallocate to the new jobid */
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&jptr, attr->value, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_ras_base_reallocate(*jptr, jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
return ORTE_SUCCESS;
}
rc = orte_ras_loadleveler_get_hostlist(&num_hosts, &hostlist);
if(ORTE_SUCCESS != rc) {
return rc;

Просмотреть файл

@ -23,6 +23,7 @@
#include "opal/class/opal_list.h"
#include "opal/util/output.h"
#include "orte/dss/dss.h"
#include "orte/util/sys_info.h"
#include "orte/mca/ras/base/ras_private.h"
#include "orte/mca/rmgr/base/base.h"
@ -35,7 +36,7 @@
/*
* Local functions
*/
static int orte_ras_localhost_allocate(orte_jobid_t jobid);
static int orte_ras_localhost_allocate(orte_jobid_t jobid, opal_list_t *attributes);
static int orte_ras_localhost_deallocate(orte_jobid_t jobid);
static int orte_ras_localhost_finalize(void);
@ -66,14 +67,33 @@ orte_ras_base_module_t *orte_ras_localhost_init(int* priority)
}
static int orte_ras_localhost_allocate(orte_jobid_t jobid)
static int orte_ras_localhost_allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
bool empty;
int ret;
opal_list_t nodes;
orte_ras_node_t *node;
opal_list_item_t *item;
orte_attribute_t *attr;
orte_jobid_t *jptr;
/* check the attributes to see if we are supposed to use the parent
* jobid's allocation. This can occur if we are doing a dynamic
* process spawn and don't want to go through the allocator again
*/
if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMGR_USE_PARENT_ALLOCATION))) {
/* attribute was given - just reallocate to the new jobid */
if (ORTE_SUCCESS != (ret = orte_dss.get((void**)&jptr, attr->value, ORTE_JOBID))) {
ORTE_ERROR_LOG(ret);
return ret;
}
if (ORTE_SUCCESS != (ret = orte_ras_base_reallocate(*jptr, jobid))) {
ORTE_ERROR_LOG(ret);
return ret;
}
return ORTE_SUCCESS;
}
/* If the node segment is not empty, do nothing */
if (ORTE_SUCCESS != (ret = orte_ras_base_node_segment_empty(&empty))) {

Просмотреть файл

@ -26,7 +26,7 @@
#include "ras_lsf_bproc.h"
static int orte_ras_lsf_bproc_allocate(orte_jobid_t jobid)
/*
 * Allocation entry point for the lsf_bproc RAS component.
 *
 * Does no work: neither jobid nor the attribute list is examined, and
 * ORTE_SUCCESS is always returned.
 * NOTE(review): the attribute list is ignored here, so a
 * ORTE_RMGR_USE_PARENT_ALLOCATION request has no effect in this
 * component - confirm that is intended.
 */
static int orte_ras_lsf_bproc_allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
return ORTE_SUCCESS;
}

Просмотреть файл

@ -16,16 +16,20 @@
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include "opal/util/argv.h"
#include "orte/orte_constants.h"
#include "orte/dss/dss.h"
#include "orte/mca/ras/poe/ras_poe.h"
#include "orte/mca/ras/base/ras_private.h"
static int orte_ras_poe_allocate(orte_jobid_t jobid)
static int orte_ras_poe_allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
char *poe_node_str;
char **names;
@ -33,12 +37,31 @@ static int orte_ras_poe_allocate(orte_jobid_t jobid)
opal_list_t nodes_list;
orte_ras_node_t *node;
opal_list_item_t* item;
orte_jobid_t *jptr;
orte_attribute_t *attr;
poe_node_str = getenv("LOADL_PROCESSOR_LIST");
if (NULL == poe_node_str) {
return ORTE_ERR_NOT_FOUND;
}
/* check the attributes to see if we are supposed to use the parent
* jobid's allocation. This can occur if we are doing a dynamic
* process spawn and don't want to go through the allocator again
*/
if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMGR_USE_PARENT_ALLOCATION))) {
/* attribute was given - just reallocate to the new jobid */
if (ORTE_SUCCESS != (ret = orte_dss.get((void**)&jptr, attr->value, ORTE_JOBID))) {
ORTE_ERROR_LOG(ret);
return ret;
}
if (ORTE_SUCCESS != (ret = orte_ras_base_reallocate(*jptr, jobid))) {
ORTE_ERROR_LOG(ret);
return ret;
}
return ORTE_SUCCESS;
}
/* poe_node_str is something like "nodeA nodeA nodeB nodeB" */
names = opal_argv_copy(opal_argv_split(poe_node_str, ' '));

Просмотреть файл

@ -41,7 +41,7 @@
* functions
*/
int orte_ras_proxy_allocate(orte_jobid_t job)
int orte_ras_proxy_allocate(orte_jobid_t job, opal_list_t *attributes)
{
orte_buffer_t* cmd;
orte_buffer_t* answer;
@ -69,6 +69,12 @@ int orte_ras_proxy_allocate(orte_jobid_t job)
return rc;
}
if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, attributes, 1, ORTE_ATTR_LIST))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(cmd);
return rc;
}
if (0 > orte_rml.send_buffer(orte_ras_proxy_replica, cmd, ORTE_RML_TAG_RAS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(cmd);

Просмотреть файл

@ -47,7 +47,7 @@ int orte_ras_proxy_finalize(void);
/*
* proxy function prototypes
*/
int orte_ras_proxy_allocate(orte_jobid_t job);
int orte_ras_proxy_allocate(orte_jobid_t job, opal_list_t *attributes);
int orte_ras_proxy_deallocate(orte_jobid_t job);

Просмотреть файл

@ -163,7 +163,10 @@
#include "orte/orte_constants.h"
#include "opal/mca/mca.h"
#include "opal/class/opal_list.h"
#include "orte/mca/ns/ns_types.h"
#include "ras_types.h"
#if defined(c_plusplus) || defined(__cplusplus)
@ -177,7 +180,7 @@ extern "C" {
/**
* Allocate resources to a job.
*/
typedef int (*orte_ras_base_module_allocate_fn_t)(orte_jobid_t jobid);
typedef int (*orte_ras_base_module_allocate_fn_t)(orte_jobid_t jobid, opal_list_t *attributes);
/**
* Deallocate resources from a job

Просмотреть файл

@ -16,6 +16,8 @@
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include <unistd.h>
#include <string.h>
@ -24,8 +26,9 @@
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "orte/dss/dss.h"
#include "orte/mca/ras/base/ras_private.h"
#include "ras_slurm.h"
@ -33,7 +36,7 @@
/*
* Local functions
*/
static int allocate(orte_jobid_t jobid);
static int allocate(orte_jobid_t jobid, opal_list_t *attributes);
static int deallocate(orte_jobid_t jobid);
static int finalize(void);
@ -61,13 +64,32 @@ orte_ras_base_module_t orte_ras_slurm_module = {
* requested number of nodes/process slots to the job.
*
*/
static int allocate(orte_jobid_t jobid)
static int allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
int ret;
char *slurm_node_str;
opal_list_t nodes;
opal_list_item_t* item;
orte_jobid_t *jptr;
orte_attribute_t *attr;
/* check the attributes to see if we are supposed to use the parent
* jobid's allocation. This can occur if we are doing a dynamic
* process spawn and don't want to go through the allocator again
*/
if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMGR_USE_PARENT_ALLOCATION))) {
/* attribute was given - just reallocate to the new jobid */
if (ORTE_SUCCESS != (ret = orte_dss.get((void**)&jptr, attr->value, ORTE_JOBID))) {
ORTE_ERROR_LOG(ret);
return ret;
}
if (ORTE_SUCCESS != (ret = orte_ras_base_reallocate(*jptr, jobid))) {
ORTE_ERROR_LOG(ret);
return ret;
}
return ORTE_SUCCESS;
}
slurm_node_str = getenv("SLURM_NODELIST");
if (NULL == slurm_node_str) {
opal_show_help("help-ras-slurm.txt", "env-var-not-found", 1,

Просмотреть файл

@ -17,6 +17,8 @@
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include <errno.h>
#include <unistd.h>
@ -26,8 +28,9 @@
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "orte/dss/dss.h"
#include "orte/mca/ras/base/ras_private.h"
#include "ras_tm.h"
@ -35,7 +38,7 @@
/*
* Local functions
*/
static int allocate(orte_jobid_t jobid);
static int allocate(orte_jobid_t jobid, opal_list_t *attributes);
static int deallocate(orte_jobid_t jobid);
static int finalize(void);
@ -63,13 +66,32 @@ orte_ras_base_module_t orte_ras_tm_module = {
*
*/
#include "orte/mca/gpr/gpr.h"
static int allocate(orte_jobid_t jobid)
static int allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
int ret;
opal_list_t nodes;
opal_list_item_t* item;
struct tm_roots root;
orte_jobid_t *jptr;
orte_attribute_t *attr;
/* check the attributes to see if we are supposed to use the parent
* jobid's allocation. This can occur if we are doing a dynamic
* process spawn and don't want to go through the allocator again
*/
if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMGR_USE_PARENT_ALLOCATION))) {
/* attribute was given - just reallocate to the new jobid */
if (ORTE_SUCCESS != (ret = orte_dss.get((void**)&jptr, attr->value, ORTE_JOBID))) {
ORTE_ERROR_LOG(ret);
return ret;
}
if (ORTE_SUCCESS != (ret = orte_ras_base_reallocate(*jptr, jobid))) {
ORTE_ERROR_LOG(ret);
return ret;
}
return ORTE_SUCCESS;
}
/* Open up our connection to tm */
ret = tm_init(NULL, &root);

Просмотреть файл

@ -16,26 +16,30 @@
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "orte/mca/ras/base/ras_private.h"
#include "orte/dss/dss.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/rmgr/base/rmgr_private.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/ras/base/ras_private.h"
#include "ras_xgrid.h"
/*
* Local functions
*/
static int allocate(orte_jobid_t jobid);
static int allocate(orte_jobid_t jobid, opal_list_t *attributes);
static int deallocate(orte_jobid_t jobid);
static int finalize(void);
@ -61,12 +65,31 @@ orte_ras_base_module_t orte_ras_xgrid_module = {
* requested number of nodes/process slots to the job.
*
*/
static int allocate(orte_jobid_t jobid)
static int allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
int ret;
opal_list_t nodes;
opal_list_item_t* item;
orte_jobid_t *jptr;
orte_attribute_t *attr;
/* check the attributes to see if we are supposed to use the parent
* jobid's allocation. This can occur if we are doing a dynamic
* process spawn and don't want to go through the allocator again
*/
if (NULL != (attr = orte_rmgr.find_attribute(attributes, ORTE_RMGR_USE_PARENT_ALLOCATION))) {
/* attribute was given - just reallocate to the new jobid */
if (ORTE_SUCCESS != (ret = orte_dss.get((void**)&jptr, attr->value, ORTE_JOBID))) {
ORTE_ERROR_LOG(ret);
return ret;
}
if (ORTE_SUCCESS != (ret = orte_ras_base_reallocate(*jptr, jobid))) {
ORTE_ERROR_LOG(ret);
return ret;
}
return ORTE_SUCCESS;
}
OBJ_CONSTRUCT(&nodes, opal_list_t);
if (ORTE_SUCCESS != (ret = discover(jobid, &nodes))) {
opal_output(orte_ras_base.ras_output,

Просмотреть файл

@ -23,6 +23,7 @@ headers += \
base/base.h
libmca_rmgr_la_SOURCES += \
base/rmgr_base_attribute_fns.c \
base/rmgr_base_check_context.c \
base/rmgr_base_context.c \
base/rmgr_base_con_discon.c \

Просмотреть файл

@ -21,8 +21,10 @@
#include <netinet/in.h>
#endif
#include "opal/class/opal_list.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/dss/dss_internal.h"
#include "orte/dss/dss.h"
#include "orte/mca/rmgr/base/rmgr_private.h"
@ -48,3 +50,25 @@ int orte_rmgr_base_compare_app_context_map(orte_app_context_map_t *value1, orte_
return ORTE_EQUAL;
}
/*
 * ATTRIBUTE
 *
 * Compare two attributes by handing both to the DSS compare function
 * using the ORTE_GPR_KEYVAL data type. The "type" argument is not used.
 * Returns whatever orte_dss.compare returns.
 */
int orte_rmgr_base_compare_attribute(orte_attribute_t *value1, orte_attribute_t *value2, orte_data_type_t type)
{
    int result;

    result = orte_dss.compare(value1, value2, ORTE_GPR_KEYVAL);

    return result;
}
/*
 * ATTRIBUTE LIST
 *
 * Compare two attribute lists by length only: the list holding more
 * entries is reported as the greater one, and lists of equal length are
 * reported as equal. The entries themselves are not examined, and the
 * "type" argument is not used.
 */
int orte_rmgr_base_compare_attr_list(opal_list_t *value1, opal_list_t *value2, orte_data_type_t type)
{
    size_t len1 = opal_list_get_size(value1);
    size_t len2 = opal_list_get_size(value2);

    if (len1 == len2) {
        return ORTE_EQUAL;
    }

    return (len1 > len2) ? ORTE_VALUE1_GREATER : ORTE_VALUE2_GREATER;
}

Просмотреть файл

@ -24,6 +24,7 @@
#endif
#include "opal/util/argv.h"
#include "opal/class/opal_list.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/dss/dss_internal.h"
@ -94,3 +95,49 @@ int orte_rmgr_base_copy_app_context_map(orte_app_context_map_t **dest, orte_app_
return ORTE_SUCCESS;
}
/*
 * ATTRIBUTE
 *
 * Copy an attribute by delegating to the DSS copy function using the
 * ORTE_GPR_KEYVAL data type; the new object is returned through *dest.
 * The "type" argument is not used. A failure is logged and its error
 * code returned; otherwise ORTE_SUCCESS is returned.
 */
int orte_rmgr_base_copy_attribute(orte_attribute_t **dest, orte_attribute_t *src, orte_data_type_t type)
{
    int status = orte_dss.copy((void**)dest, src, ORTE_GPR_KEYVAL);

    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }

    return status;
}
/*
* ATTRIBUTE LIST
*/
/**
 * Copy a list of attributes.
 *
 * A new opal_list_t is created and every item on src is copied onto it
 * via orte_dss.copy with the ORTE_ATTRIBUTE type.
 *
 * @param dest Receives the new list on success. Set to NULL when the list
 *             cannot be created or when copying any element fails.
 * @param src  List whose items are copied.
 * @param type Data type tag supplied by the caller (unused here).
 * @return ORTE_SUCCESS, ORTE_ERR_OUT_OF_RESOURCE if the list object could
 *         not be created, or the error code from orte_dss.copy.
 */
int orte_rmgr_base_copy_attr_list(opal_list_t **dest, opal_list_t *src, orte_data_type_t type)
{
    int rc;
    opal_list_t *copy;
    opal_list_item_t *item, *stale;
    orte_attribute_t *attr;

    /* create the new object */
    copy = OBJ_NEW(opal_list_t);
    *dest = copy;
    if (NULL == copy) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* copy data into it */
    for (item = opal_list_get_first(src);
         item != opal_list_get_end(src);
         item = opal_list_get_next(item)) {
        if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&attr, item, ORTE_ATTRIBUTE))) {
            ORTE_ERROR_LOG(rc);
            /* do not leak the partially built copy: release every item
             * already appended, then the list itself, and make sure the
             * caller is not handed a pointer to released memory */
            while (NULL != (stale = opal_list_remove_first(copy))) {
                OBJ_RELEASE(stale);
            }
            OBJ_RELEASE(copy);
            *dest = NULL;
            return rc;
        }
        opal_list_append(copy, &attr->super);
    }

    return ORTE_SUCCESS;
}

Просмотреть файл

@ -24,6 +24,7 @@
#endif
#include "opal/util/argv.h"
#include "opal/class/opal_list.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/dss/dss_internal.h"
@ -184,3 +185,54 @@ int orte_rmgr_base_pack_app_context_map(orte_buffer_t *buffer, void *src,
return ORTE_SUCCESS;
}
/*
* ATTRIBUTE
*/
/**
 * Pack attribute(s) into a buffer.
 *
 * The work is delegated to orte_dss_pack_buffer using the ORTE_GPR_KEYVAL
 * type.
 *
 * NOTE(review): the address of the local src pointer is what gets handed
 * to the packer, i.e. a one-element array holding src. That presumably is
 * only meaningful for num_vals == 1 - confirm against the keyval packer.
 *
 * @param buffer   Buffer to pack into.
 * @param src      Attribute to pack.
 * @param num_vals Number of values, forwarded unchanged to the packer.
 * @param type     Data type tag supplied by the caller (unused here).
 * @return The code returned by orte_dss_pack_buffer; failures are logged.
 */
int orte_rmgr_base_pack_attribute(orte_buffer_t *buffer, void *src,
                                  orte_std_cntr_t num_vals, orte_data_type_t type)
{
    int status;

    status = orte_dss_pack_buffer(buffer, &src, num_vals, ORTE_GPR_KEYVAL);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }

    return status;
}
/*
* ATTRIBUTE LIST
*/
/**
 * Pack a list of attributes into a buffer.
 *
 * The number of entries is packed first (as ORTE_STD_CNTR), followed by
 * each list item packed as an ORTE_ATTRIBUTE.
 *
 * @param buffer   Buffer to pack into.
 * @param src      The opal_list_t holding the attributes.
 * @param num_vals Unused here.
 * @param type     Data type tag supplied by the caller (unused here).
 * @return ORTE_SUCCESS, or the first error returned by
 *         orte_dss_pack_buffer (which is also logged).
 */
int orte_rmgr_base_pack_attr_list(orte_buffer_t *buffer, void *src,
                                  orte_std_cntr_t num_vals, orte_data_type_t type)
{
    opal_list_t *list = (opal_list_t*)src;
    opal_list_item_t *cur;
    orte_std_cntr_t n_items;
    int status;

    /* the entry count always goes first */
    n_items = opal_list_get_size(list);
    status = orte_dss_pack_buffer(buffer, (void*)&n_items, 1, ORTE_STD_CNTR);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
        return status;
    }

    /* nothing further to pack when the count is not positive */
    if (0 >= n_items) {
        return ORTE_SUCCESS;
    }

    /* pack each entry in list order */
    cur = opal_list_get_first(list);
    while (cur != opal_list_get_end(list)) {
        status = orte_dss_pack_buffer(buffer, (void*)cur, 1, ORTE_ATTRIBUTE);
        if (ORTE_SUCCESS != status) {
            ORTE_ERROR_LOG(status);
            return status;
        }
        cur = opal_list_get_next(cur);
    }

    return ORTE_SUCCESS;
}

Просмотреть файл

@ -102,3 +102,79 @@ int orte_rmgr_base_print_app_context_map(char **output, char *prefix, orte_app_c
return ORTE_SUCCESS;
}
/*
* ATTRIBUTE
*/
/**
 * Produce a printable description of an attribute.
 *
 * The attribute body is rendered by orte_dss.print (as ORTE_GPR_KEYVAL)
 * with the prefix extended by a tab, and is placed under an
 * "Attribute:" header line that carries the caller's prefix.
 *
 * @param output Receives the newly allocated string; set to NULL on
 *               failure. The string comes from asprintf.
 * @param prefix Text placed in front of the header line; a single space
 *               is used when NULL.
 * @param src    Attribute to print.
 * @param type   Data type tag supplied by the caller (unused here).
 * @return ORTE_SUCCESS, or the error code from orte_dss.print.
 */
int orte_rmgr_base_print_attribute(char **output, char *prefix, orte_attribute_t *src, orte_data_type_t type)
{
    char *tmp, *pfx2, *pfx3;
    int rc;

    /* set default result */
    *output = NULL;

    /* protect against NULL prefix */
    if (NULL == prefix) {
        asprintf(&pfx2, " ");
    } else {
        asprintf(&pfx2, "%s", prefix);
    }

    /* the body is indented one tab deeper than the header */
    asprintf(&pfx3, "%s\t", pfx2);

    if (ORTE_SUCCESS != (rc = orte_dss.print(&tmp, pfx3, src, ORTE_GPR_KEYVAL))) {
        ORTE_ERROR_LOG(rc);
        free(pfx2);
        free(pfx3);
        return rc;
    }

    asprintf(output, "%sAttribute:\n%s", pfx2, tmp);

    /* the rendered body has been copied into *output - release it so it
     * is not leaked */
    free(tmp);
    free(pfx2);
    free(pfx3);

    return ORTE_SUCCESS;
}
/**
 * Produce a printable description of a list of attributes.
 *
 * The output starts with a "List of N Attributes:" header and is followed
 * by the rendering of each list item from orte_rmgr_base_print_attribute,
 * indented one tab deeper than the header.
 *
 * @param output Receives the newly allocated string; left NULL on failure.
 * @param prefix Text placed in front of the header line; a single space
 *               is used when NULL.
 * @param src    List of attributes to print.
 * @param type   Data type tag supplied by the caller (unused here).
 * @return ORTE_SUCCESS, or the error code from printing an entry.
 */
int orte_rmgr_base_print_attr_list(char **output, char *prefix, opal_list_t *src, orte_data_type_t type)
{
    opal_list_item_t *item;
    char *tmp, *tmp2, *tmp3, *pfx2, *pfx3;
    int rc;

    /* set default result */
    *output = NULL;

    /* protect against NULL prefix */
    if (NULL == prefix) {
        asprintf(&pfx2, " ");
    } else {
        asprintf(&pfx2, "%s", prefix);
    }

    asprintf(&tmp, "%sList of %ld Attributes:\n", pfx2, (long)opal_list_get_size(src));
    asprintf(&pfx3, "%s\t", pfx2);

    for (item = opal_list_get_first(src);
         item != opal_list_get_end(src);
         item = opal_list_get_next(item)) {
        if (ORTE_SUCCESS != (rc = orte_rmgr_base_print_attribute(&tmp2, pfx3, (orte_attribute_t*)item, ORTE_ATTRIBUTE))) {
            ORTE_ERROR_LOG(rc);
            /* release the text accumulated so far along with the
             * prefixes - it is not being returned to the caller */
            free(tmp);
            free(pfx2);
            free(pfx3);
            return rc;
        }
        /* append this entry to the accumulated text */
        asprintf(&tmp3, "%s%s\n", tmp, tmp2);
        free(tmp);
        free(tmp2);
        tmp = tmp3;
    }

    /* ownership of the accumulated text passes to the caller */
    *output = tmp;

    free(pfx2);
    free(pfx3);

    return ORTE_SUCCESS;
}

Просмотреть файл

@ -103,3 +103,44 @@ int orte_rmgr_base_size_app_context_map(size_t *size, orte_app_context_map_t *sr
return ORTE_SUCCESS;
}
/*
* ATTRIBUTE
*/
/**
 * Report the size of an attribute.
 *
 * The size is obtained from orte_dss.size using the ORTE_GPR_KEYVAL type.
 *
 * @param size Receives the size; set to 0 when sizing fails.
 * @param src  Attribute to size.
 * @param type Data type tag supplied by the caller (unused here).
 * @return ORTE_SUCCESS, or the error code from orte_dss.size (which is
 *         also logged).
 */
int orte_rmgr_base_size_attribute(size_t *size, orte_attribute_t *src, orte_data_type_t type)
{
    int rc;
    size_t tsize = 0;

    *size = 0;

    /* capture the return code so that a defined value is logged and
     * returned to the caller */
    if (ORTE_SUCCESS != (rc = orte_dss.size(&tsize, src, ORTE_GPR_KEYVAL))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    *size = tsize;
    return ORTE_SUCCESS;
}
/**
 * Report the size of a list of attributes.
 *
 * The result is the sum of the sizes reported by
 * orte_rmgr_base_size_attribute for each item on the list.
 *
 * @param size Receives the running total; holds the sum of the entries
 *             sized so far if an error interrupts the walk.
 * @param src  List of attributes to size.
 * @param type Data type tag supplied by the caller (unused here).
 * @return ORTE_SUCCESS, or the error code from sizing an entry.
 */
int orte_rmgr_base_size_attr_list(size_t *size, opal_list_t *src, orte_data_type_t type)
{
    opal_list_item_t *cur;
    size_t entry_size;
    int status;

    *size = 0;

    cur = opal_list_get_first(src);
    while (cur != opal_list_get_end(src)) {
        status = orte_rmgr_base_size_attribute(&entry_size, (orte_attribute_t*)cur, ORTE_ATTRIBUTE);
        if (ORTE_SUCCESS != status) {
            ORTE_ERROR_LOG(status);
            return status;
        }
        *size += entry_size;
        cur = opal_list_get_next(cur);
    }

    return ORTE_SUCCESS;
}

Просмотреть файл

@ -25,6 +25,8 @@
#include <netinet/in.h>
#endif
#include "opal/class/opal_list.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/dss/dss_internal.h"
@ -225,3 +227,54 @@ int orte_rmgr_base_unpack_app_context_map(orte_buffer_t *buffer, void *dest,
return ORTE_SUCCESS;
}
/*
* ATTRIBUTE
*/
/**
 * Unpack attribute(s) from a buffer.
 *
 * The work is delegated to orte_dss_unpack_buffer using the
 * ORTE_GPR_KEYVAL type; dest and num_vals are forwarded unchanged.
 *
 * @param buffer   Buffer to unpack from.
 * @param dest     Destination handed to the unpacker.
 * @param num_vals Value count handed to the unpacker.
 * @param type     Data type tag supplied by the caller (unused here).
 * @return The code returned by orte_dss_unpack_buffer; failures are logged.
 */
int orte_rmgr_base_unpack_attribute(orte_buffer_t *buffer, void *dest,
                                    orte_std_cntr_t *num_vals, orte_data_type_t type)
{
    int status;

    status = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_GPR_KEYVAL);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }

    return status;
}
/*
* ATTRIBUTE LIST
*/
/**
 * Unpack a list of attributes from a buffer.
 *
 * The entry count is unpacked first (as ORTE_STD_CNTR); that many
 * ORTE_ATTRIBUTE values are then unpacked and appended to dest.
 *
 * NOTE(review): an orte_attribute_t is created with OBJ_NEW and the
 * address of that pointer is then handed to the unpacker. Whether the
 * unpacker fills the existing object or replaces the pointer cannot be
 * seen from here - confirm, since replacement would orphan the object
 * created here, as would the early return on unpack failure.
 *
 * @param buffer   Buffer to unpack from.
 * @param dest     The opal_list_t the attributes are appended to.
 * @param num_vals Unused here.
 * @param type     Data type tag supplied by the caller (unused here).
 * @return ORTE_SUCCESS, ORTE_ERR_OUT_OF_RESOURCE if an attribute object
 *         could not be created, or the error from orte_dss_unpack_buffer.
 */
int orte_rmgr_base_unpack_attr_list(orte_buffer_t *buffer, void *dest,
                                    orte_std_cntr_t *num_vals, orte_data_type_t type)
{
    orte_std_cntr_t one, total, idx;
    orte_attribute_t *entry;
    int status;

    /* how many attributes were packed? */
    one = 1;
    status = orte_dss_unpack_buffer(buffer, &total, &one, ORTE_STD_CNTR);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
        return status;
    }

    /* if there are any...unpack them */
    idx = 0;
    while (idx < total) {
        entry = OBJ_NEW(orte_attribute_t);
        if (NULL == entry) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        one = 1;
        status = orte_dss_unpack_buffer(buffer, &entry, &one, ORTE_ATTRIBUTE);
        if (ORTE_SUCCESS != status) {
            ORTE_ERROR_LOG(status);
            return status;
        }

        opal_list_append(dest, &entry->super);
        idx++;
    }

    return ORTE_SUCCESS;
}

137
orte/mca/rmgr/base/rmgr_base_attribute_fns.c Обычный файл
Просмотреть файл

@ -0,0 +1,137 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*
* Attribute functions for the RMGR subsystem
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include <string.h>
#include "opal/util/output.h"
#include "opal/class/opal_list.h"
#include "orte/dss/dss.h"
#include "orte/mca/schema/schema.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmgr/base/rmgr_private.h"
/*
* FIND ATTRIBUTE
*/
/**
 * Locate an attribute on a list by key.
 *
 * The list is walked front to back and the first entry whose key matches
 * (per strcmp) is returned. The entry stays on the list and is not
 * copied.
 *
 * @param attr_list List of attributes to search.
 * @param key       Key of the attribute being sought.
 * @return Pointer to the matching entry, or NULL if no entry matches.
 */
orte_attribute_t* orte_rmgr_base_find_attribute(opal_list_t* attr_list, char* key)
{
    opal_list_item_t *cur = opal_list_get_first(attr_list);

    while (cur != opal_list_get_end(attr_list)) {
        orte_attribute_t *candidate = (orte_attribute_t*)cur;

        if (0 == strcmp(key, candidate->key)) {
            /** found it - hand back a pointer to this attribute */
            return candidate;
        }
        cur = opal_list_get_next(cur);
    }

    /** didn't find it */
    return NULL;
}
/*
* ADD ATTRIBUTE
*/
/**
 * Append a new attribute to a list.
 *
 * The attribute object is obtained from orte_gpr.create_keyval using the
 * given key, type and data, and is appended to the end of the list.
 *
 * @param attr_list List to append to.
 * @param key       Key for the new attribute.
 * @param type      Data type passed to orte_gpr.create_keyval.
 * @param data      Data pointer passed to orte_gpr.create_keyval.
 * @return ORTE_SUCCESS, or the error code from orte_gpr.create_keyval
 *         (which is also logged).
 */
int orte_rmgr_base_add_attribute(opal_list_t* attr_list, char* key,
                                 orte_data_type_t type, void *data)
{
    orte_attribute_t *fresh;
    int status;

    status = orte_gpr.create_keyval(&fresh, key, type, data);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
        return status;
    }

    opal_list_append(attr_list, &fresh->super);
    return ORTE_SUCCESS;
}
/*
* UPDATE ATTRIBUTE
*/
/**
 * Replace (or add) an attribute on a list.
 *
 * The first entry whose key matches is removed from the list and
 * released; a new attribute built from the supplied key, type and data is
 * then appended via orte_rmgr_base_add_attribute. When no entry matches,
 * the new attribute is simply appended.
 *
 * @param attr_list List to update.
 * @param key       Key of the attribute to replace or add.
 * @param type      Data type for the new attribute.
 * @param data      Data pointer for the new attribute.
 * @return The code returned by orte_rmgr_base_add_attribute; failures are
 *         logged.
 */
int orte_rmgr_base_update_attribute(opal_list_t* attr_list, char* key,
                                    orte_data_type_t type, void *data)
{
    opal_list_item_t *cur;
    orte_attribute_t *existing;
    int status;

    cur = opal_list_get_first(attr_list);
    while (cur != opal_list_get_end(attr_list)) {
        existing = (orte_attribute_t*)cur;
        if (0 == strcmp(key, existing->key)) {
            /** found it - drop the old entry; its replacement is
             * appended below
             */
            opal_list_remove_item(attr_list, cur);
            OBJ_RELEASE(cur);
            break;
        }
        cur = opal_list_get_next(cur);
    }

    /** didn't find it or replacing the old one - add the attribute */
    status = orte_rmgr_base_add_attribute(attr_list, key, type, data);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }

    return status;
}
/*
* DELETE ATTRIBUTE
*/
/**
 * Remove an attribute from a list.
 *
 * The first entry whose key matches is removed from the list and
 * released.
 *
 * @param attr_list List to remove from.
 * @param key       Key of the attribute to delete.
 * @return ORTE_SUCCESS when an entry was removed, ORTE_ERR_NOT_FOUND when
 *         no entry matches (this case is deliberately not logged).
 */
int orte_rmgr_base_delete_attribute(opal_list_t* attr_list, char* key)
{
    opal_list_item_t *cur = opal_list_get_first(attr_list);

    while (cur != opal_list_get_end(attr_list)) {
        orte_attribute_t *candidate = (orte_attribute_t*)cur;

        if (0 != strcmp(key, candidate->key)) {
            cur = opal_list_get_next(cur);
            continue;
        }

        /** found it - remove it from list */
        opal_list_remove_item(attr_list, cur);
        OBJ_RELEASE(cur);
        return ORTE_SUCCESS;
    }

    /** didn't find it - don't error log this as it might not be an error */
    return ORTE_ERR_NOT_FOUND;
}

Просмотреть файл

@ -57,6 +57,10 @@ orte_rmgr_base_module_t orte_rmgr = {
orte_rmgr_base_disconnect,
orte_rmgr_base_finalize_not_available,
/** SUPPORT FUNCTIONS ***/
orte_rmgr_base_find_attribute,
orte_rmgr_base_add_attribute,
orte_rmgr_base_update_attribute,
orte_rmgr_base_delete_attribute,
orte_rmgr_base_get_app_context,
orte_rmgr_base_put_app_context,
orte_rmgr_base_check_context_cwd,
@ -152,6 +156,27 @@ OBJ_CLASS_INSTANCE(orte_app_context_map_t,
orte_app_context_map_construct,
orte_app_context_map_destruct);
/*
* ATTRIBUTE
*/
/* Constructor: start with no key and no value attached. */
static void orte_attribute_construct(orte_attribute_t *a)
{
    a->value = NULL;
    a->key = NULL;
}

/* Destructor: free the key string and release the value object, each
 * only when present. */
static void orte_attribute_destruct(orte_attribute_t *a)
{
    if (NULL != a->key) {
        free(a->key);
    }

    if (NULL != a->value) {
        OBJ_RELEASE(a->value);
    }
}

/* Class instance for orte_attribute_t, with opal_list_item_t as parent. */
OBJ_CLASS_INSTANCE(orte_attribute_t,
                   opal_list_item_t,
                   orte_attribute_construct,
                   orte_attribute_destruct);
/**
* Function for finding and opening either all MCA components, or the one
* that was specifically requested via a MCA parameter.
@ -205,7 +230,35 @@ int orte_rmgr_base_open(void)
return rc;
}
/* Open up all available components */
tmp = ORTE_ATTRIBUTE;
if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_rmgr_base_pack_attribute,
orte_rmgr_base_unpack_attribute,
(orte_dss_copy_fn_t)orte_rmgr_base_copy_attribute,
(orte_dss_compare_fn_t)orte_rmgr_base_compare_attribute,
(orte_dss_size_fn_t)orte_rmgr_base_size_attribute,
(orte_dss_print_fn_t)orte_rmgr_base_print_attribute,
(orte_dss_release_fn_t)orte_rmgr_base_std_obj_release,
ORTE_DSS_STRUCTURED,
"ORTE_ATTRIBUTE", &tmp))) {
ORTE_ERROR_LOG(rc);
return rc;
}
tmp = ORTE_ATTR_LIST;
if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_rmgr_base_pack_attr_list,
orte_rmgr_base_unpack_attr_list,
(orte_dss_copy_fn_t)orte_rmgr_base_copy_attr_list,
(orte_dss_compare_fn_t)orte_rmgr_base_compare_attr_list,
(orte_dss_size_fn_t)orte_rmgr_base_size_attr_list,
(orte_dss_print_fn_t)orte_rmgr_base_print_attr_list,
(orte_dss_release_fn_t)orte_rmgr_base_std_obj_release,
ORTE_DSS_STRUCTURED,
"ORTE_ATTR_LIST", &tmp))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* Open up all available components */
if (ORTE_SUCCESS !=
mca_base_components_open("rmgr", orte_rmgr_base.rmgr_output,

Просмотреть файл

@ -93,9 +93,11 @@ void orte_rmgr_base_recv(int status, orte_process_name_t* sender,
{
orte_buffer_t answer;
orte_rmgr_cmd_t command;
orte_std_cntr_t count, num_context;
orte_std_cntr_t i, count, num_context;
orte_jobid_t job;
orte_app_context_t **context;
opal_list_item_t *item;
opal_list_t attrs;
int rc;
OPAL_TRACE(2);
@ -179,21 +181,39 @@ void orte_rmgr_base_recv(int status, orte_process_name_t* sender,
free(context);
return;
}
/* unpack the attributes */
OBJ_CONSTRUCT(&attrs, opal_list_t);
count = 1;
if(ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &attrs, &count, ORTE_ATTR_LIST))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP_SPAWN;
}
/* process the request */
/* init the job to be INVALID so we setup the job */
job = ORTE_JOBID_INVALID;
if (ORTE_SUCCESS != (rc = orte_rmgr.spawn_job(context, num_context, &job,
0, NULL, NULL, ORTE_PROC_STATE_NONE))) {
0, NULL, NULL, ORTE_PROC_STATE_NONE, &attrs))) {
ORTE_ERROR_LOG(rc);
goto SEND_ANSWER;
goto CLEANUP_SPAWN;
}
/* return the new jobid */
if(ORTE_SUCCESS != (rc = orte_dss.pack(&answer, &job, 1, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
goto SEND_ANSWER;
}
CLEANUP_SPAWN:
for (i=0; i < num_context; i++) {
OBJ_RELEASE(context[i]);
}
if (NULL != context) free(context);
while (NULL != (item = opal_list_remove_first(&attrs))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&attrs);
break;
case ORTE_RMGR_SETUP_GATES_CMD:

Просмотреть файл

@ -46,7 +46,8 @@ orte_rmgr_base_spawn_not_available(
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfn,
orte_proc_state_t cb_conditions)
orte_proc_state_t cb_conditions,
opal_list_t *attributes)
{
return ORTE_ERR_UNREACH;
}

Просмотреть файл

@ -44,6 +44,16 @@
extern "C" {
#endif
/*
* Constants for command values
*/
#define ORTE_RMGR_SETUP_JOB_CMD 1
#define ORTE_RMGR_SPAWN_JOB_CMD 2
#define ORTE_RMGR_SETUP_GATES_CMD 3
#define ORTE_RMGR_CMD ORTE_UINT8
typedef uint8_t orte_rmgr_cmd_t;
/*
* Internal definitions
*/
@ -91,7 +101,8 @@ int orte_rmgr_base_spawn_not_available(
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfn,
orte_proc_state_t cb_conditions);
orte_proc_state_t cb_conditions,
opal_list_t *attributes);
ORTE_DECLSPEC int orte_rmgr_base_connect(orte_std_cntr_t num_connect,
orte_process_name_t *connect);
@ -99,6 +110,16 @@ ORTE_DECLSPEC int orte_rmgr_base_connect(orte_std_cntr_t num_connect,
ORTE_DECLSPEC int orte_rmgr_base_disconnect(orte_std_cntr_t num_disconnect,
orte_process_name_t *disconnect);
orte_gpr_keyval_t* orte_rmgr_base_find_attribute(opal_list_t* attr_list, char* key);
int orte_rmgr_base_add_attribute(opal_list_t* attr_list, char* key,
orte_data_type_t type, void *data);
int orte_rmgr_base_update_attribute(opal_list_t* attr_list, char* key,
orte_data_type_t type, void *data);
int orte_rmgr_base_delete_attribute(opal_list_t* attr_list, char* key);
int orte_rmgr_base_finalize_not_available(void);
/*
@ -125,6 +146,13 @@ int orte_rmgr_base_pack_app_context(orte_buffer_t *buffer, void *src,
int orte_rmgr_base_pack_app_context_map(orte_buffer_t *buffer, void *src,
orte_std_cntr_t num_vals, orte_data_type_t type);
int orte_rmgr_base_pack_attr_list(orte_buffer_t *buffer, void *src,
orte_std_cntr_t num_vals, orte_data_type_t type);
int orte_rmgr_base_pack_attribute(orte_buffer_t *buffer, void *src,
orte_std_cntr_t num_vals, orte_data_type_t type);
/*
* DATA TYPE UNPACKING FUNCTIONS
*/
@ -134,6 +162,12 @@ int orte_rmgr_base_unpack_app_context(orte_buffer_t *buffer, void *dest,
int orte_rmgr_base_unpack_app_context_map(orte_buffer_t *buffer, void *dest,
orte_std_cntr_t *num_vals, orte_data_type_t type);
int orte_rmgr_base_unpack_attr_list(orte_buffer_t *buffer, void *dest,
orte_std_cntr_t *num_vals, orte_data_type_t type);
int orte_rmgr_base_unpack_attribute(orte_buffer_t *buffer, void *dest,
orte_std_cntr_t *num_vals, orte_data_type_t type);
/*
* COMPARE FUNCTIONS
@ -142,6 +176,10 @@ int orte_rmgr_base_compare_app_context(orte_app_context_t *value1, orte_app_cont
int orte_rmgr_base_compare_app_context_map(orte_app_context_map_t *value1, orte_app_context_map_t *value2, orte_data_type_t type);
int orte_rmgr_base_compare_attr_list(opal_list_t *value1, opal_list_t *value2, orte_data_type_t type);
int orte_rmgr_base_compare_attribute(orte_attribute_t *value1, orte_attribute_t *value2, orte_data_type_t type);
/*
* COPY FUNCTIONS
*/
@ -149,6 +187,10 @@ int orte_rmgr_base_copy_app_context(orte_app_context_t **dest, orte_app_context_
int orte_rmgr_base_copy_app_context_map(orte_app_context_map_t **dest, orte_app_context_map_t *src, orte_data_type_t type);
int orte_rmgr_base_copy_attr_list(opal_list_t **dest, opal_list_t *src, orte_data_type_t type);
int orte_rmgr_base_copy_attribute(orte_attribute_t **dest, orte_attribute_t *src, orte_data_type_t type);
/*
* PRINT FUNCTIONS
*/
@ -156,6 +198,11 @@ int orte_rmgr_base_print_app_context(char **output, char *prefix, orte_app_conte
int orte_rmgr_base_print_app_context_map(char **output, char *prefix, orte_app_context_map_t *src, orte_data_type_t type);
int orte_rmgr_base_print_attribute(char **output, char *prefix, orte_attribute_t *src, orte_data_type_t type);
int orte_rmgr_base_print_attr_list(char **output, char *prefix, opal_list_t *src, orte_data_type_t type);
/*
* SIZE FUNCTIONS
*/
@ -163,6 +210,10 @@ int orte_rmgr_base_size_app_context(size_t *size, orte_app_context_t *src, orte_
int orte_rmgr_base_size_app_context_map(size_t *size, orte_app_context_map_t *src, orte_data_type_t type);
int orte_rmgr_base_size_attr_list(size_t *size, opal_list_t *src, orte_data_type_t type);
int orte_rmgr_base_size_attribute(size_t *size, orte_attribute_t *src, orte_data_type_t type);
/*
* RELEASE FUNCTIONS
*/

Просмотреть файл

@ -30,6 +30,8 @@
#include <catamount/cnos_mpi_os.h>
#endif
#include "opal/class/opal_list.h"
#include "orte/orte_constants.h"
#include "orte/mca/rmgr/base/base.h"
#include "orte/mca/ns/ns.h"
@ -49,7 +51,8 @@ static int orte_rmgr_cnos_spawn_job(
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfn,
orte_proc_state_t cb_conditions);
orte_proc_state_t cb_conditions,
opal_list_t *attributes);
static int orte_rmgr_cnos_connect(orte_std_cntr_t num_connect,
orte_process_name_t *connect);

Просмотреть файл

@ -23,6 +23,8 @@
#endif /* HAVE_STRING_H */
#include "orte/orte_constants.h"
#include "opal/class/opal_list.h"
#include "opal/util/output.h"
#include "opal/util/trace.h"
@ -53,7 +55,8 @@ static int orte_rmgr_proxy_spawn_job(
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfn,
orte_proc_state_t cb_conditions);
orte_proc_state_t cb_conditions,
opal_list_t *attributes);
orte_rmgr_base_module_t orte_rmgr_proxy_module = {
NULL, /* don't need special init */
@ -63,6 +66,10 @@ orte_rmgr_base_module_t orte_rmgr_proxy_module = {
orte_rmgr_base_disconnect,
NULL, /* finalize */
/** SUPPORT FUNCTIONS ***/
orte_rmgr_base_find_attribute,
orte_rmgr_base_add_attribute,
orte_rmgr_base_update_attribute,
orte_rmgr_base_delete_attribute,
orte_rmgr_base_get_app_context,
orte_rmgr_base_put_app_context,
orte_rmgr_base_check_context_cwd,
@ -348,7 +355,8 @@ static int orte_rmgr_proxy_spawn_job(
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfunc,
orte_proc_state_t cb_conditions)
orte_proc_state_t cb_conditions,
opal_list_t *attributes)
{
int rc;
orte_process_name_t name = {0, ORTE_JOBID_INVALID, 0};
@ -371,7 +379,7 @@ static int orte_rmgr_proxy_spawn_job(
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_ras.allocate_job(*jobid))) {
if (ORTE_SUCCESS != (rc = orte_ras.allocate_job(*jobid, attributes))) {
ORTE_ERROR_LOG(rc);
return rc;
}

Просмотреть файл

@ -96,7 +96,8 @@ typedef int (*orte_rmgr_base_module_spawn_job_fn_t)(
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfn,
orte_proc_state_t cb_conditions);
orte_proc_state_t cb_conditions,
opal_list_t *attributes);
/**
@ -132,6 +133,80 @@ typedef int (*orte_rmgr_base_module_init_fn_t)(void);
typedef int (*orte_rmgr_base_module_finalize_fn_t)(void);
/**
* GENERAL UTILITY FUNCTIONS
*/
/**
* Find an attribute
* Given a list of attributes, return a pointer to the specified attribute
*
* @param attr_list A pointer to the list of attributes
* @param key The key indicating the attribute to be returned.
* @retval ptr A pointer to the orte_gpr_keyval_t containing the attribute.
* Note that this value is *not* being duplicated, nor is it being
* removed from the list, so alterations or release of the object will impact the list!
* @retval NULL The specified attribute was not found - this function does not
* consider this to be an error, so no ORTE_ERROR_LOG is printed out when this happens.
*/
typedef orte_attribute_t* (*orte_rmgr_base_module_find_attribute_fn_t)(opal_list_t* attr_list, char* key);
/**
* Add an attribute
* Given a list of attributes and the data for a new attribute,
* this function will create the gpr_keyval_t object for that attribute,
* populate it with the provided data, and append it to the list.
*
* @param attr_list A pointer to the list of attributes
* @param key The key for the attribute.
* @param type The data type to be stored in the attribute. A value
* of ORTE_UNDEF is acceptable to indicate that no data is being stored -
* the existence of the attribute on the list is all that is required.
* @param data A pointer to the data to be stored in the attribute. NULL
* is acceptable IF the data type is ORTE_UNDEF.
* @retval ORTE_SUCCESS Attribute was added to list.
* @retval ORTE_ERROR An appropriate error code indicating what went wrong.
*/
typedef int (*orte_rmgr_base_module_add_attribute_fn_t)(opal_list_t* attr_list, char* key,
orte_data_type_t type, void *data);
/**
* Update an attribute
* Given a list of attributes and the data for a new attribute,
* this function will find the attribute matching the given key and
* replace the current value with the one given. If the attribute is NOT
* found on the list, it will be added to it.
*
* @param attr_list A pointer to the list of attributes
* @param key The key for the attribute.
* @param type The data type to be stored in the attribute. A value
* of ORTE_UNDEF is acceptable to indicate that no data is being stored -
* the existence of the attribute on the list is all that is required.
* @param data A pointer to the data to be stored in the attribute. NULL
* is acceptable IF the data type is ORTE_UNDEF.
* @retval ORTE_SUCCESS Attribute was updated, or added to the list if it was not already present.
* @retval ORTE_ERROR An appropriate error code indicating what went wrong.
*/
typedef int (*orte_rmgr_base_module_update_attribute_fn_t)(opal_list_t* attr_list, char* key,
orte_data_type_t type, void *data);
/**
* Delete an attribute
* Given a list of attributes, delete the specified attribute
*
* @param attr_list A pointer to the list of attributes
* @param key The key indicating the attribute to be deleted.
* @retval ORTE_SUCCESS Attribute was removed from the list.
* @retval ORTE_ERROR An appropriate error code indicating what went wrong. Note that
* an error code of NOT_FOUND will be returned if the specified attribute is
* not on the provided list - it is up to the caller to decide if this is an actual
* error. This function will NOT do an ORTE_ERROR_LOG in the case of NOT_FOUND.
*/
typedef int (*orte_rmgr_base_module_delete_attribute_fn_t)(opal_list_t* attr_list, char* key);
/*** APP_CONTEXT FUNCTIONS ***/
/*
* Store an array of app_context objects for a given job/pset
@ -189,6 +264,10 @@ struct orte_rmgr_base_module_2_0_0_t {
orte_rmgr_base_module_disconnect_fn_t disconnect;
orte_rmgr_base_module_finalize_fn_t finalize;
/** SUPPORT FUNCTIONS ***/
orte_rmgr_base_module_find_attribute_fn_t find_attribute;
orte_rmgr_base_module_add_attribute_fn_t add_attribute;
orte_rmgr_base_module_update_attribute_fn_t update_attribute;
orte_rmgr_base_module_delete_attribute_fn_t delete_attribute;
orte_rmgr_base_module_get_app_context_fn_t get_app_context;
orte_rmgr_base_module_store_app_context_fn_t store_app_context;
orte_rmgr_base_module_check_context_cwd_fn_t check_context_cwd;

Просмотреть файл

@ -23,6 +23,8 @@
#include "opal/class/opal_object.h"
#include "orte/mca/gpr/gpr_types.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
@ -33,14 +35,13 @@ extern "C" {
#define ORTE_RMGR_LAUNCHER "orte-rmgr-launcher"
/*
* Constants for command values
* RMGR ATTRIBUTES
*/
#define ORTE_RMGR_SETUP_JOB_CMD 1
#define ORTE_RMGR_SPAWN_JOB_CMD 2
#define ORTE_RMGR_SETUP_GATES_CMD 3
typedef orte_gpr_keyval_t orte_attribute_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_attribute_t);
#define ORTE_RMGR_USE_PARENT_ALLOCATION "orte-use-parent-alloc"
#define ORTE_RMGR_CMD ORTE_UINT8
typedef uint8_t orte_rmgr_cmd_t;
/* RESOURCE MANAGER DATA TYPES */

Просмотреть файл

@ -29,6 +29,7 @@
#include <string.h>
#endif /* HAVE_STRING_H */
#include "opal/class/opal_list.h"
#include "opal/util/trace.h"
#include "opal/util/output.h"
@ -58,7 +59,8 @@ static int orte_rmgr_urm_spawn_job(
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfn,
orte_proc_state_t cb_conditions);
orte_proc_state_t cb_conditions,
opal_list_t *attributes);
static int orte_rmgr_urm_module_init(void);
@ -73,6 +75,10 @@ orte_rmgr_base_module_t orte_rmgr_urm_module = {
orte_rmgr_base_disconnect,
orte_rmgr_urm_module_finalize,
/** SUPPORT FUNCTIONS ***/
orte_rmgr_base_find_attribute,
orte_rmgr_base_add_attribute,
orte_rmgr_base_update_attribute,
orte_rmgr_base_delete_attribute,
orte_rmgr_base_get_app_context,
orte_rmgr_base_put_app_context,
orte_rmgr_base_check_context_cwd,
@ -272,7 +278,8 @@ static int orte_rmgr_urm_spawn_job(
orte_std_cntr_t num_connect,
orte_process_name_t *connect,
orte_rmgr_cb_fn_t cbfunc,
orte_proc_state_t cb_conditions)
orte_proc_state_t cb_conditions,
opal_list_t *attributes)
{
int rc;
orte_process_name_t* name;
@ -313,7 +320,7 @@ static int orte_rmgr_urm_spawn_job(
}
}
if (ORTE_SUCCESS != (rc = orte_ras.allocate_job(*jobid))) {
if (ORTE_SUCCESS != (rc = orte_ras.allocate_job(*jobid, attributes))) {
ORTE_ERROR_LOG(rc);
return rc;
}

Просмотреть файл

@ -509,6 +509,7 @@ int orte_init_stage1(bool infrastructure)
orte_rds_cell_desc_t *rds_item;
orte_rds_cell_attr_t *new_attr;
orte_ras_node_t *ras_item;
opal_list_t attrs;
OBJ_CONSTRUCT(&single_host, opal_list_t);
OBJ_CONSTRUCT(&rds_single_host, opal_list_t);
@ -606,7 +607,9 @@ int orte_init_stage1(bool infrastructure)
you'll end up with the localhost *and* all the other
nodes in your allocation on the node segment -- which
is probably fine) */
orte_ras.allocate_job(my_jobid);
OBJ_CONSTRUCT(&attrs, opal_list_t);
orte_ras.allocate_job(my_jobid, &attrs);
OBJ_DESTRUCT(&attrs);
OBJ_DESTRUCT(&single_host);
OBJ_DESTRUCT(&rds_single_host);

Просмотреть файл

@ -295,6 +295,7 @@ int orterun(int argc, char *argv[])
int id, iparam;
orte_proc_state_t cb_states;
orte_job_state_t exit_state;
opal_list_t attributes;
/* Setup MCA params */
@ -418,6 +419,8 @@ int orterun(int argc, char *argv[])
/* Prep to start the application */
/* construct the list of attributes */
OBJ_CONSTRUCT(&attributes, opal_list_t);
/** setup callbacks for abort signals */
opal_signal_set(&term_handler, SIGTERM,
@ -441,7 +444,7 @@ int orterun(int argc, char *argv[])
/* Spawn the job */
cb_states = ORTE_PROC_STATE_TERMINATED | ORTE_PROC_STATE_AT_STG1;
rc = orte_rmgr.spawn_job(apps, num_apps, &jobid, 0, NULL, job_state_callback, cb_states);
rc = orte_rmgr.spawn_job(apps, num_apps, &jobid, 0, NULL, job_state_callback, cb_states, &attributes);
if (ORTE_SUCCESS != rc) {
/* JMS show_help */
opal_output(0, "%s: spawn failed with errno=%d\n", orterun_basename, rc);
@ -500,11 +503,14 @@ int orterun(int argc, char *argv[])
}
/* All done */
OBJ_DESTRUCT(&attributes);
for (i = 0; i < num_apps; ++i) {
OBJ_RELEASE(apps[i]);
}
free(apps);
OBJ_RELEASE(apps_pa);
orte_finalize();
free(orterun_basename);
return rc;