
Bring in a forgotten speed improvement for the TM launcher that was developed during SNL Tbird testing last year. Remove the redundant and slow calls to TM to resolve hostnames; instead, read the host info from the PBS nodefile during the RAS and then just reuse that info in the PLS rather than querying TM again.
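A rough sketch of the idea, not the committed code: read the PBS nodefile named after $PBS_JOBID from an assumed directory (the new nodefile_dir parameter in this commit defaults to /var/torque/aux) and record each host together with its index in the allocation, which becomes the launch id. The path, the main() wrapper, and the printing are illustrative only.

/* Hypothetical sketch: enumerate hosts from the PBS nodefile and
 * assign each line's index as its launch id. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_LINE 512

int main(void)
{
    char path[1024], line[MAX_LINE];
    const char *jobid = getenv("PBS_JOBID");
    const char *dir = "/var/torque/aux";   /* assumed default nodefile_dir */
    FILE *fp;
    int id = 0;

    if (NULL == jobid) {
        fprintf(stderr, "PBS_JOBID not set\n");
        return 1;
    }
    snprintf(path, sizeof(path), "%s/%s", dir, jobid);
    if (NULL == (fp = fopen(path, "r"))) {
        perror(path);
        return 1;
    }
    while (NULL != fgets(line, sizeof(line), fp)) {
        line[strcspn(line, "\n")] = '\0';   /* strip trailing newline */
        /* a real implementation would fold duplicate hosts into a slot
         * count; here we just show host -> launch id */
        printf("host %s -> launch id %d\n", line, id);
        ++id;
    }
    fclose(fp);
    return 0;
}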

Adjust the RMAPS mapped_node object to propagate the required launch_id info now included in the ras_node object. This provides support for those few systems that don't use nodename to launch, but instead want some id (typically an index into the array of allocated nodes). This value gets set for each node in the RAS - the RMAPS just propagates it for easy launch.
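For illustration only, assuming the TM library header is available: once the mapped node carries launch_id, the launcher can pass it straight to tm_spawn() as the TM node id instead of resolving the hostname first. The struct below is a hypothetical stand-in for orte_mapped_node_t, not the real type.

/* Minimal sketch of launching by id rather than by hostname. */
#include <stdint.h>
#include "tm.h"

struct mapped_node {
    char   *nodename;   /* still kept for systems that launch by name */
    int32_t launch_id;  /* index into the allocation, set by the RAS */
};

static int launch_on_node(struct mapped_node *node, int argc, char **argv,
                          char **env, tm_task_id *task_id, tm_event_t *event)
{
    /* launch_id is used directly as the TM node id */
    if (TM_SUCCESS != tm_spawn(argc, argv, env, node->launch_id,
                               task_id, event)) {
        return -1;
    }
    return 0;
}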

This commit was SVN r13581.
This commit is contained in:
Ralph Castain 2007-02-09 15:06:45 +00:00
parent 64bf42fc0d
commit 5818a32245
20 changed files with 241 additions and 310 deletions

View file

@ -89,20 +89,11 @@ static int pls_tm_finalize(void);
static int pls_tm_connect(void);
static int pls_tm_disconnect(void);
static int pls_tm_query_hostnames(void);
static int pls_tm_start_proc(char *nodename, int argc, char **argv,
char **env, tm_task_id *task_id,
tm_event_t *event);
static int pls_tm_check_path(char *exe, char **env);
/*
* Local variables
*/
/* Resolving TM hostname */
static char **tm_hostnames = NULL;
static tm_node_id *tm_node_ids = NULL;
static int num_tm_hostnames = 0, num_node_ids = 0;
/*
@ -298,15 +289,6 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
}
connected = true;
/* Resolve the TM hostnames and TM node ID's (guarantee that we
don't mix any of these TM events in with the TM spawn events,
so that we can poll for each set of events without interference
from the other) */
rc = pls_tm_query_hostnames();
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
/* Figure out the basenames for the libdir and bindir. There is a
lengthy comment about this in pls_rsh_module.c explaining all
the rationale for how / why we're doing this. */
@ -440,9 +422,11 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
}
}
rc = pls_tm_start_proc(node->nodename, argc, argv, env,
tm_task_ids + launched,
tm_events + launched);
rc = tm_spawn(argc, argv, env, node->launch_id, tm_task_ids + launched, tm_events + launched);
if (TM_SUCCESS != rc) {
return ORTE_ERROR;
}
if (ORTE_SUCCESS != rc) {
opal_output(0, "pls:tm: start_procs returned error %d", rc);
goto cleanup;
@ -689,17 +673,6 @@ static int pls_tm_finalize(void)
if (ORTE_SUCCESS != (rc = orte_pls_base_comm_stop())) {
ORTE_ERROR_LOG(rc);
}
if (NULL != tm_hostnames) {
opal_argv_free(tm_hostnames);
tm_hostnames = NULL;
num_tm_hostnames = 0;
}
if (NULL != tm_node_ids) {
free(tm_node_ids);
tm_node_ids = NULL;
num_node_ids = 0;
}
return ORTE_SUCCESS;
}
@ -739,142 +712,6 @@ static int pls_tm_disconnect(void)
}
/*
* For a given TM node ID, get the string hostname corresponding to
* it.
*/
static char *get_tm_hostname(tm_node_id node)
{
char *hostname;
char buffer[256];
int ret, local_errno;
tm_event_t event;
char **argv;
/* Get the info string corresponding to this TM node ID */
ret = tm_rescinfo(node, buffer, sizeof(buffer) - 1, &event);
if (TM_SUCCESS != ret) {
opal_output(0, "tm_rescinfo returned %d\n", ret);
return NULL;
}
/* Now wait for that event to happen */
ret = tm_poll(TM_NULL_EVENT, &event, 1, &local_errno);
if (TM_SUCCESS != ret) {
opal_output(0, "tm_poll returned %d\n", ret);
return NULL;
}
/* According to the TM man page, we get back a space-separated
string array. The hostname is the second item. Use a cheap
trick to get it. */
buffer[sizeof(buffer) - 1] = '\0';
argv = opal_argv_split(buffer, ' ');
if (NULL == argv) {
opal_output(0, "opal_argv_split failed\n");
return NULL;
}
hostname = strdup(argv[1]);
opal_argv_free(argv);
/* All done */
return hostname;
}
static int pls_tm_query_hostnames(void)
{
char *h;
int i, ret;
/* Get the list of nodes allocated in this PBS job */
ret = tm_nodeinfo(&tm_node_ids, &num_node_ids);
if (TM_SUCCESS != ret) {
return ORTE_ERR_NOT_FOUND;
}
/* TM "nodes" may actually correspond to PBS "VCPUs", which means
there may be multiple "TM nodes" that correspond to the same
physical node. This doesn't really affect what we're doing
here (we actually ignore the fact that they're duplicates --
slightly inefficient, but no big deal); just mentioned for
completeness... */
tm_hostnames = NULL;
num_tm_hostnames = 0;
for (i = 0; i < num_node_ids; ++i) {
h = get_tm_hostname(tm_node_ids[i]);
if (NULL == h) {
opal_output(0, "get_tm_hostname returned NULL");
return ORTE_ERROR;
}
opal_argv_append(&num_tm_hostnames, &tm_hostnames, h);
free(h);
}
/* All done */
return ORTE_SUCCESS;
}
static int do_tm_resolve(char *hostname, tm_node_id *tnodeid)
{
int i, ret;
/* Have we already queried TM for all the node info? */
if (NULL == tm_hostnames) {
return ORTE_ERR_NOT_FOUND;
}
/* Find the TM ID of the hostname that we're looking for */
for (i = 0; i < num_tm_hostnames; ++i) {
if (0 == strcmp(hostname, tm_hostnames[i])) {
*tnodeid = tm_node_ids[i];
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm:launch: resolved host %s to node ID %d",
hostname, tm_node_ids[i]);
}
break;
}
}
/* All done */
if (i < num_tm_hostnames) {
ret = ORTE_SUCCESS;
} else {
ret = ORTE_ERR_NOT_FOUND;
}
return ret;
}
static int pls_tm_start_proc(char *nodename, int argc, char **argv, char **env,
tm_task_id *task_id, tm_event_t *event)
{
int ret;
tm_node_id node_id;
/* get the tm node id for this node */
ret = do_tm_resolve(nodename, &node_id);
if (ORTE_SUCCESS != ret) {
return ret;
}
ret = tm_spawn(argc, argv, env, node_id, task_id, event);
if (TM_SUCCESS != ret) {
return ORTE_ERROR;
}
return ORTE_SUCCESS;
}
static int pls_tm_check_path(char *exe, char **env)
{
static int size = 256;

View file

@ -45,6 +45,7 @@ int orte_ras_base_copy_node(orte_ras_node_t **dest, orte_ras_node_t *src, orte_d
/* copy data into it */
if (NULL != src->node_name) (*dest)->node_name = strdup(src->node_name);
(*dest)->launch_id = src->launch_id;
if (NULL != src->node_arch) (*dest)->node_arch = strdup(src->node_arch);
(*dest)->node_cellid = src->node_cellid;
(*dest)->node_state = src->node_state;

View file

@ -51,6 +51,13 @@ int orte_ras_base_pack_node(orte_buffer_t *buffer, void *src,
return rc;
}
/* pack the launch id */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer,
(void*)(&(nodes[i]->launch_id)), 1, ORTE_INT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the arch */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer,
(void*)(&(nodes[i]->node_arch)), 1, ORTE_STRING))) {

View file

@ -48,8 +48,8 @@ int orte_ras_base_print_node(char **output, char *prefix, orte_ras_node_t *src,
asprintf(&pfx2, "%s", prefix);
}
asprintf(&tmp, "%sData for node: cellid: %lu\tName: %s",
pfx2, (unsigned long)src->node_cellid, src->node_name);
asprintf(&tmp, "%sData for node: cellid: %lu\tName: %s\tLaunch id: %ld",
pfx2, (unsigned long)src->node_cellid, src->node_name, (long)src->launch_id);
asprintf(&tmp2, "%s\n%s\tArch: %s\tState: %lu", tmp, pfx2,
src->node_arch, (unsigned long)src->node_state);

View file

@ -61,6 +61,14 @@ int orte_ras_base_unpack_node(orte_buffer_t *buffer, void *dest,
return rc;
}
/* unpack the launch id */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,
(&(nodes[i]->launch_id)), &n, ORTE_INT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the arch */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,

View file

@ -33,6 +33,7 @@
static void orte_ras_base_node_construct(orte_ras_node_t* node)
{
node->node_name = NULL;
node->launch_id = -1;
node->node_arch = NULL;
node->node_cellid = 0;
node->node_state = ORTE_NODE_STATE_UNKNOWN;
@ -71,9 +72,23 @@ OBJ_CLASS_INSTANCE(
int orte_ras_base_node_query(opal_list_t* nodes)
{
char* keys[] = {
ORTE_NODE_NAME_KEY,
ORTE_NODE_LAUNCH_ID_KEY,
ORTE_NODE_ARCH_KEY,
ORTE_NODE_STATE_KEY,
ORTE_NODE_SLOTS_KEY,
ORTE_NODE_SLOTS_IN_USE_KEY,
ORTE_NODE_SLOTS_ALLOC_KEY,
ORTE_NODE_SLOTS_MAX_KEY,
ORTE_NODE_USERNAME_KEY,
ORTE_CELLID_KEY,
NULL
};
orte_std_cntr_t i, cnt, *sptr;
orte_node_state_t *nsptr;
orte_cellid_t *cptr;
int32_t *i32;
orte_gpr_value_t** values;
int rc;
@ -82,7 +97,7 @@ int orte_ras_base_node_query(opal_list_t* nodes)
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
ORTE_NODE_SEGMENT,
NULL,
NULL,
keys,
&cnt,
&values);
if(ORTE_SUCCESS != rc) {
@ -108,6 +123,14 @@ int orte_ras_base_node_query(opal_list_t* nodes)
}
continue;
}
if(strcmp(keyval->key, ORTE_NODE_LAUNCH_ID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&i32, keyval->value, ORTE_INT32))) {
ORTE_ERROR_LOG(rc);
continue;
}
node->launch_id = *i32;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_ARCH_KEY) == 0) {
/* we use the dss.copy function here instead of strdup because that function
* automatically protects us against a NULL (or zero-length) string
@ -193,6 +216,7 @@ int orte_ras_base_node_query_alloc(opal_list_t* nodes, orte_jobid_t jobid)
{
char* keys[] = {
ORTE_NODE_NAME_KEY,
ORTE_NODE_LAUNCH_ID_KEY,
ORTE_NODE_ARCH_KEY,
ORTE_NODE_STATE_KEY,
ORTE_NODE_SLOTS_KEY,
@ -209,6 +233,7 @@ int orte_ras_base_node_query_alloc(opal_list_t* nodes, orte_jobid_t jobid)
orte_std_cntr_t *sptr;
orte_node_state_t *nsptr;
orte_cellid_t *cptr;
int32_t *i32;
int rc, alloc_key_posn=5;
if(ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_str, jobid))) {
@ -266,6 +291,14 @@ int orte_ras_base_node_query_alloc(opal_list_t* nodes, orte_jobid_t jobid)
}
continue;
}
if(strcmp(keyval->key, ORTE_NODE_LAUNCH_ID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&i32, keyval->value, ORTE_INT32))) {
ORTE_ERROR_LOG(rc);
continue;
}
node->launch_id = *i32;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_ARCH_KEY) == 0) {
/* we use the dss.copy function here instead of strdup because that function
* automatically protects us against a NULL (or zero-length) string
@ -363,11 +396,25 @@ int orte_ras_base_node_query_alloc(opal_list_t* nodes, orte_jobid_t jobid)
orte_ras_node_t* orte_ras_base_node_lookup(orte_cellid_t cellid, const char* node_name)
{
char* keys[] = {
ORTE_NODE_NAME_KEY,
ORTE_NODE_LAUNCH_ID_KEY,
ORTE_NODE_ARCH_KEY,
ORTE_NODE_STATE_KEY,
ORTE_NODE_SLOTS_KEY,
ORTE_NODE_SLOTS_IN_USE_KEY,
ORTE_NODE_SLOTS_ALLOC_KEY,
ORTE_NODE_SLOTS_MAX_KEY,
ORTE_NODE_USERNAME_KEY,
ORTE_CELLID_KEY,
NULL
};
orte_ras_node_t* node = NULL;
orte_std_cntr_t i, cnt, num_tokens;
orte_std_cntr_t *sptr;
orte_cellid_t *cptr;
orte_node_state_t *nsptr;
int32_t *i32;
orte_gpr_value_t** values;
char** tokens = NULL;
int rc;
@ -383,7 +430,7 @@ orte_ras_node_t* orte_ras_base_node_lookup(orte_cellid_t cellid, const char* nod
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
ORTE_NODE_SEGMENT,
tokens,
NULL,
keys,
&cnt,
&values);
if(ORTE_SUCCESS != rc) {
@ -409,6 +456,14 @@ orte_ras_node_t* orte_ras_base_node_lookup(orte_cellid_t cellid, const char* nod
}
continue;
}
if(strcmp(keyval->key, ORTE_NODE_LAUNCH_ID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&i32, keyval->value, ORTE_INT32))) {
ORTE_ERROR_LOG(rc);
continue;
}
node->launch_id = *i32;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_ARCH_KEY) == 0) {
/* we use the dss.copy function here instead of strdup because that function
* automatically protects us against a NULL (or zero-length) string
@ -500,6 +555,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes)
orte_std_cntr_t num_values, i, j;
char *keys[] = {
ORTE_NODE_NAME_KEY,
ORTE_NODE_LAUNCH_ID_KEY,
ORTE_NODE_ARCH_KEY,
ORTE_NODE_STATE_KEY,
ORTE_CELLID_KEY,
@ -510,6 +566,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes)
};
orte_data_type_t types[] = {
ORTE_STRING,
ORTE_INT32,
ORTE_STRING,
ORTE_NODE_STATE,
ORTE_CELLID,
@ -535,7 +592,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes)
for (i=0; i < num_values; i++) {
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[i]),
ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_AND,
ORTE_NODE_SEGMENT, 8, 0))) {
ORTE_NODE_SEGMENT, 9, 0))) {
ORTE_ERROR_LOG(rc);
for (j=0; j < i; j++) {
OBJ_RELEASE(values[j]);
@ -556,6 +613,12 @@ int orte_ras_base_node_insert(opal_list_t* nodes)
goto cleanup;
}
++j;
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[j]), keys[j], types[j], &(node->launch_id)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
++j;
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[j]), keys[j], types[j], node->node_arch))) {
ORTE_ERROR_LOG(rc);

View file

@ -46,6 +46,8 @@ struct orte_ras_node_t {
opal_list_item_t super;
/** String node name */
char *node_name;
/** Launch id - needed by some systems to launch a proc on this node */
int32_t launch_id;
/** String of the architecture for the node. This is permitted to
be NULL if it is not known. */
char *node_arch;

View file

@ -30,7 +30,13 @@
extern "C" {
#endif
ORTE_DECLSPEC extern orte_ras_base_component_t mca_ras_tm_component;
struct orte_ras_tm_component_t {
orte_ras_base_component_t super;
char *nodefile_dir;
};
typedef struct orte_ras_tm_component_t orte_ras_tm_component_t;
ORTE_DECLSPEC extern orte_ras_tm_component_t mca_ras_tm_component;
ORTE_DECLSPEC extern orte_ras_base_module_t orte_ras_tm_module;
#if defined(c_plusplus) || defined(__cplusplus)

View file

@ -39,47 +39,56 @@ static int ras_tm_open(void);
static orte_ras_base_module_t *ras_tm_init(int*);
orte_ras_base_component_t mca_ras_tm_component = {
/* First, the mca_base_component_t struct containing meta
information about the component itself */
orte_ras_tm_component_t mca_ras_tm_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
/* Indicate that we are a ras v1.3.0 component (which also
implies a specific MCA version) */
{
/* Indicate that we are a ras v1.3.0 component (which also
implies a specific MCA version) */
ORTE_RAS_BASE_VERSION_1_3_0,
/* Component name and version */
"tm",
ORTE_MAJOR_VERSION,
ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION,
/* Component open and close functions */
ras_tm_open,
NULL
},
ORTE_RAS_BASE_VERSION_1_3_0,
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
},
/* Component name and version */
"tm",
ORTE_MAJOR_VERSION,
ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION,
/* Component open and close functions */
ras_tm_open,
NULL
},
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
},
ras_tm_init
ras_tm_init
}
};
static int ras_tm_open(void)
{
mca_base_component_t *c = &mca_ras_tm_component.super.ras_version;
param_priority =
mca_base_param_reg_int(&mca_ras_tm_component.ras_version,
mca_base_param_reg_int(c,
"priority",
"Priority of the tm ras component",
false, false, 100, NULL);
mca_base_param_reg_string(c, "nodefile_dir",
"The directory where the PBS nodefile can be found",
false, false, "/var/torque/aux",
&mca_ras_tm_component.nodefile_dir);
return ORTE_SUCCESS;
}

View file

@ -27,10 +27,9 @@
#include <sys/time.h>
#endif /* HAVE_SYS_TIME_H */
#include "tm.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/util/os_path.h"
#include "orte/dss/dss.h"
#include "orte/mca/rmgr/rmgr.h"
@ -47,9 +46,10 @@ static int allocate(orte_jobid_t jobid, opal_list_t *attributes);
static int deallocate(orte_jobid_t jobid);
static int finalize(void);
static int discover(opal_list_t* nodelist);
static int get_tm_hostname(tm_node_id node, char **hostname, char **arch);
static int discover(opal_list_t* nodelist, char *pbs_jobid);
static char *tm_getline(FILE *fp);
#define TM_FILE_MAX_LINE_LENGTH 512
/*
* Global variable
@ -70,28 +70,23 @@ orte_ras_base_module_t orte_ras_tm_module = {
* requested number of nodes/process slots to the job.
*
*/
#include "orte/mca/gpr/gpr.h"
static int allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
int ret;
opal_list_t nodes;
opal_list_item_t* item;
struct tm_roots root;
char *pbs_jobid;
/* Open up our connection to tm */
ret = tm_init(NULL, &root);
if (TM_SUCCESS != ret) {
opal_output(orte_ras_base.ras_output,
"ras:tm:allocate: tm_init failed!");
return ORTE_ERR_RESOURCE_BUSY;
/* get our PBS jobid from the environment */
if (NULL == (pbs_jobid = getenv("PBS_JOBID"))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
OBJ_CONSTRUCT(&nodes, opal_list_t);
if (ORTE_SUCCESS != (ret = discover(&nodes))) {
if (ORTE_SUCCESS != (ret = discover(&nodes, pbs_jobid))) {
opal_output(orte_ras_base.ras_output,
"ras:tm:allocate: discover failed!");
tm_finalize();
return ret;
}
ret = orte_ras_base_allocate_nodes(jobid, &nodes);
@ -110,7 +105,6 @@ static int allocate(orte_jobid_t jobid, opal_list_t *attributes)
opal_output(orte_ras_base.ras_output,
"ras:tm:allocate: failure (base_allocate_nodes=%d)", ret);
}
tm_finalize();
return ret;
}
@ -145,14 +139,15 @@ static int finalize(void)
* - check for additional nodes that have already been allocated
*/
static int discover(opal_list_t* nodelist)
static int discover(opal_list_t* nodelist, char *pbs_jobid)
{
int i, ret, num_node_ids;
int ret;
int32_t nodeid;
orte_ras_node_t *node;
opal_list_item_t* item;
opal_list_t new_nodes;
tm_node_id *tm_node_ids;
char *hostname, *arch;
FILE *fp;
char *hostname, *filename;
struct timeval start, stop;
/* check for timing request - get start time if so */
@ -170,30 +165,24 @@ static int discover(opal_list_t* nodelist)
slightly inefficient, but no big deal); just mentioned for
completeness... */
ret = tm_nodeinfo(&tm_node_ids, &num_node_ids);
if (ret != TM_SUCCESS) {
opal_output(orte_ras_base.ras_output,
"ras:tm:allocate:discover: tm_nodeinfo failed");
return ORTE_ERR_OUT_OF_RESOURCE;
/* setup the full path to the PBS file */
filename = opal_os_path(false, mca_ras_tm_component.nodefile_dir,
pbs_jobid, NULL);
fp = fopen(filename, "r");
if (NULL == fp) {
ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
free(filename);
return ORTE_ERR_FILE_OPEN_FAILURE;
}
/* check for timing request - get stop time and report elapsed time if so */
if (orte_ras_base.timing) {
gettimeofday(&stop, NULL);
opal_output(0, "ras_tm: time to do nodeinfo is %ld usec",
(long int)((stop.tv_sec - start.tv_sec)*1000000 +
(stop.tv_usec - start.tv_usec)));
gettimeofday(&start, NULL);
}
/* Iterate through all the nodes and make an entry for each. TM
node ID's will never be duplicated, but they may end up
resolving to the same hostname (i.e., vcpu's on a single
host). */
OBJ_CONSTRUCT(&new_nodes, opal_list_t);
for (i = 0; i < num_node_ids; ++i) {
get_tm_hostname(tm_node_ids[i], &hostname, &arch);
nodeid=0;
while (NULL != (hostname = tm_getline(fp))) {
opal_output(orte_ras_base.ras_output,
"ras:tm:allocate:discover: got hostname %s", hostname);
@ -223,8 +212,7 @@ static int discover(opal_list_t* nodelist)
"ras:tm:allocate:discover: not found -- added to list");
node = OBJ_NEW(orte_ras_node_t);
node->node_name = hostname;
node->node_arch = arch;
node->node_state = ORTE_NODE_STATE_UP;
node->launch_id = nodeid;
node->node_cellid = 0;
node->node_slots_inuse = 0;
node->node_slots_max = 0;
@ -232,23 +220,15 @@ static int discover(opal_list_t* nodelist)
opal_list_append(&new_nodes, &node->super);
} else {
/* Yes, so we need to free the hostname that came back
from get_tm_hostname() */
/* Yes, so we need to free the hostname that came back */
free(hostname);
}
/* up the nodeid */
nodeid++;
}
/* check for timing request - get stop time and report elapsed time if so */
if (orte_ras_base.timing) {
gettimeofday(&stop, NULL);
opal_output(0, "ras_tm: time to get hostnames is %ld usec",
(long int)((stop.tv_sec - start.tv_sec)*1000000 +
(stop.tv_usec - start.tv_usec)));
gettimeofday(&start, NULL);
}
/* Add these nodes to the registry, and return all the values */
/* Add these nodes to the registry */
opal_output(orte_ras_base.ras_output,
"ras:tm:allocate:discover: done -- adding to registry");
@ -271,55 +251,31 @@ static int discover(opal_list_t* nodelist)
"ras:tm:allocate:discover: failed (rc=%d)", ret);
}
OBJ_DESTRUCT(&new_nodes);
/* check for timing request - get stop time and report elapsed time if so */
if (orte_ras_base.timing) {
gettimeofday(&stop, NULL);
opal_output(0, "ras_tm: time to allocate is %ld usec",
(long int)((stop.tv_sec - start.tv_sec)*1000000 +
(stop.tv_usec - start.tv_usec)));
gettimeofday(&start, NULL);
}
return ret;
}
/*
* For a given TM node ID, get the string hostname corresponding to
* it.
*/
static int get_tm_hostname(tm_node_id node, char **hostname, char **arch)
static char *tm_getline(FILE *fp)
{
int ret, local_errno;
tm_event_t event;
char buffer[256];
char **argv;
/* Get the info string corresponding to this TM node ID */
ret = tm_rescinfo(node, buffer, sizeof(buffer) - 1, &event);
if (TM_SUCCESS != ret) {
opal_output(orte_ras_base.ras_output,
"ras:tm:hostname: tm_rescinfo failed");
return ORTE_ERROR;
char *ret, *buff;
char input[TM_FILE_MAX_LINE_LENGTH];
ret = fgets(input, TM_FILE_MAX_LINE_LENGTH, fp);
if (NULL != ret) {
input[strlen(input)-1] = '\0'; /* remove newline */
buff = strdup(input);
return buff;
}
/* Now wait for that event to happen */
ret = tm_poll(TM_NULL_EVENT, &event, 1, &local_errno);
if (TM_SUCCESS != ret) {
return ORTE_ERROR;
}
/* According to the TM man page, we get back a space-separated
string array. The hostname is the second item. Use a cheap
trick to get it. */
opal_output(orte_ras_base.ras_output,
"ras:tm:hostname: got back %s", buffer);
buffer[sizeof(buffer) - 1] = '\0';
argv = opal_argv_split(buffer, ' ');
if (NULL == argv) {
return ORTE_ERROR;
}
*hostname = strdup(argv[1]);
*arch = strdup(buffer);
opal_argv_free(argv);
/* All done */
opal_output(orte_ras_base.ras_output,
"ras:tm:hostname: got hostname %s", *hostname);
return ORTE_SUCCESS;
return NULL;
}

View file

@ -149,6 +149,8 @@ int orte_rmaps_base_copy_mapped_node(orte_mapped_node_t **dest, orte_mapped_node
(*dest)->nodename = strdup(src->nodename);
}
(*dest)->launch_id = src->launch_id;
if (NULL != src->username) {
(*dest)->username = strdup(src->username);
}

View file

@ -179,6 +179,12 @@ int orte_rmaps_base_pack_mapped_node(orte_buffer_t *buffer, void *src,
return rc;
}
/* pack the launch id */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->launch_id), 1, ORTE_INT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the username */
if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->username), 1, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);

View file

@ -159,8 +159,8 @@ int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_mapped_n
asprintf(&pfx2, "%s", prefix);
}
asprintf(&tmp, "%sMapped node:\n%s\tCell: %ld\tNodename: %s\tUsername: %s\n%s\tDaemon name:", pfx2, pfx2,
(long)src->cell, (NULL == src->nodename ? "NULL" : src->nodename),
asprintf(&tmp, "%sMapped node:\n%s\tCell: %ld\tNodename: %s\tLaunch id: %ld\tUsername: %s\n%s\tDaemon name:", pfx2, pfx2,
(long)src->cell, (NULL == src->nodename ? "NULL" : src->nodename), (long)src->launch_id,
(NULL == src->username ? "NULL" : src->username), pfx2);
asprintf(&pfx, "%s\t", pfx2);

View file

@ -222,6 +222,13 @@ int orte_rmaps_base_unpack_mapped_node(orte_buffer_t *buffer, void *dest,
return rc;
}
/* unpack the launch id */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, &(nodes[i]->launch_id), &n, ORTE_INT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the username */
n = 1;
if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer,

View file

@ -51,6 +51,7 @@ int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid)
bool *bptr, oversub=false;
pid_t *pidptr;
orte_process_name_t *pptr;
int32_t *i32, launch_id;
char *segment;
char *node_name=NULL;
char *username=NULL;
@ -65,6 +66,7 @@ int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid)
ORTE_PROC_LOCAL_PID_KEY,
ORTE_CELLID_KEY,
ORTE_NODE_NAME_KEY,
ORTE_NODE_LAUNCH_ID_KEY,
ORTE_NODE_USERNAME_KEY,
ORTE_NODE_OVERSUBSCRIBED_KEY,
ORTE_JOB_VPID_START_KEY,
@ -124,6 +126,7 @@ int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid)
for(v=0; v<num_values; v++) {
value = values[v];
node_name = NULL;
launch_id = -1;
if (0 == strcmp(value->tokens[0], ORTE_JOB_GLOBALS)) {
/* this came from the job_globals container, so look for the related values */
@ -183,6 +186,14 @@ int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid)
proc->name = *pptr;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_LAUNCH_ID_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&i32, keyval->value, ORTE_INT32))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
launch_id = *i32;
continue;
}
if(strcmp(keyval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, keyval->value, ORTE_STD_CNTR))) {
ORTE_ERROR_LOG(rc);
@ -233,7 +244,7 @@ int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid)
}
}
/* store this process in the map */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_add_proc_to_map(mapping, cell, node_name, username, oversub, proc))) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_add_proc_to_map(mapping, cell, node_name, launch_id, username, oversub, proc))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
@ -382,7 +393,7 @@ int orte_rmaps_base_put_job_map(orte_job_map_t *map)
for(i=0; i<num_procs; i++) {
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[i]),
ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
segment, 8, 0))) {
segment, 9, 0))) {
ORTE_ERROR_LOG(rc);
for(j=0; j<i; j++) {
OBJ_RELEASE(values[j]);
@ -427,22 +438,27 @@ int orte_rmaps_base_put_job_map(orte_job_map_t *map)
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[4]), ORTE_NODE_USERNAME_KEY, ORTE_STRING, node->username))) {
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[4]), ORTE_NODE_LAUNCH_ID_KEY, ORTE_INT32, &(node->launch_id)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[5]), ORTE_NODE_OVERSUBSCRIBED_KEY, ORTE_BOOL, &(node->oversubscribed)))) {
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[5]), ORTE_NODE_USERNAME_KEY, ORTE_STRING, node->username))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[6]), ORTE_PROC_APP_CONTEXT_KEY, ORTE_STD_CNTR, &(proc->app_idx)))) {
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[6]), ORTE_NODE_OVERSUBSCRIBED_KEY, ORTE_BOOL, &(node->oversubscribed)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[7]), ORTE_PROC_APP_CONTEXT_KEY, ORTE_STD_CNTR, &(proc->app_idx)))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[7]), ORTE_PROC_STATE_KEY, ORTE_PROC_STATE, &proc_state))) {
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[8]), ORTE_PROC_STATE_KEY, ORTE_PROC_STATE, &proc_state))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}

View file

@ -262,7 +262,7 @@ int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_list,
}
int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_cellid_t cell, char *nodename,
int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_cellid_t cell, char *nodename, int32_t launch_id,
char *username, bool oversubscribed, orte_mapped_proc_t *proc)
{
opal_list_item_t *item;
@ -294,6 +294,7 @@ int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_cellid_t cell, cha
if (NULL != username) {
node->username = strdup(username);
}
node->launch_id = launch_id;
node->oversubscribed = oversubscribed;
opal_list_append(&map->nodes, &node->super);
@ -352,6 +353,7 @@ int orte_rmaps_base_claim_slot(orte_job_map_t *map,
/* add the proc to the map */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_add_proc_to_map(map, current_node->node_cellid,
current_node->node_name,
current_node->launch_id,
current_node->node_username,
oversub, proc))) {
ORTE_ERROR_LOG(rc);

View file

@ -63,6 +63,7 @@ OBJ_CLASS_INSTANCE(orte_mapped_proc_t,
static void orte_rmaps_mapped_node_construct(orte_mapped_node_t* node)
{
node->nodename = NULL;
node->launch_id = -1;
node->username = NULL;
node->daemon = NULL;
node->oversubscribed = false;

View file

@ -158,7 +158,7 @@ void orte_rmaps_base_recv(int status, orte_process_name_t* sender,
* procs. If not, then add new node entry and put this proc
* on its list.
*/
int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_cellid_t cell, char *nodename,
int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_cellid_t cell, char *nodename, int32_t launch_id,
char *username, bool oversubscribed, orte_mapped_proc_t *proc);
ORTE_DECLSPEC int orte_rmaps_base_get_target_nodes(opal_list_t* node_list, orte_jobid_t jobid,

View file

@ -66,6 +66,7 @@ struct orte_mapped_node_t {
opal_list_item_t super;
orte_cellid_t cell; /* cell where this node is located */
char *nodename; /* name of node */
int32_t launch_id; /* launch id of node - needed by some systems */
char *username;
orte_process_name_t *daemon; /* name of the daemon on this node
* NULL => daemon not assigned yet

View file

@ -62,7 +62,10 @@
#define ORTE_JOBGRP_KEY "orte-jobgrp"
#define ORTE_JOBID_KEY "orte-jobid"
#define ORTE_VPID_KEY "orte-vpid"
/* NODE specific keys */
#define ORTE_NODE_NAME_KEY "orte-node-name"
#define ORTE_NODE_LAUNCH_ID_KEY "orte-node-launch-id"
#define ORTE_NODE_ARCH_KEY "orte-node-arch"
#define ORTE_NODE_STATE_KEY "orte-node-state"
#define ORTE_NODE_SLOTS_KEY "orte-node-slots"
@ -73,6 +76,8 @@
#define ORTE_NODE_BOOTPROXY_KEY "orte-node-bootproxy"
#define ORTE_NODE_USERNAME_KEY "orte-node-username"
#define ORTE_NODE_OVERSUBSCRIBED_KEY "orte-node-oversubscribed"
/* JOB specific keys */
#define ORTE_JOB_APP_CONTEXT_KEY "orte-job-app-context"
#define ORTE_JOB_SLOTS_KEY "orte-job-slots" /**< number of procs in job */
#define ORTE_JOB_VPID_START_KEY "orte-job-vpid-start"
@ -82,6 +87,8 @@
#define ORTE_JOB_IOF_KEY "orte-job-iof"
#define ORTE_JOB_STATE_KEY "orte-job-state"
#define ORTE_JOB_MAPPING_MODE_KEY "orte-job-mapping-mode"
/* PROCESS specific keys */
#define ORTE_PROC_NAME_KEY "orte-proc-name"
#define ORTE_PROC_RANK_KEY "orte-proc-rank"
#define ORTE_PROC_PID_KEY "orte-proc-pid"