1
1

Revert r21097 as this results in multiple instantiations of global variables. Instead, fix the problem by including orte_globals.h in the orte_init.c.

Since I already had some changes in there, add in the rmaps rank_file changes - should work okay, but not fully tested.

This commit was SVN r21099.

The following SVN revision numbers were found above:
  r21097 --> open-mpi/ompi@88ae934c26
Этот коммит содержится в:
Ralph Castain 2009-04-29 02:13:14 +00:00
родитель 48e4574907
Коммит 5fa3b38d3c
13 изменённых файлов: 450 добавлений и 360 удалений

Просмотреть файл

@ -256,6 +256,7 @@ PROCESS:
int orte_rmaps_base_claim_slot(orte_job_t *jdata, int orte_rmaps_base_claim_slot(orte_job_t *jdata,
orte_node_t *current_node, orte_node_t *current_node,
orte_vpid_t vpid, orte_vpid_t vpid,
char *slot_list,
orte_std_cntr_t app_idx, orte_std_cntr_t app_idx,
opal_list_t *nodes, opal_list_t *nodes,
bool oversubscribe, bool oversubscribe,
@ -278,10 +279,9 @@ int orte_rmaps_base_claim_slot(orte_job_t *jdata,
proc->app_idx = app_idx; proc->app_idx = app_idx;
OBJ_RETAIN(current_node); /* maintain accounting on object */ OBJ_RETAIN(current_node); /* maintain accounting on object */
if ( NULL != current_node->slot_list) { if ( NULL != slot_list) {
proc->slot_list = strdup(current_node->slot_list); proc->slot_list = strdup(slot_list);
} }
current_node->slot_list = NULL;
proc->node = current_node; proc->node = current_node;
proc->nodename = current_node->name; proc->nodename = current_node->name;

Просмотреть файл

@ -73,6 +73,7 @@ ORTE_DECLSPEC int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_li
ORTE_DECLSPEC int orte_rmaps_base_claim_slot(orte_job_t *jdata, ORTE_DECLSPEC int orte_rmaps_base_claim_slot(orte_job_t *jdata,
orte_node_t *current_node, orte_node_t *current_node,
orte_vpid_t vpid, orte_vpid_t vpid,
char *slot_list,
orte_std_cntr_t app_idx, orte_std_cntr_t app_idx,
opal_list_t *nodes, opal_list_t *nodes,
bool oversubscribe, bool oversubscribe,

Просмотреть файл

@ -99,8 +99,3 @@ request to launch on a %d process-per-node basis - only %d slots/node were
available. available.
Either request fewer processes/node, or obtain a larger allocation. Either request fewer processes/node, or obtain a larger allocation.
[orte-rmaps-rf:no-np-and-user-map]
You have specified a rank-to-node/slot mapping, but failed to provide
the number of processes to be executed. This information is critical for Rank
Mapping component.

Просмотреть файл

@ -33,9 +33,9 @@
#endif /* HAVE_STRING_H */ #endif /* HAVE_STRING_H */
#include "opal/mca/base/mca_base_param.h" #include "opal/mca/base/mca_base_param.h"
#include "opal/util/trace.h"
#include "opal/util/argv.h" #include "opal/util/argv.h"
#include "opal/util/if.h" #include "opal/util/if.h"
#include "opal/class/opal_pointer_array.h"
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
#include "orte/util/show_help.h" #include "orte/util/show_help.h"
@ -46,125 +46,53 @@
#include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_globals.h"
#include "orte/mca/ras/ras_types.h" #include "orte/mca/ras/ras_types.h"
static int orte_rmaps_rank_file_parse(const char *, int); static int orte_rmaps_rank_file_parse(const char *);
static char *orte_rmaps_rank_file_parse_string_or_int(void); static char *orte_rmaps_rank_file_parse_string_or_int(void);
char *orte_rmaps_rank_file_path = NULL; char *orte_rmaps_rank_file_path = NULL;
static const char *orte_rmaps_rank_file_name_cur = NULL; static const char *orte_rmaps_rank_file_name_cur = NULL;
static opal_mutex_t orte_rmaps_rank_file_mutex;
char *orte_rmaps_rank_file_slot_list; char *orte_rmaps_rank_file_slot_list;
/* /*
* Local variable * Local variable
*/ */
static opal_list_item_t *cur_node_item = NULL; static opal_list_item_t *cur_node_item = NULL;
orte_rmaps_rank_file_map_t *rankmap = NULL; static opal_pointer_array_t rankmap;
static int map_app_by_user_map(
orte_app_context_t* app,
orte_job_t* jdata,
orte_vpid_t vpid_start,
opal_list_t* nodes,
opal_list_t* procs)
{
int rc = ORTE_SUCCESS;
opal_list_item_t *next;
orte_node_t *node;
orte_std_cntr_t round_cnt, num_alloc = 0;
OPAL_TRACE(2);
if ( NULL == orte_rmaps_rank_file_path ) {
return ORTE_SUCCESS;
}
while (num_alloc < app->num_procs) {
/** see if any nodes remain unused and available. We need to do this check
* each time since we may remove nodes from the list (as they become fully
* used) as we cycle through the loop */
if(0 >= opal_list_get_size(nodes) ) {
/* No more nodes to allocate :( */
orte_show_help("help-rmaps_rank_file.txt", "orte-rmaps-rf:alloc-error",
true, app->num_procs, app->app);
return ORTE_ERR_SILENT;
}
/* Save the next node we can use before claiming slots, since
* we may need to prune the nodes list removing overused nodes.
* Wrap around to beginning if we are at the end of the list */
round_cnt=0;
if ( -1 != rankmap[vpid_start + num_alloc].rank) {
do {
if (opal_list_get_end(nodes) == opal_list_get_next(cur_node_item)) {
next = opal_list_get_first(nodes);
round_cnt++;
} else {
next = opal_list_get_next(cur_node_item);
}
/* Allocate a slot on this node */
node = (orte_node_t*) cur_node_item;
cur_node_item = next;
if ( round_cnt == 2 ) {
orte_show_help("help-rmaps_rank_file.txt","bad-host", true,rankmap[num_alloc+vpid_start].node_name);
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
} while ( strcmp(node->name, rankmap[num_alloc + vpid_start].node_name));
node->slot_list = strdup(rankmap[num_alloc+vpid_start].slot_list);
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, rankmap[num_alloc+vpid_start].rank, app->idx,
nodes, jdata->map->oversubscribe, true))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error - we just need to break from the loop
* since the node is fully used up. For now, just don't report
* an error
*/
if (ORTE_ERR_NODE_FULLY_USED != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
++num_alloc;
}
return ORTE_SUCCESS;
}
/* /*
* Create a default mapping for the application, mapping rank by rank_file and * Create a default mapping for the application, mapping rank by rank_file and
* by node. * by node.
*/ */
static int map_app_by_node( static int map_app_by_node(orte_app_context_t* app,
orte_app_context_t* app, orte_job_t* jdata,
orte_job_t* jdata, orte_vpid_t vpid_start,
orte_vpid_t vpid_start, opal_list_t* nodes )
opal_list_t* nodes )
{ {
int rc = ORTE_SUCCESS; int rc = ORTE_SUCCESS;
opal_list_item_t *next; opal_list_item_t *next;
orte_node_t *node; orte_node_t *node;
orte_std_cntr_t num_alloc = 0; orte_std_cntr_t num_alloc = 0;
OPAL_TRACE(2);
/* This loop continues until all procs have been mapped or we run /* This loop continues until all procs have been mapped or we run
out of resources. We determine that we have "run out of out of resources. We determine that we have "run out of
resources" when all nodes have slots_max processes mapped to them, resources" when all nodes have slots_max processes mapped to them,
thus there are no free slots for a process to be mapped, or we have thus there are no free slots for a process to be mapped, or we have
hit the soft limit on all nodes and are in a "no oversubscribe" state. hit the soft limit on all nodes and are in a "no oversubscribe" state.
If we still have processes that haven't been mapped yet, then it's an If we still have processes that haven't been mapped yet, then it's an
"out of resources" error. "out of resources" error.
In this scenario, we rely on the claim_slot function to handle the In this scenario, we rely on the claim_slot function to handle the
oversubscribed case. The claim_slot function will leave a node on the oversubscribed case. The claim_slot function will leave a node on the
list until it either reaches slots_max OR reaches the list until it either reaches slots_max OR reaches the
soft limit and the "no_oversubscribe" flag has been set - at which point, soft limit and the "no_oversubscribe" flag has been set - at which point,
the node will be removed to prevent any more processes from being mapped to the node will be removed to prevent any more processes from being mapped to
it. Since we are taking one slot from each node as we cycle through, the it. Since we are taking one slot from each node as we cycle through, the
list, oversubscription is automatically taken care of via this logic. list, oversubscription is automatically taken care of via this logic.
*/ */
while (num_alloc < app->num_procs) { while (num_alloc < app->num_procs) {
if ( -1 != rankmap[num_alloc + vpid_start].rank) { if (NULL != opal_pointer_array_get_item(&rankmap, vpid_start+num_alloc)) {
/* this rank was already mapped */
++num_alloc; ++num_alloc;
continue; continue;
} }
@ -189,11 +117,9 @@ static int map_app_by_node(
} }
/* Allocate a slot on this node */ /* Allocate a slot on this node */
node = (orte_node_t*) cur_node_item; node = (orte_node_t*) cur_node_item;
if ( NULL != orte_rmaps_base.slot_list ) { /* pass the base slot list in case it was provided */
node->slot_list = strdup(orte_rmaps_base.slot_list); if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start+num_alloc, orte_rmaps_base.slot_list, app->idx,
} nodes, jdata->map->oversubscribe, true))) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, app->idx,
nodes, jdata->map->oversubscribe, true))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error - we just need to break from the loop * really isn't an error - we just need to break from the loop
* since the node is fully used up. For now, just don't report * since the node is fully used up. For now, just don't report
@ -207,7 +133,7 @@ static int map_app_by_node(
++num_alloc; ++num_alloc;
cur_node_item = next; cur_node_item = next;
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -215,29 +141,27 @@ static int map_app_by_node(
* Create a default mapping for the application, scheduling ranks byr rank_file * Create a default mapping for the application, scheduling ranks byr rank_file
* and by slot. * and by slot.
*/ */
static int map_app_by_slot( static int map_app_by_slot(orte_app_context_t* app,
orte_app_context_t* app, orte_job_t* jdata,
orte_job_t* jdata, orte_vpid_t vpid_start,
orte_vpid_t vpid_start, opal_list_t* nodes )
opal_list_t* nodes )
{ {
int rc = ORTE_SUCCESS; int rc = ORTE_SUCCESS;
orte_std_cntr_t i, num_slots_to_take, num_alloc = 0; orte_std_cntr_t i, num_slots_to_take, num_alloc = 0;
orte_node_t *node; orte_node_t *node;
opal_list_item_t *next; opal_list_item_t *next;
OPAL_TRACE(2);
/* This loop continues until all procs have been mapped or we run /* This loop continues until all procs have been mapped or we run
out of resources. We determine that we have "run out of out of resources. We determine that we have "run out of
resources" when either all nodes have slots_max processes mapped to them, resources" when either all nodes have slots_max processes mapped to them,
(thus there are no free slots for a process to be mapped), OR all nodes (thus there are no free slots for a process to be mapped), OR all nodes
have reached their soft limit and the user directed us to "no oversubscribe". have reached their soft limit and the user directed us to "no oversubscribe".
If we still have processes that haven't been mapped yet, then it's an If we still have processes that haven't been mapped yet, then it's an
"out of resources" error. */ "out of resources" error. */
while ( num_alloc < app->num_procs) { while ( num_alloc < app->num_procs) {
/** see if any nodes remain unused and available. We need to do this check /** see if any nodes remain unused and available. We need to do this check
* each time since we may remove nodes from the list (as they become fully * each time since we may remove nodes from the list (as they become fully
* used) as we cycle through the loop */ * used) as we cycle through the loop */
if(0 >= opal_list_get_size(nodes) ) { if(0 >= opal_list_get_size(nodes) ) {
/* Everything is at max usage! :( */ /* Everything is at max usage! :( */
orte_show_help("help-rmaps_rank_file.txt", "orte-rmaps-rf:alloc-error", orte_show_help("help-rmaps_rank_file.txt", "orte-rmaps-rf:alloc-error",
@ -246,8 +170,8 @@ static int map_app_by_slot(
} }
/* Save the next node we can use before claiming slots, since /* Save the next node we can use before claiming slots, since
* we may need to prune the nodes list removing overused nodes. * we may need to prune the nodes list removing overused nodes.
* Wrap around to beginning if we are at the end of the list */ * Wrap around to beginning if we are at the end of the list */
if (opal_list_get_end(nodes) == opal_list_get_next(cur_node_item)) { if (opal_list_get_end(nodes) == opal_list_get_next(cur_node_item)) {
next = opal_list_get_first(nodes); next = opal_list_get_first(nodes);
} else { } else {
@ -275,8 +199,8 @@ static int map_app_by_slot(
* many processes as another before oversubscribing, it will continue * many processes as another before oversubscribing, it will continue
* to do so after oversubscribing). * to do so after oversubscribing).
*/ */
if (0 == node->slots_inuse || if (0 == node->slots_inuse ||
node->slots_inuse >= node->slots_alloc) { node->slots_inuse >= node->slots_alloc) {
num_slots_to_take = (node->slots_alloc == 0) ? 1 : node->slots_alloc; num_slots_to_take = (node->slots_alloc == 0) ? 1 : node->slots_alloc;
@ -290,17 +214,16 @@ static int map_app_by_slot(
if (jdata->map->pernode) { if (jdata->map->pernode) {
num_slots_to_take = jdata->map->npernode; num_slots_to_take = jdata->map->npernode;
} }
for( i = 0; i < num_slots_to_take; ++i) { for( i = 0; i < num_slots_to_take; ++i) {
if ( -1 != rankmap[num_alloc + vpid_start].rank) { if (NULL != opal_pointer_array_get_item(&rankmap, vpid_start+num_alloc)) {
/* this rank was already mapped */
++num_alloc; ++num_alloc;
continue; continue;
} }
if ( NULL != orte_rmaps_base.slot_list ) { /* pass the base slot list in case it was provided */
node->slot_list = strdup(orte_rmaps_base.slot_list); if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start+num_alloc, orte_rmaps_base.slot_list, app->idx,
} nodes, jdata->map->oversubscribe, true))) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, app->idx,
nodes, jdata->map->oversubscribe, true))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error - we just need to break from the loop * really isn't an error - we just need to break from the loop
* since the node is fully used up. For now, just don't report * since the node is fully used up. For now, just don't report
@ -311,9 +234,9 @@ static int map_app_by_slot(
return rc; return rc;
} }
} }
/* Update the number of procs allocated */ /* Update the rank */
++num_alloc; ++num_alloc;
/** if all the procs have been mapped OR we have fully used up this node, then /** if all the procs have been mapped OR we have fully used up this node, then
* break from the loop * break from the loop
*/ */
@ -321,7 +244,7 @@ static int map_app_by_slot(
break; break;
} }
} }
/* we move on to the next node in all cases EXCEPT if we came /* we move on to the next node in all cases EXCEPT if we came
* out of the loop without having taken a full bite AND the * out of the loop without having taken a full bite AND the
* node is NOT max'd out * node is NOT max'd out
@ -332,10 +255,9 @@ static int map_app_by_slot(
} }
cur_node_item = next; cur_node_item = next;
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
/* /*
* Create a rank_file mapping for the job. * Create a rank_file mapping for the job.
@ -344,91 +266,90 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
{ {
orte_job_map_t *map; orte_job_map_t *map;
orte_app_context_t *app=NULL, **apps; orte_app_context_t *app=NULL, **apps;
orte_std_cntr_t i, j; orte_std_cntr_t i, k;
opal_list_t node_list, procs; orte_vpid_t total_procs;
opal_list_t node_list;
opal_list_item_t *item; opal_list_item_t *item;
orte_node_t *node; orte_node_t *node, *nd;
orte_vpid_t vpid_start; orte_vpid_t rank, vpid_start;
orte_std_cntr_t num_nodes, num_slots; orte_std_cntr_t num_nodes, num_slots;
int rc; orte_rmaps_rank_file_map_t *rfmap;
orte_std_cntr_t slots_per_node; orte_std_cntr_t slots_per_node;
int rc;
OPAL_TRACE(1);
/* convenience def */
/* conveniece def */
map = jdata->map; map = jdata->map;
apps = (orte_app_context_t**)jdata->apps->addr; apps = (orte_app_context_t**)jdata->apps->addr;
/* SANITY CHECKS */
/* if the number of processes wasn't specified, then we know there can be only
* one app_context allowed in the launch, and that we are to launch it across
* all available slots. We'll double-check the single app_context rule first
*/
if (0 == apps[0]->num_procs && 1 < jdata->num_apps) {
orte_show_help("help-rmaps_rank_file.txt", "orte-rmaps-rf:multi-apps-and-zero-np",
true, jdata->num_apps, NULL);
rc = ORTE_ERR_SILENT;
goto error;
}
/* likewise, we only support pernode options for a single app_context */
if (map->pernode && 1 < jdata->num_apps) {
orte_show_help("help-rmaps_rank_file.txt", "orte-rmaps-rf:multi-apps-and-zero-np",
true, jdata->num_apps, NULL);
rc = ORTE_ERR_SILENT;
goto error;
}
/* END SANITY CHECKS */
/* flag the map as containing cpu_lists */ /* flag the map as containing cpu_lists */
map->cpu_lists = true; map->cpu_lists = true;
/* start at the beginning... */ /* start at the beginning... */
vpid_start = 0; vpid_start = 0;
jdata->num_procs = 0;
/* cycle through the app_contexts, mapping them sequentially */ total_procs = 0;
for(i=0; i < jdata->num_apps; i++) { OBJ_CONSTRUCT(&node_list, opal_list_t);
app = apps[i]; OBJ_CONSTRUCT(&rankmap, opal_pointer_array_t);
/* if the number of processes wasn't specified, then we know there can be only /* parse the rankfile, storing its results in the rankmap */
* one app_context allowed in the launch, and that we are to launch it across if ( NULL != orte_rmaps_rank_file_path ) {
* all available slots. We'll double-check the single app_context rule first if ( ORTE_SUCCESS != (rc = orte_rmaps_rank_file_parse(orte_rmaps_rank_file_path))) {
*/ ORTE_ERROR_LOG(rc);
if (0 == app->num_procs && 1 < jdata->num_apps) {
orte_show_help("help-rmaps_rank_file.txt", "orte-rmaps-rf:multi-apps-and-zero-np",
true, jdata->num_apps, NULL);
rc = ORTE_ERR_SILENT;
goto error; goto error;
} }
}
/* cycle through the app_contexts, mapping them sequentially */
for(i=0; i < jdata->num_apps; i++) {
app = apps[i];
/* for each app_context, we have to get the list of nodes that it can /* for each app_context, we have to get the list of nodes that it can
* use since that can now be modified with a hostfile and/or -host * use since that can now be modified with a hostfile and/or -host
* option * option
*/ */
OBJ_CONSTRUCT(&node_list, opal_list_t);
if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app, if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
map->policy))) { map->policy))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto error; goto error;
} }
num_nodes = (orte_std_cntr_t)opal_list_get_size(&node_list); num_nodes = (orte_std_cntr_t)opal_list_get_size(&node_list);
rankmap = (orte_rmaps_rank_file_map_t *) malloc ( app->num_procs * sizeof(orte_rmaps_rank_file_map_t)); /* we already checked for sanity, so these are okay to just do here */
for ( j=0; j<app->num_procs; j++) {
rankmap[j].rank = -1;
rankmap[j].slot_list = (char *)malloc(64*sizeof(char));
}
if ( NULL != orte_rmaps_rank_file_path ) {
if ( ORTE_SUCCESS != (rc = orte_rmaps_rank_file_parse(orte_rmaps_rank_file_path, app->num_procs))) {
ORTE_ERROR_LOG(rc);
goto error;
}
}
/* if a bookmark exists from some prior mapping, set us to start there */
if (NULL != jdata->bookmark) {
cur_node_item = NULL;
/* find this node on the list */
for (item = opal_list_get_first(&node_list);
item != opal_list_get_end(&node_list);
item = opal_list_get_next(item)) {
node = (orte_node_t*)item;
if (node->index == jdata->bookmark->index) {
cur_node_item = item;
break;
}
}
/* see if we found it - if not, just start at the beginning */
if (NULL == cur_node_item) {
cur_node_item = opal_list_get_first(&node_list);
}
} else {
/* if no bookmark, then just start at the beginning of the list */
cur_node_item = opal_list_get_first(&node_list);
}
if (map->pernode && map->npernode == 1) { if (map->pernode && map->npernode == 1) {
if (app->num_procs > num_nodes) { /* there are three use-cases that we need to deal with:
* (a) if -np was not provided, then we just use the number of nodes
* (b) if -np was provided AND #procs > #nodes, then error out
* (c) if -np was provided AND #procs <= #nodes, then launch
* the specified #procs one/node. In this case, we just
* leave app->num_procs alone
*/
if (0 == app->num_procs) {
app->num_procs = num_nodes;
} else if (app->num_procs > num_nodes) {
orte_show_help("help-rmaps_rank_file.txt", "orte-rmaps-rf:per-node-and-too-many-procs", orte_show_help("help-rmaps_rank_file.txt", "orte-rmaps-rf:per-node-and-too-many-procs",
true, app->num_procs, num_nodes, NULL); true, app->num_procs, num_nodes, NULL);
rc = ORTE_ERR_SILENT; rc = ORTE_ERR_SILENT;
@ -445,36 +366,68 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
rc = ORTE_ERR_SILENT; rc = ORTE_ERR_SILENT;
goto error; goto error;
} }
if (app->num_procs > (map->npernode * num_nodes)) { /* there are three use-cases that we need to deal with:
* (a) if -np was not provided, then we just use the n/node * #nodes
* (b) if -np was provided AND #procs > (n/node * #nodes), then error out
* (c) if -np was provided AND #procs <= (n/node * #nodes), then launch
* the specified #procs n/node. In this case, we just
* leave app->num_procs alone
*/
if (0 == app->num_procs) {
/* set the num_procs to equal the specified num/node * the number of nodes */
app->num_procs = map->npernode * num_nodes;
} else if (app->num_procs > (map->npernode * num_nodes)) {
orte_show_help("help-rmaps_rank_file.txt", "orte-rmaps-rf:n-per-node-and-too-many-procs", orte_show_help("help-rmaps_rank_file.txt", "orte-rmaps-rf:n-per-node-and-too-many-procs",
true, app->num_procs, map->npernode, num_nodes, num_slots, NULL); true, app->num_procs, map->npernode, num_nodes, num_slots, NULL);
rc = ORTE_ERR_SILENT; rc = ORTE_ERR_SILENT;
goto error; goto error;
} }
} } else if (0 == app->num_procs) {
/** track the total number of processes we mapped */ /** set the num_procs to equal the number of slots on these mapped nodes */
jdata->num_procs += app->num_procs; app->num_procs = num_slots;
/* Make assignments */
if ( ORTE_SUCCESS != (rc = map_app_by_user_map(app, jdata, vpid_start, &node_list, &procs))) {
goto error;
} }
/* assign unassigned ranks by map policy */ /* keep track of the total #procs in this job */
if (map->policy == ORTE_RMAPS_BYNODE) { total_procs += app->num_procs;
rc = map_app_by_node(app, jdata, vpid_start, &node_list);
} else {
rc = map_app_by_slot(app, jdata, vpid_start, &node_list);
}
if (ORTE_SUCCESS != rc) { for (k=0; k < app->num_procs; k++) {
ORTE_ERROR_LOG(rc); rank = vpid_start + k;
goto error; /* get the rankfile entry for this rank */
if (NULL == (rfmap = opal_pointer_array_get_item(&rankmap, rank))) {
/* no entry for this rank */
continue;
}
/* find the node where this proc was assigned */
node = NULL;
for (item = opal_list_get_first(&node_list);
item != opal_list_get_end(&node_list);
item = opal_list_get_next(item)) {
nd = (orte_node_t*)item;
if (NULL != rfmap->node_name &&
0 == strcmp(nd->name, rfmap->node_name)) {
node = nd;
break;
}
}
if (NULL == node) {
orte_show_help("help-rmaps_rank_file.txt","bad-host", true, rfmap->node_name);
return ORTE_ERR_SILENT;
}
if (NULL == rfmap->slot_list) {
/* rank was specified but no slot list given - that's an error */
orte_show_help("help-rmaps_rank_file.txt","no-slot-list", true, rank, rfmap->node_name);
return ORTE_ERR_SILENT;
}
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, rank, rfmap->slot_list,
app->idx, &node_list, jdata->map->oversubscribe, false))) {
ORTE_ERROR_LOG(rc);
return rc;
}
jdata->num_procs++;
} }
/* update the starting point */
/* save the bookmark */ vpid_start += app->num_procs;
jdata->bookmark = (orte_node_t*)cur_node_item;
/* cleanup the node list - it can differ from one app_context /* cleanup the node list - it can differ from one app_context
* to another, so we have to get it every time * to another, so we have to get it every time
*/ */
@ -482,8 +435,75 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
OBJ_RELEASE(item); OBJ_RELEASE(item);
} }
OBJ_DESTRUCT(&node_list); OBJ_DESTRUCT(&node_list);
OBJ_CONSTRUCT(&node_list, opal_list_t);
} }
OBJ_DESTRUCT(&node_list);
/* did we map all the procs, or did the user's rankfile not contain
* a specification for every rank?
*/
if (jdata->num_procs < total_procs) {
/* we need to map the remainder of the procs according to the
* mapping policy
*/
vpid_start = 0;
for(i=0; i < jdata->num_apps; i++) {
app = apps[i];
/* for each app_context, we have to get the list of nodes that it can
* use since that can now be modified with a hostfile and/or -host
* option
*/
OBJ_CONSTRUCT(&node_list, opal_list_t);
if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
map->policy))) {
ORTE_ERROR_LOG(rc);
goto error;
}
/* if a bookmark exists from some prior mapping, set us to start there */
if (NULL != jdata->bookmark) {
cur_node_item = NULL;
/* find this node on the list */
for (item = opal_list_get_first(&node_list);
item != opal_list_get_end(&node_list);
item = opal_list_get_next(item)) {
node = (orte_node_t*)item;
if (node->index == jdata->bookmark->index) {
cur_node_item = item;
break;
}
}
/* see if we found it - if not, just start at the beginning */
if (NULL == cur_node_item) {
cur_node_item = opal_list_get_first(&node_list);
}
} else {
/* if no bookmark, then just start at the beginning of the list */
cur_node_item = opal_list_get_first(&node_list);
}
if (map->policy == ORTE_RMAPS_BYNODE) {
rc = map_app_by_node(app, jdata, vpid_start, &node_list);
} else {
rc = map_app_by_slot(app, jdata, vpid_start, &node_list);
}
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto error;
}
vpid_start += app->num_procs;
/* cleanup the node list - it can differ from one app_context
* to another, so we have to get it every time
*/
while(NULL != (item = opal_list_remove_first(&node_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&node_list);
}
/* save the bookmark */
jdata->bookmark = (orte_node_t*)cur_node_item;
}
/* compute and save convenience values */ /* compute and save convenience values */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_usage(jdata))) { if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_usage(jdata))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -495,14 +515,14 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
for (j=0; j<app->num_procs; j++) {
if (NULL != rankmap[j].slot_list) { /* cleanup the rankmap */
free (rankmap[j].slot_list ); for (i=0; i < rankmap.size; i++) {
if (NULL != (rfmap = opal_pointer_array_get_item(&rankmap, i))) {
OBJ_RELEASE(rfmap);
} }
} }
if (NULL != rankmap) { OBJ_DESTRUCT(&rankmap);
free(rankmap);
}
return ORTE_SUCCESS; return ORTE_SUCCESS;
error: error:
@ -510,68 +530,92 @@ error:
OBJ_RELEASE(item); OBJ_RELEASE(item);
} }
OBJ_DESTRUCT(&node_list); OBJ_DESTRUCT(&node_list);
for (j=0; j<app->num_procs; j++) {
if (NULL != rankmap[j].slot_list) {
free (rankmap[j].slot_list );
}
}
if (NULL != rankmap) {
free(rankmap);
}
return rc; return rc;
} }
orte_rmaps_base_module_t orte_rmaps_rank_file_module = { orte_rmaps_base_module_t orte_rmaps_rank_file_module = {
orte_rmaps_rf_map orte_rmaps_rf_map
}; };
static int orte_rmaps_rank_file_parse(const char *rankfile, int np) static int orte_rmaps_rank_file_parse(const char *rankfile)
{ {
int token; int token;
int rc = ORTE_SUCCESS; int rc = ORTE_SUCCESS;
int line_number = 1; int cnt;
int cnt; char* node_name = NULL;
char* node_name = NULL; char* username = NULL;
char* username = NULL; char** argv;
char** argv; char buff[64];
char buff[64]; char* value;
char* value; int rank=-1;
int ival=-1; int i;
orte_node_t *hnp_node;
orte_rmaps_rank_file_map_t *rfmap=NULL;
orte_rmaps_rank_file_name_cur = rankfile; OPAL_THREAD_LOCK(&orte_rmaps_rank_file_mutex);
orte_rmaps_rank_file_done = false;
orte_rmaps_rank_file_in = fopen(rankfile, "r"); /* get the hnp node's info */
hnp_node = (orte_node_t*)(orte_node_pool->addr[0]);
if (NULL == orte_rmaps_rank_file_in) {
orte_show_help("help-rmaps_rank_file.txt", "no-rankfile", true, rankfile, np); orte_rmaps_rank_file_name_cur = rankfile;
rc = OPAL_ERR_NOT_FOUND; orte_rmaps_rank_file_done = false;
goto unlock; orte_rmaps_rank_file_in = fopen(rankfile, "r");
}
if (NULL == orte_rmaps_rank_file_in) {
if ( 0 == np ) { orte_show_help("help-rmaps_rank_file.txt", "no-rankfile", true, rankfile);
orte_show_help("help-rmaps_rank_file.txt", "orte-rmaps-rf:no-np-and-user-map", true, NULL); rc = OPAL_ERR_NOT_FOUND;
return ORTE_ERR_BAD_PARAM; ORTE_ERROR_LOG(rc);
} goto unlock;
}
while (!orte_rmaps_rank_file_done) { while (!orte_rmaps_rank_file_done) {
token = orte_rmaps_rank_file_lex(); token = orte_rmaps_rank_file_lex();
switch (token) { switch (token) {
case ORTE_RANKFILE_DONE: case ORTE_RANKFILE_ERROR:
orte_rmaps_rank_file_done = true; opal_output(0, "Got an error!");
break; break;
case ORTE_RANKFILE_QUOTED_STRING:
orte_show_help("help-rmaps_rank_file.txt", "not-supported-rankfile", true, "QUOTED_STRING", rankfile);
rc = ORTE_ERR_BAD_PARAM;
ORTE_ERROR_LOG(rc);
goto unlock;
case ORTE_RANKFILE_NEWLINE: case ORTE_RANKFILE_NEWLINE:
line_number++; rank = -1;
if (NULL != node_name) {
free(node_name);
}
node_name = NULL;
rfmap = NULL;
break; break;
case ORTE_RANKFILE_RANK: case ORTE_RANKFILE_RANK:
token = orte_rmaps_rank_file_lex();
if (ORTE_RANKFILE_INT == token) {
rank = orte_rmaps_rank_file_value.ival;
rfmap = OBJ_NEW(orte_rmaps_rank_file_map_t);
opal_pointer_array_set_item(&rankmap, rank, rfmap);
} else {
orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
rc = ORTE_ERR_BAD_PARAM;
ORTE_ERROR_LOG(rc);
goto unlock;
}
break;
case ORTE_RANKFILE_USERNAME:
orte_show_help("help-rmaps_rank_file.txt", "not-supported-rankfile", true, "USERNAME", rankfile);
rc = ORTE_ERR_BAD_PARAM;
ORTE_ERROR_LOG(rc);
goto unlock;
break; break;
case ORTE_RANKFILE_EQUAL: case ORTE_RANKFILE_EQUAL:
ival = orte_rmaps_rank_file_value.ival; if (rank < 0) {
if ( ival > (np-1) ) { orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
orte_show_help("help-rmaps_rank_file.txt", "bad-rankfile", true, ival, rankfile);
rc = ORTE_ERR_BAD_PARAM; rc = ORTE_ERR_BAD_PARAM;
ORTE_ERROR_LOG(rc);
goto unlock; goto unlock;
} }
token = orte_rmaps_rank_file_lex(); token = orte_rmaps_rank_file_lex();
switch (token) { switch (token) {
case ORTE_RANKFILE_HOSTNAME: case ORTE_RANKFILE_HOSTNAME:
@ -579,73 +623,96 @@ static int orte_rmaps_rank_file_parse(const char *rankfile, int np)
case ORTE_RANKFILE_IPV6: case ORTE_RANKFILE_IPV6:
case ORTE_RANKFILE_STRING: case ORTE_RANKFILE_STRING:
case ORTE_RANKFILE_INT: case ORTE_RANKFILE_INT:
if(ORTE_RANKFILE_INT == token) { if(ORTE_RANKFILE_INT == token) {
sprintf(buff,"%d", orte_rmaps_rank_file_value.ival); sprintf(buff,"%d", orte_rmaps_rank_file_value.ival);
value = buff; value = buff;
} else { } else {
value = orte_rmaps_rank_file_value.sval; value = orte_rmaps_rank_file_value.sval;
} }
argv = opal_argv_split (value, '@'); argv = opal_argv_split (value, '@');
cnt = opal_argv_count (argv); cnt = opal_argv_count (argv);
if (1 == cnt) { if (1 == cnt) {
node_name = strdup(argv[0]); node_name = strdup(argv[0]);
} else if (2 == cnt) { } else if (2 == cnt) {
username = strdup(argv[0]); username = strdup(argv[0]);
node_name = strdup(argv[1]); node_name = strdup(argv[1]);
} }
else { else {
orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile); orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
rc = ORTE_ERR_BAD_PARAM; rc = ORTE_ERR_BAD_PARAM;
goto unlock; ORTE_ERROR_LOG(rc);
} goto unlock;
opal_argv_free (argv); }
rankmap[ival].rank = ival; opal_argv_free (argv);
rankmap[ival].node_name = strdup(node_name); /* check the rank item */
/* convert this into something globally unique */ if (NULL == rfmap) {
if (strcmp(node_name, "localhost") == 0 || opal_ifislocal(node_name)) { orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
/* Nodename has been allocated, that is for sure */ rc = ORTE_ERR_BAD_PARAM;
free (node_name); ORTE_ERROR_LOG(rc);
} goto unlock;
}
/* check if this is the local node */
if (0 == strcmp(node_name, hnp_node->name) ||
opal_ifislocal(node_name)) {
rfmap->node_name = strdup(hnp_node->name);
} else {
rfmap->node_name = strdup(node_name);
}
} }
break; break;
case ORTE_RANKFILE_SLOT: case ORTE_RANKFILE_SLOT:
if ( NULL == (value = orte_rmaps_rank_file_parse_string_or_int())) { if (NULL == node_name || rank < 0 ||
NULL == (value = orte_rmaps_rank_file_parse_string_or_int())) {
orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile); orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
rc = ORTE_ERR_BAD_PARAM; rc = ORTE_ERR_BAD_PARAM;
ORTE_ERROR_LOG(rc);
goto unlock; goto unlock;
} }
rankmap[ival].slot_list = strdup(value); /* check the rank item */
break; if (NULL == rfmap) {
orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
rc = ORTE_ERR_BAD_PARAM;
ORTE_ERROR_LOG(rc);
goto unlock;
}
for (i=0; i < 64 && '\0' != value[i]; i++) {
rfmap->slot_list[i] = value[i];
}
break;
} }
} }
fclose(orte_rmaps_rank_file_in); fclose(orte_rmaps_rank_file_in);
orte_rmaps_rank_file_in = NULL; orte_rmaps_rank_file_in = NULL;
unlock: unlock:
if (NULL != node_name) {
free(node_name);
}
orte_rmaps_rank_file_name_cur = NULL; orte_rmaps_rank_file_name_cur = NULL;
OPAL_THREAD_UNLOCK(&orte_rmaps_rank_file_mutex);
return rc; return rc;
} }
static char *orte_rmaps_rank_file_parse_string_or_int(void) static char *orte_rmaps_rank_file_parse_string_or_int(void)
{ {
int rc; int rc;
char tmp_str[64]; char tmp_str[64];
if (ORTE_RANKFILE_EQUAL != orte_rmaps_rank_file_lex()){ if (ORTE_RANKFILE_EQUAL != orte_rmaps_rank_file_lex()){
return NULL; return NULL;
} }
rc = orte_rmaps_rank_file_lex(); rc = orte_rmaps_rank_file_lex();
switch (rc) { switch (rc) {
case ORTE_RANKFILE_STRING: case ORTE_RANKFILE_STRING:
return strdup(orte_rmaps_rank_file_value.sval); return strdup(orte_rmaps_rank_file_value.sval);
case ORTE_RANKFILE_INT: case ORTE_RANKFILE_INT:
sprintf(tmp_str,"%d",orte_rmaps_rank_file_value.ival); sprintf(tmp_str,"%d",orte_rmaps_rank_file_value.ival);
return strdup(tmp_str); return strdup(tmp_str);
default: default:
return NULL; return NULL;
} }
} }

Просмотреть файл

@ -24,17 +24,17 @@
*/ */
#include "orte_config.h"
#include "opal_config.h"
#include "opal/mca/paffinity/paffinity.h"
#ifndef ORTE_RMAPS_RF_H #ifndef ORTE_RMAPS_RF_H
#define ORTE_RMAPS_RF_H #define ORTE_RMAPS_RF_H
#include "orte_config.h"
#include "opal/class/opal_object.h"
#include "opal/mca/paffinity/paffinity.h"
#include "orte/mca/rmaps/rmaps.h" #include "orte/mca/rmaps/rmaps.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" { BEGIN_C_DECLS
#endif
/** /**
* RMGR Component * RMGR Component
@ -56,16 +56,14 @@ extern char *orte_rmaps_rank_file_path;
typedef struct cpu_socket_t cpu_socket_t; typedef struct cpu_socket_t cpu_socket_t;
struct orte_rmaps_rank_file_map_t { struct orte_rmaps_rank_file_map_t {
int rank; opal_object_t super;
char* node_name; char* node_name;
char* slot_list; char slot_list[64];
}; };
typedef struct orte_rmaps_rank_file_map_t orte_rmaps_rank_file_map_t; typedef struct orte_rmaps_rank_file_map_t orte_rmaps_rank_file_map_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_rmaps_rank_file_map_t); ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_rmaps_rank_file_map_t);
#if defined(c_plusplus) || defined(__cplusplus) END_C_DECLS
}
#endif
#endif #endif

Просмотреть файл

@ -106,3 +106,16 @@ static int orte_rmaps_rank_file_close(void)
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
static void rf_map_construct(orte_rmaps_rank_file_map_t *ptr)
{
ptr->node_name = NULL;
memset(ptr->slot_list, (char)0x00, 64);
}
static void rf_map_destruct(orte_rmaps_rank_file_map_t *ptr)
{
if (NULL != ptr->node_name) free(ptr->node_name);
}
OBJ_CLASS_INSTANCE(orte_rmaps_rank_file_map_t,
opal_object_t,
rf_map_construct,
rf_map_destruct);

Просмотреть файл

@ -24,24 +24,19 @@
#ifdef HAVE_UNISTD_H #ifdef HAVE_UNISTD_H
#include <unistd.h> #include <unistd.h>
#endif #endif
#include "orte/mca/rmaps/rank_file/rmaps_rank_file_lex.h" #include "orte/mca/rmaps/rank_file/rmaps_rank_file_lex.h"
#include "opal/util/output.h"
#if defined(c_plusplus) || defined(__cplusplus) BEGIN_C_DECLS
extern "C" {
#endif
/* static int finish_parsing(void);
* local functions int orte_rmaps_rank_file_yywrap(void);
*/
int orte_rmaps_rank_file_wrap(void)
{
orte_rmaps_rank_file_done = true;
return 1;
}
#if defined(c_plusplus) || defined(__cplusplus) END_C_DECLS
}
#endif #define yyterminate() \
return finish_parsing()
/* /*
* global variables * global variables
@ -121,3 +116,25 @@ username { orte_rmaps_rank_file_value.sval = yytext;
%% %%
/*
* This cleans up at the end of the parse (since, in this case, we
* always parse the entire file) and prevents a memory leak.
*/
static int finish_parsing(void)
{
if (NULL != YY_CURRENT_BUFFER) {
yy_delete_buffer(YY_CURRENT_BUFFER);
#if defined(YY_CURRENT_BUFFER_LVALUE)
YY_CURRENT_BUFFER_LVALUE = NULL;
#else
YY_CURRENT_BUFFER = NULL;
#endif /* YY_CURRENT_BUFFER_LVALUE */
}
return YY_NULL;
}
int orte_rmaps_rank_file_wrap(void)
{
orte_rmaps_rank_file_done = true;
return 1;
}

Просмотреть файл

@ -104,7 +104,7 @@ static int map_app_by_node(
/* Allocate a slot on this node */ /* Allocate a slot on this node */
node = (orte_node_t*) cur_node_item; node = (orte_node_t*) cur_node_item;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, app->idx, if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, NULL, app->idx,
nodes, jdata->map->oversubscribe, true))) { nodes, jdata->map->oversubscribe, true))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error - we just need to break from the loop * really isn't an error - we just need to break from the loop
@ -217,7 +217,7 @@ static int map_app_by_slot(
} }
for( i = 0; i < num_slots_to_take; ++i) { for( i = 0; i < num_slots_to_take; ++i) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, app->idx, if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, NULL, app->idx,
nodes, jdata->map->oversubscribe, true))) { nodes, jdata->map->oversubscribe, true))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error - we just need to break from the loop * really isn't an error - we just need to break from the loop

Просмотреть файл

@ -210,7 +210,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
* an oversubscribed node from the list! * an oversubscribed node from the list!
*/ */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node,
vpid, app->idx, vpid, NULL, app->idx,
node_list, node_list,
jdata->map->oversubscribe, jdata->map->oversubscribe,
false))) { false))) {

Просмотреть файл

@ -110,7 +110,7 @@ static int map_app_by_node(
/* Allocate a slot on this node */ /* Allocate a slot on this node */
node = (orte_node_t*) cur_node_item; node = (orte_node_t*) cur_node_item;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, app->idx, if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, NULL, app->idx,
nodes, jdata->map->oversubscribe, true))) { nodes, jdata->map->oversubscribe, true))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error - we just need to break from the loop * really isn't an error - we just need to break from the loop
@ -217,7 +217,7 @@ static int map_app_by_slot(
} }
for( i = 0; i < num_slots_to_take; ++i) { for( i = 0; i < num_slots_to_take; ++i) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, app->idx, if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, NULL, app->idx,
nodes, jdata->map->oversubscribe, true))) { nodes, jdata->map->oversubscribe, true))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error - we just need to break from the loop * really isn't an error - we just need to break from the loop

Просмотреть файл

@ -603,7 +603,6 @@ static void orte_node_construct(orte_node_t* node)
node->slots_alloc = 0; node->slots_alloc = 0;
node->slots_max = 0; node->slots_max = 0;
node->username = NULL; node->username = NULL;
node->slot_list = NULL;
} }
static void orte_node_destruct(orte_node_t* node) static void orte_node_destruct(orte_node_t* node)

Просмотреть файл

@ -53,9 +53,9 @@ ORTE_DECLSPEC extern bool orte_help_want_aggregate; /* instantiated in orte/uti
/* Shortcut for some commonly used names */ /* Shortcut for some commonly used names */
#define ORTE_NAME_WILDCARD (&orte_name_wildcard) #define ORTE_NAME_WILDCARD (&orte_name_wildcard)
ORTE_DECLSPEC orte_process_name_t orte_name_wildcard; /** instantiated in orte/runtime/orte_init.c */ ORTE_DECLSPEC extern orte_process_name_t orte_name_wildcard; /** instantiated in orte/runtime/orte_init.c */
#define ORTE_NAME_INVALID (&orte_name_invalid) #define ORTE_NAME_INVALID (&orte_name_invalid)
ORTE_DECLSPEC orte_process_name_t orte_name_invalid; /** instantiated in orte/runtime/orte_init.c */ ORTE_DECLSPEC extern orte_process_name_t orte_name_invalid; /** instantiated in orte/runtime/orte_init.c */
#define ORTE_PROC_MY_NAME (&orte_process_info.my_name) #define ORTE_PROC_MY_NAME (&orte_process_info.my_name)
@ -242,7 +242,6 @@ typedef struct {
orte_std_cntr_t slots_max; orte_std_cntr_t slots_max;
/** Username on this node, if specified */ /** Username on this node, if specified */
char *username; char *username;
char *slot_list;
} orte_node_t; } orte_node_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_node_t); ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_node_t);

Просмотреть файл

@ -42,6 +42,7 @@
#include "orte/util/proc_info.h" #include "orte/util/proc_info.h"
#include "orte/runtime/runtime.h" #include "orte/runtime/runtime.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_locks.h" #include "orte/runtime/orte_locks.h"
/* /*