Fix support for Cray alps
The alps ras and plm components were broken by recent changes in ORTE. This commit resolves those issues. Changes: - Define PMI2_SUCCESS if it isn't defined. This fixes a problem with Cray's PMI implementation, which (for some reason) does not define PMI2_SUCCESS. We had previously just used PMI_SUCCESS. - Add a missing definition and fix a typo in plm_alps_module. - launch_id is no longer available in the orte_node_t structure. Use the attribute lookup to get the value. - Do not use an O(n^2) sorting algorithm when putting alps nodes in order. Use opal_list_sort instead (O(n log n)). This commit was SVN r32076.
Этот коммит содержится в:
родитель
bce33635a7
Коммит
563eaf0726
@ -26,6 +26,11 @@
|
||||
#include <pmi.h>
|
||||
#if WANT_PMI2_SUPPORT
|
||||
#include <pmi2.h>
|
||||
|
||||
#if !defined(PMI2_SUCCESS)
|
||||
#define PMI2_SUCCESS PMI_SUCCESS
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#include "common_pmi.h"
|
||||
|
@ -374,7 +374,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
the ALPS plm) */
|
||||
cur_prefix = NULL;
|
||||
for (i=0; i < state->jdata->apps->size; i++) {
|
||||
char *app_prefix_dir;
|
||||
char *app_prefix_dir = NULL;
|
||||
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(state->jdata->apps, i))) {
|
||||
continue;
|
||||
}
|
||||
@ -544,6 +544,7 @@ static int plm_alps_start_proc(int argc, char **argv, char **env,
|
||||
char *prefix)
|
||||
{
|
||||
int fd;
|
||||
pid_t alps_pid;
|
||||
char *exec_argv = opal_path_findv(argv[0], 0, env, NULL);
|
||||
|
||||
if (NULL == exec_argv) {
|
||||
@ -559,7 +560,7 @@ static int plm_alps_start_proc(int argc, char **argv, char **env,
|
||||
alpsrun = OBJ_NEW(orte_proc_t);
|
||||
alpsrun->pid = alps_pid;
|
||||
/* be sure to mark it as alive so we don't instantly fire */
|
||||
ORTE_FLAG_SET(dummy, ORTE_PROC_FLAG_ALIVE);
|
||||
ORTE_FLAG_SET(alpsrun, ORTE_PROC_FLAG_ALIVE);
|
||||
/* setup the waitpid so we can find out if alps succeeds! */
|
||||
orte_wait_cb(alpsrun, alps_wait_cb, NULL);
|
||||
|
||||
|
@ -365,6 +365,25 @@ ras_alps_getline(FILE *fp)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
 * Comparison callback handed to opal_list_sort() to order nodes by their
 * ORTE_NODE_LAUNCH_ID attribute, ascending.
 *
 * @param a  pointer to the first list item (cast to orte_node_t *)
 * @param b  pointer to the second list item (cast to orte_node_t *)
 *
 * @return 1 if a's launch id is greater than b's, -1 if it is smaller,
 *         and 0 if they are equal or if the launch id could not be
 *         retrieved from either node.
 */
static int compare_nodes (opal_list_item_t **a, opal_list_item_t **b)
{
    orte_node_t *nodea = (orte_node_t *) *a;
    orte_node_t *nodeb = (orte_node_t *) *b;
    int32_t launcha, launchb, *ldptr;

    /* ldptr points at the local storage the lookup should fill in
     * (presumably orte_get_attribute writes through it -- confirm
     * against the attribute API) */
    ldptr = &launcha;
    if (!orte_get_attribute(&nodea->attributes, ORTE_NODE_LAUNCH_ID, (void**)&ldptr, OPAL_INT32)) {
        /* lookup failed: treat the pair as unordered */
        return 0;
    }

    /* the second lookup must read node b; reading node a twice made both
     * ids identical and the comparison meaningless */
    ldptr = &launchb;
    if (!orte_get_attribute(&nodeb->attributes, ORTE_NODE_LAUNCH_ID, (void**)&ldptr, OPAL_INT32)) {
        return 0;
    }

    /* report equality explicitly so the comparator is consistent when
     * the arguments are swapped */
    if (launcha == launchb) {
        return 0;
    }

    return (launcha > launchb) ? 1 : -1;
}
|
||||
|
||||
static int
|
||||
orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
|
||||
unsigned int *uMe)
|
||||
@ -392,7 +411,6 @@ orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
|
||||
#else
|
||||
placeNodeList_t *apNodes;
|
||||
#endif
|
||||
bool added;
|
||||
opal_list_item_t *item;
|
||||
|
||||
orte_ras_alps_get_appinfo_attempts(&max_appinfo_read_attempts);
|
||||
@ -529,22 +547,8 @@ orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
|
||||
/* need to order these node ids so the regex generator
|
||||
* can properly function
|
||||
*/
|
||||
added = false;
|
||||
for (item = opal_list_get_first(nodes);
|
||||
item != opal_list_get_end(nodes);
|
||||
item = opal_list_get_next(item)) {
|
||||
n2 = (orte_node_t*)item;
|
||||
if (node->launch_id < n2->launch_id) {
|
||||
/* insert the new node before this one */
|
||||
opal_list_insert_pos(nodes, item, &node->super);
|
||||
added = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!added) {
|
||||
/* add it to the end */
|
||||
opal_list_append(nodes, &node->super);
|
||||
}
|
||||
/* add it to the end */
|
||||
opal_list_append(nodes, &node->super);
|
||||
sNodes++; /* Increment the node count */
|
||||
}
|
||||
}
|
||||
@ -572,34 +576,23 @@ orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
|
||||
|
||||
node = OBJ_NEW(orte_node_t);
|
||||
node->name = hostname;
|
||||
orte_set_attribute(&node->attributes, ORTE_NODE_LAUNCH_ID, ORTE_ATTR_LOCAL, &apSlots[ix].nid, OPAL_INT32);
|
||||
orte_set_attribute(&node->attributes, ORTE_NODE_LAUNCH_ID, ORTE_ATTR_LOCAL, &apNodes[ix].nid, OPAL_INT32);
|
||||
node->slots_inuse = 0;
|
||||
node->slots_max = 0;
|
||||
node->slots = apNodes[ix].numPEs;
|
||||
/* need to order these node ids so the regex generator
|
||||
* can properly function
|
||||
*/
|
||||
added = false;
|
||||
for (item = opal_list_get_first(nodes);
|
||||
item != opal_list_get_end(nodes);
|
||||
item = opal_list_get_next(item)) {
|
||||
n2 = (orte_node_t*)item;
|
||||
if (node->launch_id < n2->launch_id) {
|
||||
/* insert the new node before this one */
|
||||
opal_list_insert_pos(nodes, item, &node->super);
|
||||
added = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!added) {
|
||||
/* add it to the end */
|
||||
opal_list_append(nodes, &node->super);
|
||||
}
|
||||
/* add it to the end */
|
||||
opal_list_append(nodes, &node->super);
|
||||
sNodes++; /* Increment the node count */
|
||||
}
|
||||
#endif
|
||||
break; /* Extended details ignored */
|
||||
}
|
||||
|
||||
opal_list_sort (nodes, compare_nodes);
|
||||
|
||||
free(cpBuf); /* Free the buffer */
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user