1
1
The alps ras and plm components were broken by recent changes in ORTE. This
commit resolves those issues.

Changes:

 - Define PMI2_SUCCESS if it isn't defined. This fixes a problem with Cray's
   PMI implementation which does not define (for some reason) PMI2_SUCCESS. We
   had previously just used PMI_SUCCESS.

 - Add a missing definition and fix a typo in plm_alps_module.

 - launch_id is no longer available in the orte_node_t structure. Use the
   attribute lookup to get the value.

 - Do not use an O(n^2) sorting algorithm when putting alps nodes in order. Use
   opal_list_sort instead (O(nlogn)).

This commit was SVN r32076.
Этот коммит содержится в:
Nathan Hjelm 2014-06-24 21:29:04 +00:00
родитель bce33635a7
Коммит 563eaf0726
3 изменённых файлов: 35 добавлений и 36 удалений

Просмотреть файл

@ -26,6 +26,11 @@
#include <pmi.h>
#if WANT_PMI2_SUPPORT
#include <pmi2.h>
/* Some PMI implementations (notably Cray's) ship a pmi2.h that does not
 * define PMI2_SUCCESS. Fall back to PMI_SUCCESS in that case. */
#if !defined(PMI2_SUCCESS)
#define PMI2_SUCCESS PMI_SUCCESS
#endif
#endif
#include "common_pmi.h"

Просмотреть файл

@ -374,7 +374,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
the ALPS plm) */
cur_prefix = NULL;
for (i=0; i < state->jdata->apps->size; i++) {
char *app_prefix_dir;
char *app_prefix_dir = NULL;
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(state->jdata->apps, i))) {
continue;
}
@ -544,6 +544,7 @@ static int plm_alps_start_proc(int argc, char **argv, char **env,
char *prefix)
{
int fd;
pid_t alps_pid;
char *exec_argv = opal_path_findv(argv[0], 0, env, NULL);
if (NULL == exec_argv) {
@ -559,7 +560,7 @@ static int plm_alps_start_proc(int argc, char **argv, char **env,
alpsrun = OBJ_NEW(orte_proc_t);
alpsrun->pid = alps_pid;
/* be sure to mark it as alive so we don't instantly fire */
ORTE_FLAG_SET(dummy, ORTE_PROC_FLAG_ALIVE);
ORTE_FLAG_SET(alpsrun, ORTE_PROC_FLAG_ALIVE);
/* setup the waitpid so we can find out if alps succeeds! */
orte_wait_cb(alpsrun, alps_wait_cb, NULL);

Просмотреть файл

@ -365,6 +365,25 @@ ras_alps_getline(FILE *fp)
return NULL;
}
/**
 * Comparison callback for opal_list_sort(): orders nodes by ascending
 * ORTE_NODE_LAUNCH_ID attribute.
 *
 * @param a  pointer to the first list item (an orte_node_t)
 * @param b  pointer to the second list item (an orte_node_t)
 *
 * @return negative if a's launch id is lower than b's, positive if it is
 *         higher, and 0 if they are equal or if either node has no
 *         launch id attribute (such nodes are treated as unordered).
 */
static int compare_nodes (opal_list_item_t **a, opal_list_item_t **b)
{
    orte_node_t *nodea = (orte_node_t *) *a;
    orte_node_t *nodeb = (orte_node_t *) *b;
    int32_t launcha = 0, launchb = 0, *ldptr;

    /* orte_get_attribute() stores the value through the pointer it is
     * handed, so point ldptr at the destination before each lookup. */
    ldptr = &launcha;
    if (!orte_get_attribute(&nodea->attributes, ORTE_NODE_LAUNCH_ID, (void**)&ldptr, OPAL_INT32)) {
        return 0;
    }

    /* Look up the SECOND node's launch id (previously this re-read nodea,
     * which made every comparison see identical keys). */
    ldptr = &launchb;
    if (!orte_get_attribute(&nodeb->attributes, ORTE_NODE_LAUNCH_ID, (void**)&ldptr, OPAL_INT32)) {
        return 0;
    }

    /* Three-way compare without subtraction (avoids signed overflow) and
     * returning 0 for equal keys so the ordering is consistent. */
    return (launcha > launchb) - (launcha < launchb);
}
static int
orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
unsigned int *uMe)
@ -392,7 +411,6 @@ orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
#else
placeNodeList_t *apNodes;
#endif
bool added;
opal_list_item_t *item;
orte_ras_alps_get_appinfo_attempts(&max_appinfo_read_attempts);
@ -529,22 +547,8 @@ orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
/* need to order these node ids so the regex generator
* can properly function
*/
added = false;
for (item = opal_list_get_first(nodes);
item != opal_list_get_end(nodes);
item = opal_list_get_next(item)) {
n2 = (orte_node_t*)item;
if (node->launch_id < n2->launch_id) {
/* insert the new node before this one */
opal_list_insert_pos(nodes, item, &node->super);
added = true;
break;
}
}
if (!added) {
/* add it to the end */
opal_list_append(nodes, &node->super);
}
/* add it to the end */
opal_list_append(nodes, &node->super);
sNodes++; /* Increment the node count */
}
}
@ -572,34 +576,23 @@ orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
node = OBJ_NEW(orte_node_t);
node->name = hostname;
orte_set_attribute(&node->attributes, ORTE_NODE_LAUNCH_ID, ORTE_ATTR_LOCAL, &apSlots[ix].nid, OPAL_INT32);
orte_set_attribute(&node->attributes, ORTE_NODE_LAUNCH_ID, ORTE_ATTR_LOCAL, &apNodes[ix].nid, OPAL_INT32);
node->slots_inuse = 0;
node->slots_max = 0;
node->slots = apNodes[ix].numPEs;
/* need to order these node ids so the regex generator
* can properly function
*/
added = false;
for (item = opal_list_get_first(nodes);
item != opal_list_get_end(nodes);
item = opal_list_get_next(item)) {
n2 = (orte_node_t*)item;
if (node->launch_id < n2->launch_id) {
/* insert the new node before this one */
opal_list_insert_pos(nodes, item, &node->super);
added = true;
break;
}
}
if (!added) {
/* add it to the end */
opal_list_append(nodes, &node->super);
}
/* add it to the end */
opal_list_append(nodes, &node->super);
sNodes++; /* Increment the node count */
}
#endif
break; /* Extended details ignored */
}
opal_list_sort (nodes, compare_nodes);
free(cpBuf); /* Free the buffer */
return ORTE_SUCCESS;