1
1
openmpi/orte/mca/rmaps/proxy/rmaps_proxy.c
Ralph Castain f4a458532b This doesn't totally resolve the comm_spawn problem, but it helps a little. I'll continue working on it and hope to resolve it completely shortly. The issue primarily centers on where to start mapping the child job's processes, and how to deal with oversubscription that might result. At the moment, I am trying to resolve the first issue first (hey, that even sounds right!).
This change does a couple of things:

1. Since the USE_PARENT_ALLOC attribute is a directive about regarding allocation of resources to a job, it more properly should be an attribute of the RAS. Change the name to reflect that and move the attribute define to the ras_types.h file.

2. Add the attributes list to the RMAPS map_job interface. This provides us with the desired flexibility to dynamically specify directives for mapping. The system will - in the absence of any attribute-based directive - default to the values provided in the MCA parameters (either from environment or command-line interface).

This commit was SVN r12164.
2006-10-18 14:01:44 +00:00

113 строки
3.2 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/dss/dss.h"
#include "orte/runtime/runtime.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/gpr/gpr_types.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/proxy/rmaps_proxy.h"
/*
* Map a job
*/
int orte_rmaps_proxy_map(orte_jobid_t job, opal_list_t *attributes)
{
orte_buffer_t* cmd;
orte_buffer_t* answer;
orte_rmaps_cmd_flag_t command;
orte_std_cntr_t count;
int rc;
command = ORTE_RMAPS_MAP_CMD;
cmd = OBJ_NEW(orte_buffer_t);
if (cmd == NULL) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* pack the command */
if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_RMAPS_CMD))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(cmd);
return rc;
}
/* pack the jobid */
if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &job, 1, ORTE_JOBID))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(cmd);
return rc;
}
/* pack the attributes */
if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, attributes, 1, ORTE_ATTR_LIST))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(cmd);
return rc;
}
/* send the request */
if (0 > orte_rml.send_buffer(orte_rmaps_proxy_globals.replica, cmd, ORTE_RML_TAG_RMAPS, 0)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(cmd);
return ORTE_ERR_COMM_FAILURE;
}
OBJ_RELEASE(cmd);
/* setup a buffer for the answer */
answer = OBJ_NEW(orte_buffer_t);
if(answer == NULL) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* enter a blocking receive until we hear back */
if (0 > orte_rml.recv_buffer(orte_rmaps_proxy_globals.replica, answer, ORTE_RML_TAG_RMAPS)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
}
count = 1;
if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_RMAPS_CMD))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(answer);
return rc;
}
/* check that this is the right command */
if (ORTE_RMAPS_MAP_CMD != command) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
return ORTE_ERR_COMM_FAILURE;
}
/* clean up and leave */
OBJ_RELEASE(answer);
return ORTE_SUCCESS;
}