1
1
openmpi/orte/mca/odls/alps/odls_alps_utils.c
Nathan Hjelm b68d66bb9b MCA: Add the project/project version to the MCA base component
This commit adds support for project_framework_component_* parameter
matching. This is the first step in allowing the same framework name
in multiple projects. This change also bumps the MCA component version
to 2.1.0.

All master frameworks have been updated to use the new component
versioning macro. An mca.h has been added to each project to add a
project specific versioning macro of the form
PROJECT_MCA_VERSION_2_1_0.

Signed-off-by: Nathan Hjelm <hjelmn@me.com>
2015-03-27 10:59:04 -06:00

253 строки
9.1 KiB
C

/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* These symbols are in a file by themselves to provide nice linker
* semantics. Since linkers generally pull in symbols by object
* files, keeping these symbols as the only symbols in this file
* prevents utility programs such as "ompi_info" from having to import
* entire components just to query their version and parameters.
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <ctype.h>
#include <sys/syscall.h>
#include "orte/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/util/opal_environ.h"
#include "orte/mca/odls/odls.h"
#include "orte/mca/odls/base/base.h"
#include "orte/mca/odls/base/odls_private.h"
#include "orte/mca/odls/alps/odls_alps.h"
int orte_odls_alps_get_rdma_creds(void)
{
int alps_status = 0, num_creds, i, len;
uint64_t apid;
size_t alps_count;
int ret = ORTE_SUCCESS;
alpsAppLLIGni_t *rdmacred_rsp=NULL;
alpsAppGni_t *rdmacred_buf;
char *ptr;
char env_buffer[1024];
static int already_got_creds = 0;
/*
* If we already put the GNI RDMA credentials into orte_launch_environ,
* no need to do anything.
* TODO: kind of ugly, need to implement an opal_getenv
*/
if (1 == already_got_creds) {
return ORTE_SUCCESS;
}
/*
* get the Cray HSN RDMA credentials here and stuff them in to the
* PMI env variable format expected by uGNI consumers like the uGNI
* BTL, etc. Stuff into the orte_launch_environ to make sure the
* application processes can actually use the HSN API (uGNI).
*/
if (ORTE_PROC_IS_DAEMON) {
ret = alps_app_lli_lock();
/*
* First get our apid
*/
ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_APID, NULL, 0);
if (ALPS_APP_LLI_ALPS_STAT_OK != ret) {
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:alps: alps_app_lli_put_request returned %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
ret = ORTE_ERR_FILE_WRITE_FAILURE;
goto fn_exit;
}
ret = alps_app_lli_get_response (&alps_status, &alps_count);
if (ALPS_APP_LLI_ALPS_STAT_OK != alps_status) {
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:alps: alps_app_lli_get_response returned %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), alps_status));
ret = ORTE_ERR_FILE_READ_FAILURE;
goto fn_exit;
}
ret = alps_app_lli_get_response_bytes (&apid, sizeof(apid));
if (ALPS_APP_LLI_ALPS_STAT_OK != ret) {
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:alps: alps_app_lli_get_response_bytes returned %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
ret = ORTE_ERR_FILE_READ_FAILURE;
goto fn_exit;
}
/*
* now get the GNI rdma credentials info
*/
ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_GNI, NULL, 0);
if (ALPS_APP_LLI_ALPS_STAT_OK != ret) {
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:alps: alps_app_lli_put_request returned %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
ret = ORTE_ERR_FILE_WRITE_FAILURE;
goto fn_exit;
}
ret = alps_app_lli_get_response(&alps_status, &alps_count);
if (ALPS_APP_LLI_ALPS_STAT_OK != alps_status) {
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:alps: alps_app_lli_get_response returned %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), alps_status));
ret = ORTE_ERR_FILE_READ_FAILURE;
goto fn_exit;
}
rdmacred_rsp = (alpsAppLLIGni_t *)malloc(alps_count);
if (NULL == rdmacred_rsp) {
ret = ORTE_ERR_OUT_OF_RESOURCE;
goto fn_exit;
}
memset(rdmacred_rsp,0,alps_count);
ret = alps_app_lli_get_response_bytes(rdmacred_rsp, alps_count);
if (ALPS_APP_LLI_ALPS_STAT_OK != ret) {
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:alps: alps_app_lli_get_response_bytes returned %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
free(rdmacred_rsp);
ret = ORTE_ERR_FILE_READ_FAILURE;
goto fn_exit;
}
ret = alps_app_lli_unlock();
rdmacred_buf = (alpsAppGni_t *)(rdmacred_rsp->u.buf);
/*
* now set up the env. variables -
* The cray pmi sets up 4 environment variables:
* PMI_GNI_DEV_ID - format (id0:id1....idX)
* PMI_GNI_LOC_ADDR - format (locaddr0:locaddr1:....locaddrX)
* PMI_GNI_COOKIE - format (cookie0:cookie1:...cookieX)
* PMI_GNI_PTAG - format (ptag0:ptag1:....ptagX)
*
* where X == num_creds - 1
*
* TODO: need in theory to check for possible overrun of env_buffer
*/
num_creds = rdmacred_rsp->count;
/*
* first build ptag env
*/
memset(env_buffer,0,sizeof(env_buffer));
ptr = env_buffer;
for (i=0; i<num_creds-1; i++) {
len = sprintf(ptr,"%d:",rdmacred_buf[i].ptag);
ptr += len;
}
sprintf(ptr,"%d",rdmacred_buf[num_creds-1].ptag);
ret = opal_setenv("PMI_GNI_PTAG", env_buffer, false, &orte_launch_environ);
if (ret != ORTE_SUCCESS) {
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:alps: opal_setenv for PMI_GNI_TAG returned %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
goto fn_exit;
}
/*
* the cookie env
*/
memset(env_buffer,0,sizeof(env_buffer));
ptr = env_buffer;
for (i=0; i<num_creds-1; i++) {
len = sprintf(ptr,"%d:",rdmacred_buf[i].cookie);
ptr += len;
}
sprintf(ptr,"%d",rdmacred_buf[num_creds-1].cookie);
ret = opal_setenv("PMI_GNI_COOKIE", env_buffer, false, &orte_launch_environ);
if (ret != ORTE_SUCCESS) {
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:alps: opal_setenv for PMI_GNI_COOKIE returned %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
goto fn_exit;
}
/*
* nic loc addrs
*/
memset(env_buffer,0,sizeof(env_buffer));
ptr = env_buffer;
for (i=0; i<num_creds-1; i++) {
len = sprintf(ptr,"%d:",rdmacred_buf[i].local_addr);
ptr += len;
}
sprintf(ptr,"%d",rdmacred_buf[num_creds-1].local_addr);
ret = opal_setenv("PMI_GNI_LOC_ADDR", env_buffer, false, &orte_launch_environ);
if (ret != ORTE_SUCCESS) {
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:alps: opal_setenv for PMI_GNI_LOC_ADDR returned %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
goto fn_exit;
}
/*
* finally device ids
*/
memset(env_buffer,0,sizeof(env_buffer));
ptr = env_buffer;
for (i=0; i<num_creds-1; i++) {
len = sprintf(ptr,"%d:",rdmacred_buf[i].device_id);
ptr += len;
}
sprintf(ptr,"%d",rdmacred_buf[num_creds-1].device_id);
ret = opal_setenv("PMI_GNI_DEV_ID", env_buffer, false, &orte_launch_environ);
if (ret != ORTE_SUCCESS) {
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:alps: opal_setenv for PMI_GNI_DEV_ID returned %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
goto fn_exit;
}
}
fn_exit:
if (ORTE_SUCCESS == ret) already_got_creds = 1;
return ret;
}