307 строки
9.3 KiB
C
307 строки
9.3 KiB
C
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||
|
/*
|
||
|
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||
|
* reserved.
|
||
|
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||
|
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||
|
* Copyright (c) 2014 Research Organization for Information Science
|
||
|
* and Technology (RIST). All rights reserved.
|
||
|
* $COPYRIGHT$
|
||
|
*
|
||
|
* Additional copyrights may follow
|
||
|
*
|
||
|
* $HEADER$
|
||
|
*/
|
||
|
|
||
|
|
||
|
#include "btl_ugni.h"
|
||
|
#include "btl_ugni_endpoint.h"
|
||
|
|
||
|
#include "opal/class/opal_list.h"
|
||
|
#include "opal/dss/dss.h"
|
||
|
#include "opal/mca/pmix/pmix.h"
|
||
|
#include "opal/util/bit_ops.h"
|
||
|
|
||
|
static inline int get_ptag(uint8_t *out_ptag)
|
||
|
{
|
||
|
/* TODO no need for tmp */
|
||
|
char *ptr;
|
||
|
uint8_t tmp_ptag;
|
||
|
|
||
|
if (NULL == (ptr = getenv("PMI_GNI_PTAG"))) {
|
||
|
/* TODO add err msg - better rc? */
|
||
|
return OPAL_ERR_NOT_FOUND;
|
||
|
}
|
||
|
errno = 0;
|
||
|
tmp_ptag = (uint8_t)strtoul (ptr, (char **)NULL, 10);
|
||
|
if (0 != errno) {
|
||
|
/* TODO add err msg - better rc? */
|
||
|
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
|
||
|
}
|
||
|
*out_ptag = tmp_ptag;
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
static inline int get_cookie (uint32_t *out_cookie)
|
||
|
{
|
||
|
/* TODO no need for tmp */
|
||
|
char *ptr;
|
||
|
uint32_t tmp_cookie;
|
||
|
|
||
|
if (NULL == (ptr = getenv("PMI_GNI_COOKIE"))) {
|
||
|
/* TODO add err msg - better rc? */
|
||
|
return OPAL_ERR_NOT_FOUND;
|
||
|
}
|
||
|
errno = 0;
|
||
|
tmp_cookie = (uint32_t) strtoul (ptr, NULL, 10);
|
||
|
if (0 != errno) {
|
||
|
/* TODO add err msg - better rc? */
|
||
|
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
|
||
|
}
|
||
|
|
||
|
*out_cookie = tmp_cookie;
|
||
|
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
static unsigned int mca_btl_ugni_get_nic_address(int device_id)
|
||
|
{
|
||
|
unsigned int address, cpu_id;
|
||
|
gni_return_t status;
|
||
|
int i, alps_dev_id = -1;
|
||
|
char *token,*p_ptr;
|
||
|
|
||
|
p_ptr = getenv("PMI_GNI_DEV_ID");
|
||
|
if (!p_ptr) {
|
||
|
status = GNI_CdmGetNicAddress(device_id, &address, &cpu_id);
|
||
|
if(status != GNI_RC_SUCCESS) {
|
||
|
opal_output (0, "FAILED:GNI_CdmGetNicAddress returned error %d", status);
|
||
|
return (unsigned int)-1;
|
||
|
}
|
||
|
return address;
|
||
|
}
|
||
|
|
||
|
while (NULL != (token = strtok(p_ptr, ":"))) {
|
||
|
alps_dev_id = atoi(token);
|
||
|
if (alps_dev_id == device_id) {
|
||
|
break;
|
||
|
}
|
||
|
p_ptr = NULL;
|
||
|
}
|
||
|
|
||
|
if (OPAL_UNLIKELY(-1 == alps_dev_id)) {
|
||
|
return (unsigned int)-1;
|
||
|
}
|
||
|
|
||
|
p_ptr = getenv("PMI_GNI_LOC_ADDR");
|
||
|
if (OPAL_UNLIKELY(NULL == p_ptr)) {
|
||
|
return (unsigned int)-1;
|
||
|
}
|
||
|
|
||
|
i = 0;
|
||
|
while (NULL != (token = strtok(p_ptr, ":"))) {
|
||
|
if (i == alps_dev_id) {
|
||
|
return strtoul (token, NULL, 10);
|
||
|
}
|
||
|
p_ptr = NULL;
|
||
|
++i;
|
||
|
}
|
||
|
|
||
|
return (unsigned int)-1;
|
||
|
}
|
||
|
|
||
|
/**
 * Initialise one virtual uGNI device.
 *
 * Constructs the device's endpoint free list and pending-post list, creates
 * a communication domain whose id is cdm_id_base | virtual_device_id,
 * attaches it to device 0 and resets the device lock and the local
 * completion queue bookkeeping.
 *
 * On failure everything this function set up is released again (both list
 * objects and, if it was created, the communication domain), so the device
 * must not be passed to mca_btl_ugni_device_fini() afterwards.
 *
 * @param[in,out] device             device structure to initialise
 * @param[in]     virtual_device_id  index of this virtual device
 *
 * @returns OPAL_SUCCESS on success
 * @returns the opal_free_list_init() error, or the GNI error translated by
 *          mca_btl_rc_ugni_to_opal(), on failure
 */
int mca_btl_ugni_device_init (mca_btl_ugni_device_t *device, int virtual_device_id)
{
    uint32_t dev_pe_addr;
    int rc;

    OBJ_CONSTRUCT(&device->endpoints, opal_free_list_t);
    OBJ_CONSTRUCT(&device->pending_post, opal_list_t);

    rc = opal_free_list_init (&device->endpoints, sizeof (mca_btl_ugni_endpoint_handle_t),
                              8, OBJ_CLASS(mca_btl_ugni_endpoint_handle_t), 0, 8, 0,
                              mca_btl_ugni_component.local_cq_size, 16,
                              NULL, 0, NULL, mca_btl_ugni_endpoint_handle_init_rdma,
                              (void *) device);
    if (OPAL_SUCCESS != rc) {
        goto err_destruct_lists;
    }

    /* create a communication domain */
    rc = GNI_CdmCreate (mca_btl_ugni_component.cdm_id_base | virtual_device_id, mca_btl_ugni_component.ptag,
                        mca_btl_ugni_component.cookie, mca_btl_ugni_component.cdm_flags, &device->dev_cd_handle);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        /* this REALLY is an error but under alps + mapn we may not get any credentials */
        BTL_VERBOSE(("Error: Creating communication domain %d for virtual device %d", rc, virtual_device_id));
        rc = mca_btl_rc_ugni_to_opal (rc);
        goto err_destruct_lists;
    }

    device->dev_index = virtual_device_id;

    /* Report the NIC address */
    OPAL_OUTPUT((-1, "Got NIC Addr: 0x%08x, CPU ID: %d", mca_btl_ugni_component.dev_addr, 0));

    /* Attach device to the communication domain */
    rc = GNI_CdmAttach (device->dev_cd_handle, 0, &dev_pe_addr, &device->dev_handle);
    if (GNI_RC_SUCCESS != rc) {
        BTL_VERBOSE(("Error: Attaching to communication domain. rc = %d, virtual device = %d", rc, virtual_device_id));
        rc = mca_btl_rc_ugni_to_opal (rc);
        goto err_destroy_cdm;
    }

    device->lock = 0;
    device->dev_rdma_local_cq.gni_handle = 0;
    device->dev_rdma_local_cq.active_operations = 0;
    device->dev_rdma_local_irq_cq.gni_handle = 0;
    device->dev_rdma_local_irq_cq.active_operations = 0;
    device->dev_smsg_local_cq.gni_handle = 0;
    device->dev_smsg_local_cq.active_operations = 0;

    return OPAL_SUCCESS;

err_destroy_cdm:
    /* best effort: the attach error in rc is the one worth reporting */
    (void) GNI_CdmDestroy (device->dev_cd_handle);
err_destruct_lists:
    OBJ_DESTRUCT(&device->pending_post);
    OBJ_DESTRUCT(&device->endpoints);
    return rc;
}
|
||
|
|
||
|
/**
 * Tear down a virtual uGNI device.
 *
 * Destructs the endpoint free list and the pending-post list, destroys each
 * local completion queue whose handle is non-zero (clearing the handle
 * afterwards) and finally destroys the communication domain.
 *
 * @param[in,out] dev  device to tear down
 *
 * @returns OPAL_SUCCESS always; a failure to destroy the communication
 *          domain is only reported through BTL_VERBOSE
 */
int mca_btl_ugni_device_fini (mca_btl_ugni_device_t *dev)
{
    OBJ_DESTRUCT(&dev->endpoints);
    OBJ_DESTRUCT(&dev->pending_post);

    /* RDMA completion queue */
    if (dev->dev_rdma_local_cq.gni_handle != 0) {
        GNI_CqDestroy (dev->dev_rdma_local_cq.gni_handle);
        dev->dev_rdma_local_cq.gni_handle = 0;
    }

    /* RDMA completion queue (irq variant) */
    if (dev->dev_rdma_local_irq_cq.gni_handle != 0) {
        GNI_CqDestroy (dev->dev_rdma_local_irq_cq.gni_handle);
        dev->dev_rdma_local_irq_cq.gni_handle = 0;
    }

    /* SMSG completion queue */
    if (dev->dev_smsg_local_cq.gni_handle != 0) {
        GNI_CqDestroy (dev->dev_smsg_local_cq.gni_handle);
        dev->dev_smsg_local_cq.gni_handle = 0;
    }

    if (GNI_RC_SUCCESS != GNI_CdmDestroy (dev->dev_cd_handle)) {
        BTL_VERBOSE(("error destroying cdm handle"));
    }

    return OPAL_SUCCESS;
}
|
||
|
|
||
|
/*
|
||
|
* Send local device information and other information
|
||
|
* required for setup
|
||
|
*/
|
||
|
static int mca_btl_ugni_send_modex (void)
|
||
|
{
|
||
|
struct mca_btl_ugni_modex_t modex;
|
||
|
uint32_t modex_size;
|
||
|
char *modex_msg;
|
||
|
int rc;
|
||
|
|
||
|
modex_size = sizeof (struct mca_btl_ugni_modex_t);
|
||
|
|
||
|
modex_msg = (char *) malloc (modex_size);
|
||
|
if (NULL == modex_msg) {
|
||
|
OPAL_OUTPUT((-1, "Error allocating memory for modex @ %s:%d",
|
||
|
__FILE__, __LINE__));
|
||
|
return OPAL_ERR_OUT_OF_RESOURCE;
|
||
|
}
|
||
|
|
||
|
modex.addr = mca_btl_ugni_component.dev_addr;
|
||
|
modex.id = mca_btl_ugni_component.cdm_id_base;
|
||
|
|
||
|
BTL_VERBOSE(("sending modex. addr: %d, id: %d", modex.addr, modex.id));
|
||
|
|
||
|
memcpy ((void *) modex_msg, (void *) &modex, modex_size);
|
||
|
|
||
|
/*
|
||
|
* need global for edge cases like MPI_Comm_spawn support with
|
||
|
* new ranks started on the same nodes as the spawnee ranks, etc.
|
||
|
*/
|
||
|
|
||
|
OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
|
||
|
&mca_btl_ugni_component.super.btl_version,
|
||
|
modex_msg, modex_size);
|
||
|
|
||
|
free (modex_msg);
|
||
|
|
||
|
return rc;
|
||
|
}
|
||
|
|
||
|
int mca_btl_ugni_fini (void)
|
||
|
{
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
int mca_btl_ugni_init (void)
|
||
|
{
|
||
|
int32_t pid_max = 32768;
|
||
|
int rc, bit;
|
||
|
FILE *fh;
|
||
|
|
||
|
if (0 == mca_btl_ugni_component.virtual_device_count) {
|
||
|
/* XXX -- TODO -- might want to improve this logic. One option would be to
|
||
|
* compare the number of local peers vs the number of cores or hyperthreads
|
||
|
* on the node. */
|
||
|
|
||
|
if (!opal_using_threads() || opal_process_info.num_local_peers >= 255) {
|
||
|
/* there is probably no benefit to using multiple device contexts when not
|
||
|
* using threads. */
|
||
|
mca_btl_ugni_component.virtual_device_count = 1;
|
||
|
} else if (opal_process_info.num_local_peers >= 127) {
|
||
|
mca_btl_ugni_component.virtual_device_count = 2;
|
||
|
} else if (opal_process_info.num_local_peers >= 63) {
|
||
|
mca_btl_ugni_component.virtual_device_count = 4;
|
||
|
} else if (opal_process_info.num_local_peers >= 31) {
|
||
|
mca_btl_ugni_component.virtual_device_count = 8;
|
||
|
} else {
|
||
|
mca_btl_ugni_component.virtual_device_count = 16;
|
||
|
}
|
||
|
} else if (MCA_BTL_UGNI_MAX_DEV_HANDLES < mca_btl_ugni_component.virtual_device_count) {
|
||
|
mca_btl_ugni_component.virtual_device_count = MCA_BTL_UGNI_MAX_DEV_HANDLES;
|
||
|
}
|
||
|
|
||
|
fh = fopen ("/proc/sys/kernel/pid_max", "r");
|
||
|
if (NULL != fh) {
|
||
|
fscanf (fh, "%d", &pid_max);
|
||
|
fclose (fh);
|
||
|
}
|
||
|
|
||
|
/* Use pid to generate the cdm_id. Although its not stated in the uGNI
|
||
|
* documentation, the cdm_id only needs to be unique within a node for a
|
||
|
* given ptag/cookie tuple */
|
||
|
bit = opal_hibit (pid_max, 31);
|
||
|
if (bit >= 31) {
|
||
|
mca_btl_ugni_component.virtual_device_count = 1;
|
||
|
mca_btl_ugni_component.cdm_id_base = getpid();
|
||
|
} else if (bit >= 30 && mca_btl_ugni_component.virtual_device_count > 2) {
|
||
|
mca_btl_ugni_component.virtual_device_count = 2;
|
||
|
mca_btl_ugni_component.cdm_id_base = getpid() << 1;
|
||
|
} else {
|
||
|
mca_btl_ugni_component.cdm_id_base = getpid() << 8;
|
||
|
}
|
||
|
|
||
|
/* Create a communication domain */
|
||
|
/* collect uGNI information */
|
||
|
rc = get_ptag(&mca_btl_ugni_component.ptag);
|
||
|
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||
|
return rc;
|
||
|
}
|
||
|
|
||
|
rc = get_cookie(&mca_btl_ugni_component.cookie);
|
||
|
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||
|
return rc;
|
||
|
}
|
||
|
|
||
|
/* get the device address of the NIC */
|
||
|
mca_btl_ugni_component.dev_addr = mca_btl_ugni_get_nic_address (0);
|
||
|
|
||
|
/* send ugni modex */
|
||
|
mca_btl_ugni_send_modex ();
|
||
|
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|