/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
 *                         reserved.
 * Copyright (c) 2011      UT-Battelle, LLC. All rights reserved.
 * Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "common_ugni.h"
#include "opal/mca/pmix/pmix.h"

OBJ_CLASS_INSTANCE(opal_common_ugni_endpoint_t, opal_object_t, NULL, NULL);
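
/* Look up the modex data published by peer_proc (device address, remote id,
 * and IRQ memory handle) and allocate an endpoint object describing the
 * remote peer. On success the caller owns *ep and must hand it back with
 * opal_common_ugni_endpoint_return(). */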
int opal_common_ugni_endpoint_for_proc (opal_common_ugni_device_t *dev, opal_proc_t *peer_proc,
                                        opal_common_ugni_endpoint_t **ep)
{
    opal_common_ugni_endpoint_t *endpoint;
    opal_common_ugni_modex_t *modex;
    size_t msg_size;
    int rc;

    assert (NULL != dev && NULL != ep && NULL != peer_proc);

    endpoint = OBJ_NEW(opal_common_ugni_endpoint_t);
    if (OPAL_UNLIKELY(NULL == endpoint)) {
        assert (0);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* Receive the modex blob published by the peer */
    OPAL_MODEX_RECV(rc, &opal_common_ugni_component,
                    &peer_proc->proc_name, (void **)&modex, &msg_size);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        OPAL_OUTPUT((-1, "btl/ugni error receiving modex"));
        /* do not leak the endpoint object on error */
        OBJ_RELEASE(endpoint);
        return rc;
    }

    endpoint->ep_rem_addr        = modex->addr;
    endpoint->ep_rem_id          = modex->id;
    endpoint->ep_rem_irq_memhndl = modex->irq_memhndl;

    endpoint->dev = dev;

    *ep = endpoint;

    free (modex);

    return OPAL_SUCCESS;
}
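
/* Release an endpoint obtained from opal_common_ugni_endpoint_for_proc(). */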
void opal_common_ugni_endpoint_return (opal_common_ugni_endpoint_t *ep)
{
    assert(NULL != ep);

    OBJ_RELEASE(ep);
}
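
/* Create a uGNI endpoint handle on cep's device, bind it to the remote
 * address/id stored in cep, and return the bound handle in ep_handle. */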
int opal_common_ugni_ep_create (opal_common_ugni_endpoint_t *cep, gni_cq_handle_t cq,
                                gni_ep_handle_t *ep_handle)
{
    gni_return_t grc;

    if (OPAL_UNLIKELY(NULL == cep)) {
        assert (0);
        return OPAL_ERR_BAD_PARAM;
    }

    /* create a uGNI endpoint handle and bind it to the remote peer. all
     * uGNI calls on the device are serialized with the device lock. */
    OPAL_THREAD_LOCK(&cep->dev->dev_lock);
    grc = GNI_EpCreate (cep->dev->dev_handle, cq, ep_handle);
    OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
        return opal_common_rc_ugni_to_opal (grc);
    }

    OPAL_THREAD_LOCK(&cep->dev->dev_lock);
    grc = GNI_EpBind (*ep_handle, cep->ep_rem_addr, cep->ep_rem_id);
    OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);

    if (GNI_RC_SUCCESS != grc) {
        /* clean up the unbound endpoint handle before returning the error */
        OPAL_THREAD_LOCK(&cep->dev->dev_lock);
        GNI_EpDestroy (*ep_handle);
        OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);
        return opal_common_rc_ugni_to_opal (grc);
    }

    return OPAL_SUCCESS;
}
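
/* Unbind and destroy a uGNI endpoint handle created by
 * opal_common_ugni_ep_create(). *ep is zeroed on return. */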
int opal_common_ugni_ep_destroy (gni_ep_handle_t *ep)
{
    gni_return_t rc;

    if (NULL == ep || 0 == *ep) {
        return OPAL_SUCCESS;
    }

    /* TODO: need to fix, may be outstanding tx's, etc. */
    rc = GNI_EpUnbind (*ep);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        /* should warn */
    }

    rc = GNI_EpDestroy (*ep);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        /* should warn */
    }

    *ep = 0;

    return OPAL_SUCCESS;
}