ugni: don't release eager fragments until we get local smsg completion
This commit was SVN r25796.
Этот коммит содержится в:
родитель
60121664d1
Коммит
97dad0ac49
@ -1,6 +1,6 @@
|
||||
# -*- indent-tabs-mode:nil -*-
|
||||
#
|
||||
# Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
# Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
#
|
||||
@ -33,7 +33,6 @@ ugni_SOURCES = \
|
||||
btl_ugni_frag.h \
|
||||
btl_ugni_rdma.h \
|
||||
btl_ugni_send.c \
|
||||
btl_ugni_sendi.c \
|
||||
btl_ugni_put.c \
|
||||
btl_ugni_get.c \
|
||||
btl_ugni.h
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -298,13 +298,12 @@ static inline void mca_btl_ugni_callback_reverse_get (mca_btl_base_module_t *btl
|
||||
{
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
|
||||
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) des;
|
||||
uint32_t msg_id = ORTE_PROC_MY_NAME->vpid;
|
||||
|
||||
BTL_VERBOSE(("reverse get (put) for rem_ctx %p complete", des->des_cbdata));
|
||||
|
||||
/* tell peer the put is complete */
|
||||
rc = GNI_SmsgSendWTag (frag->endpoint->common->ep_handle, &des->des_cbdata, sizeof (void *),
|
||||
NULL, 0, msg_id, MCA_BTL_UGNI_TAG_PUT_COMPLETE);
|
||||
NULL, 0, -1, MCA_BTL_UGNI_TAG_PUT_COMPLETE);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
/* turn off btl ownership for now */
|
||||
des->des_flags &= ~MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
|
||||
@ -357,16 +356,22 @@ mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
|
||||
int count = 0;
|
||||
int rc;
|
||||
|
||||
/* loop until the mailbox is empty */
|
||||
/* per uGNI documentation we loop until the mailbox is empty */
|
||||
do {
|
||||
uint8_t tag = GNI_SMSG_ANY_TAG;
|
||||
|
||||
rc = GNI_SmsgGetNextWTag (ep->common->ep_handle, (void **) &data_ptr, &tag);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
if (GNI_RC_NOT_DONE == rc) {
|
||||
BTL_VERBOSE(("no smsg message waiting. rc = %d", rc));
|
||||
break;
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
fprintf (stderr, "Unhandled Smsg error: %d\n", rc);
|
||||
assert (0);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(0 == data_ptr)) {
|
||||
BTL_ERROR(("null data ptr!"));
|
||||
return OMPI_ERROR;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -23,12 +23,14 @@ OBJ_CLASS_INSTANCE(mca_btl_base_endpoint_t, opal_object_t,
|
||||
/*
 * Set up the list and flag members of a freshly created endpoint object:
 * constructs the pending_list and pending_smsg_sends lists and clears the
 * smsgs_waiting flag.
 * NOTE(review): presumably registered as the class constructor through the
 * OBJ_CLASS_INSTANCE declaration just above this hunk -- confirm.
 */
static void mca_btl_ugni_ep_construct (mca_btl_base_endpoint_t *ep)
|
||||
{
|
||||
OBJ_CONSTRUCT(&ep->pending_list, opal_list_t);
|
||||
/* fragments are appended to this list by the send paths and removed again
 * in mca_btl_ugni_local_smsg_complete() */
OBJ_CONSTRUCT(&ep->pending_smsg_sends, opal_list_t);
|
||||
ep->smsgs_waiting = false;
|
||||
}
|
||||
|
||||
/*
 * Tear down the list members of an endpoint object: destructs pending_list
 * and pending_smsg_sends, mirroring the OBJ_CONSTRUCT calls made in
 * mca_btl_ugni_ep_construct().
 * NOTE(review): any fragments still queued on pending_smsg_sends are not
 * returned to their free list here -- confirm that callers drain the list
 * before the endpoint is destroyed.
 */
static void mca_btl_ugni_ep_destruct (mca_btl_base_endpoint_t *ep)
|
||||
{
|
||||
OBJ_DESTRUCT(&ep->pending_list);
|
||||
OBJ_DESTRUCT(&ep->pending_smsg_sends);
|
||||
}
|
||||
|
||||
static void mca_btl_ugni_smsg_mbox_construct (mca_btl_ugni_smsg_mbox_t *mbox) {
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -37,6 +37,7 @@ struct mca_btl_base_endpoint_t {
|
||||
mca_btl_ugni_smsg_mbox_t *mailbox;
|
||||
|
||||
opal_list_t pending_list;
|
||||
opal_list_t pending_smsg_sends;
|
||||
|
||||
/* true if a frag was received before the connection was complete */
|
||||
bool smsgs_waiting;
|
||||
@ -66,6 +67,7 @@ static inline int mca_btl_ugni_init_ep (mca_btl_base_endpoint_t **ep,
|
||||
}
|
||||
|
||||
endpoint->btl = btl;
|
||||
endpoint->common->btl_ctx = (void *) endpoint;
|
||||
|
||||
*ep = endpoint;
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -33,6 +33,7 @@ struct mca_btl_ugni_base_frag_t {
|
||||
ompi_free_list_t *my_list;
|
||||
mca_btl_ugni_module_t *btl;
|
||||
int tries;
|
||||
uint32_t msg_id;
|
||||
};
|
||||
|
||||
typedef struct mca_btl_ugni_base_frag_t mca_btl_ugni_base_frag_t;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -80,7 +80,7 @@ mca_btl_ugni_module_t mca_btl_ugni_module = {
|
||||
mca_btl_ugni_prepare_src,
|
||||
mca_btl_ugni_prepare_dst,
|
||||
mca_btl_ugni_send,
|
||||
mca_btl_ugni_sendi,
|
||||
NULL, /* sendi */
|
||||
mca_btl_ugni_put,
|
||||
mca_btl_ugni_get,
|
||||
NULL, /* mca_btl_base_dump, */
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -104,7 +104,6 @@ static inline int mca_btl_ugni_start_reverse_get (struct mca_btl_base_module_t *
|
||||
mca_btl_ugni_base_frag_t *frag) {
|
||||
/* off alignment/off size. switch to put */
|
||||
mca_btl_base_segment_t segments[2];
|
||||
uint32_t msg_id = ORTE_PROC_MY_NAME->vpid;
|
||||
void *post_desc_ptr = &(frag->post_desc);
|
||||
int rc;
|
||||
|
||||
@ -113,7 +112,7 @@ static inline int mca_btl_ugni_start_reverse_get (struct mca_btl_base_module_t *
|
||||
|
||||
rc = GNI_SmsgSendWTag (frag->endpoint->common->ep_handle, segments,
|
||||
sizeof (segments), &post_desc_ptr, sizeof (void *),
|
||||
msg_id, MCA_BTL_UGNI_TAG_PUT_INIT);
|
||||
-1, MCA_BTL_UGNI_TAG_PUT_INIT);
|
||||
if (OPAL_UNLIKELY(rc == GNI_RC_NOT_DONE)) {
|
||||
/* send this smsg packet later */
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -14,12 +14,44 @@
|
||||
#include "btl_ugni_frag.h"
|
||||
#include "btl_ugni_endpoint.h"
|
||||
|
||||
void mca_btl_ugni_local_smsg_complete (void *btl_ctx, uint32_t msg_id, int rc)
|
||||
{
|
||||
mca_btl_base_endpoint_t *btl_peer = (mca_btl_base_endpoint_t *) btl_ctx;
|
||||
mca_btl_ugni_base_frag_t *frag;
|
||||
opal_list_item_t *item;
|
||||
|
||||
for (item = opal_list_get_first (&btl_peer->pending_smsg_sends) ;
|
||||
item != opal_list_get_end (&btl_peer->pending_smsg_sends) ;
|
||||
item = opal_list_get_next (item)) {
|
||||
frag = (mca_btl_ugni_base_frag_t *) item;
|
||||
if (frag->msg_id == msg_id) {
|
||||
opal_list_remove_item (&btl_peer->pending_smsg_sends, item);
|
||||
break;
|
||||
}
|
||||
frag = NULL;
|
||||
}
|
||||
|
||||
if (!frag) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* completion callback */
|
||||
if (NULL != frag->base.des_cbfunc) {
|
||||
frag->base.des_cbfunc(&btl_peer->btl->super, btl_peer, &frag->base, rc);
|
||||
}
|
||||
|
||||
if (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP) {
|
||||
MCA_BTL_UGNI_FRAG_RETURN (frag);
|
||||
}
|
||||
}
|
||||
|
||||
int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *btl_peer,
|
||||
struct mca_btl_base_descriptor_t *descriptor,
|
||||
mca_btl_base_tag_t tag)
|
||||
{
|
||||
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) descriptor;
|
||||
static uint8_t msg_num = 0;
|
||||
int rc;
|
||||
|
||||
BTL_VERBOSE(("btl/ugni sending descriptor %p from %d -> %d. length = %d", (void *)descriptor,
|
||||
@ -37,11 +69,12 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
|
||||
|
||||
frag->hdr->tag = tag;
|
||||
frag->hdr->len = frag->segments[0].seg_len;
|
||||
frag->msg_id = (btl_peer->common->ep_rem_id & 0x00ffffff) | ((uint32_t)msg_num++ << 24) ;
|
||||
|
||||
/* check endpoint state */
|
||||
rc = GNI_SmsgSendWTag (btl_peer->common->ep_handle, frag->hdr,
|
||||
descriptor->des_src->seg_len + sizeof (frag->hdr[0]),
|
||||
NULL, 0, -1, MCA_BTL_UGNI_TAG_SEND);
|
||||
sizeof (frag->hdr[0]), descriptor->des_src->seg_addr.pval,
|
||||
descriptor->des_src->seg_len, frag->msg_id, MCA_BTL_UGNI_TAG_SEND);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
BTL_VERBOSE(("GNI_SmsgSendWTag failed with rc = %d", rc));
|
||||
|
||||
@ -54,14 +87,7 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
if (MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags) {
|
||||
/* completion callback */
|
||||
frag->base.des_cbfunc(&btl_peer->btl->super, btl_peer, &frag->base, OMPI_SUCCESS);
|
||||
}
|
||||
opal_list_append (&btl_peer->pending_smsg_sends, (opal_list_item_t *) frag);
|
||||
|
||||
if (descriptor->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP) {
|
||||
MCA_BTL_UGNI_FRAG_RETURN (frag);
|
||||
}
|
||||
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
@ -23,7 +23,6 @@ int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
|
||||
mca_btl_base_descriptor_t **descriptor)
|
||||
{
|
||||
size_t length = header_size + payload_size;
|
||||
uint32_t msg_id = ORTE_PROC_MY_NAME->vpid;
|
||||
mca_btl_ugni_base_frag_t *frag;
|
||||
uint32_t iov_count = 1;
|
||||
void *data_ptr = NULL;
|
||||
@ -70,16 +69,17 @@ int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
|
||||
assert (max_data == payload_size);
|
||||
|
||||
header_size += payload_size;
|
||||
payload_size = 0;
|
||||
} else if (payload_size) {
|
||||
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
||||
memmove ((uintptr_t)frag->segments[0].seg_addr.pval + header_size, data_ptr, payload_size);
|
||||
}
|
||||
|
||||
header_size += sizeof (frag->hdr[0]);
|
||||
frag->base.des_cbfunc = NULL;
|
||||
frag->msg_id = endpoint->common->ep_rem_id & 0x00ffffff;
|
||||
|
||||
/* send message */
|
||||
rc = GNI_SmsgSendWTag (endpoint->common->ep_handle, frag->hdr, header_size,
|
||||
data_ptr, payload_size, msg_id, MCA_BTL_UGNI_TAG_SEND);
|
||||
rc = GNI_SmsgSendWTag (endpoint->common->ep_handle, frag->hdr, sizeof (frag->hdr[0]),
|
||||
frag->segments[0].seg_addr.pval, length, frag->msg_id, MCA_BTL_UGNI_TAG_SEND);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
BTL_VERBOSE(("GNI_SmsgSendWTag failed with rc = %d", rc));
|
||||
MCA_BTL_UGNI_FRAG_RETURN (frag);
|
||||
@ -88,7 +88,7 @@ int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
MCA_BTL_UGNI_FRAG_RETURN (frag);
|
||||
opal_list_append (&endpoint->pending_smsg_sends, (opal_list_item_t *) frag);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -121,6 +121,8 @@ int ompi_common_ugni_init (void);
|
||||
*/
|
||||
int ompi_common_ugni_fini (void);
|
||||
|
||||
extern void mca_btl_ugni_local_smsg_complete (void *, uint32_t, int);
|
||||
|
||||
static inline int
|
||||
ompi_common_ugni_process_completed_post (ompi_common_ugni_device_t *dev,
|
||||
gni_cq_handle_t cq_handle) {
|
||||
@ -130,8 +132,7 @@ ompi_common_ugni_process_completed_post (ompi_common_ugni_device_t *dev,
|
||||
uint32_t recoverable = 1;
|
||||
|
||||
rc = GNI_CqGetEvent (cq_handle, &event_data);
|
||||
if (GNI_RC_NOT_DONE == rc || GNI_CQ_GET_TYPE(event_data) != GNI_CQ_EVENT_TYPE_POST) {
|
||||
/* ignore smsg completion */
|
||||
if (GNI_RC_NOT_DONE == rc) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -139,10 +140,27 @@ ompi_common_ugni_process_completed_post (ompi_common_ugni_device_t *dev,
|
||||
/* TODO -- need to handle overrun -- how do we do this without an event?
|
||||
will the event eventually come back? Ask Cray */
|
||||
OPAL_OUTPUT((-1, "post error! cq overrun = %d", (int)GNI_CQ_OVERRUN(event_data)));
|
||||
assert (GNI_RC_SUCCESS == rc);
|
||||
assert (0);
|
||||
return ompi_common_rc_ugni_to_ompi (rc);
|
||||
}
|
||||
|
||||
/* local SMS completion */
|
||||
if (GNI_CQ_GET_TYPE(event_data) == GNI_CQ_EVENT_TYPE_SMSG) {
|
||||
uint32_t msg_id = GNI_CQ_GET_MSG_ID(event_data);
|
||||
uint32_t ep_id = 0x00ffffff & msg_id;
|
||||
|
||||
if ((uint32_t)-1 == msg_id) {
|
||||
/* nothing to do */
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* inform the btl of local smsg completion */
|
||||
mca_btl_ugni_local_smsg_complete (dev->dev_eps[ep_id]->btl_ctx, msg_id,
|
||||
GNI_CQ_STATUS_OK(event_data) ? OMPI_SUCCESS : OMPI_ERROR);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
rc = GNI_GetCompleted (cq_handle, event_data, (gni_post_descriptor_t **) &desc);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
OPAL_OUTPUT((-1, "Error in GNI_GetComplete %s", gni_err_str[rc]));
|
||||
@ -183,7 +201,7 @@ static inline int ompi_common_ugni_progress (void) {
|
||||
|
||||
for (i = 0, count = 0 ; i < ompi_common_ugni_module.device_count ; ++i) {
|
||||
dev = ompi_common_ugni_module.devices + i;
|
||||
/* progress fma transactions (ignore local smsg) */
|
||||
/* progress fma/local smsg completions */
|
||||
count += ompi_common_ugni_process_completed_post (dev, dev->dev_local_cq);
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -31,6 +31,7 @@ struct ompi_common_ugni_endpoint_t {
|
||||
struct ompi_common_ugni_device_t *dev; /**< device this endpoint is using */
|
||||
opal_mutex_t lock;
|
||||
int bind_count; /**< bind reference count */
|
||||
void *btl_ctx; /**< btl context for this endpoint */
|
||||
};
|
||||
typedef struct ompi_common_ugni_endpoint_t ompi_common_ugni_endpoint_t;
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user