
Merge pull request #327 from hppritcha/topic/async_progress

Topic/async progress
This commit is contained in:
Howard Pritchard 2015-01-05 16:20:44 -07:00
parent f009c8425e 065c756860
commit c857cc926c
14 changed files with 378 additions and 31 deletions

View file

@ -647,7 +647,7 @@ int mca_pml_ob1_send_fin( ompi_proc_t* proc,
int rc;
mca_bml_base_alloc(bml_btl, &fin, order, sizeof(mca_pml_ob1_fin_hdr_t),
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_FLAGS_SIGNAL);
if(NULL == fin) {
MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status);

View file

@ -53,6 +53,7 @@
#define MCA_PML_OB1_HDR_FLAGS_PIN 4 /* is user buffer pinned */
#define MCA_PML_OB1_HDR_FLAGS_CONTIG 8 /* is user buffer contiguous */
#define MCA_PML_OB1_HDR_FLAGS_NORDMA 16 /* rest will be sent by copy-in-out */
#define MCA_PML_OB1_HDR_FLAGS_SIGNAL 32 /* message can be optionally signalling */
/**
* Common hdr attributes - must be first element in each hdr type

View file

@ -227,7 +227,8 @@ int mca_pml_ob1_recv_request_ack_send_btl(
/* allocate descriptor */
mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER,
sizeof(mca_pml_ob1_ack_hdr_t),
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP |
MCA_BTL_DES_SEND_ALWAYS_CALLBACK | MCA_BTL_DES_FLAGS_SIGNAL);
if( OPAL_UNLIKELY(NULL == des) ) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
@ -370,7 +371,7 @@ static int mca_pml_ob1_init_get_fallback (mca_pml_ob1_rdma_frag_t *frag,
/* prepare a descriptor for rdma control message */
mca_bml_base_alloc (bml_btl, &ctl, MCA_BTL_NO_ORDER, sizeof (mca_pml_ob1_rdma_hdr_t) + seg_size,
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP |
MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
MCA_BTL_DES_SEND_ALWAYS_CALLBACK | MCA_BTL_DES_FLAGS_SIGNAL);
if (OPAL_UNLIKELY(NULL == ctl)) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
@ -985,7 +986,8 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
/* prepare a descriptor for rdma control message */
mca_bml_base_alloc(bml_btl, &ctl, MCA_BTL_NO_ORDER, sizeof(mca_pml_ob1_rdma_hdr_t) + seg_size,
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP |
MCA_BTL_DES_SEND_ALWAYS_CALLBACK | MCA_BTL_DES_FLAGS_SIGNAL);
if( OPAL_UNLIKELY(NULL == ctl) ) {
mca_bml_base_free(bml_btl,dst);

View file

@ -384,7 +384,8 @@ int mca_pml_ob1_send_request_start_buffered(
mca_bml_base_alloc(bml_btl, &des,
MCA_BTL_NO_ORDER,
sizeof(mca_pml_ob1_rendezvous_hdr_t) + size,
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP |
MCA_BTL_DES_FLAGS_SIGNAL);
if( OPAL_UNLIKELY(NULL == des) ) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
@ -720,7 +721,8 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq,
/* allocate space for get hdr + segment list */
mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, sizeof (*hdr) + seg_size,
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP |
MCA_BTL_DES_FLAGS_SIGNAL);
if( OPAL_UNLIKELY(NULL == des) ) {
/* NTH: no need to reset the converter here. it will be reset before it is retried */
mca_bml_base_free(bml_btl, src);
@ -811,7 +813,8 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq,
MCA_BTL_NO_ORDER,
sizeof(mca_pml_ob1_rendezvous_hdr_t),
&size,
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP,
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP |
MCA_BTL_DES_FLAGS_SIGNAL,
&des );
MEMCHECKER(
memchecker_call(&opal_memchecker_base_mem_noaccess,
@ -828,7 +831,7 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq,
/* build hdr */
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
hdr->hdr_common.hdr_flags = flags;
hdr->hdr_common.hdr_flags = flags | MCA_PML_OB1_HDR_FLAGS_SIGNAL;
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV;
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
@ -1023,7 +1026,9 @@ cannot_pack:
&sendreq->req_send.req_base.req_convertor,
MCA_BTL_NO_ORDER,
sizeof(mca_pml_ob1_frag_hdr_t),
&size, MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK, &des);
&size, MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK |
MCA_BTL_DES_FLAGS_SIGNAL,
&des);
MEMCHECKER(
memchecker_call(&opal_memchecker_base_mem_noaccess,
sendreq->req_send.req_base.req_addr,

View file

@ -38,6 +38,7 @@ ugni_SOURCES = \
btl_ugni.h \
btl_ugni_smsg.h \
btl_ugni_smsg.c \
btl_ugni_progress_thread.c \
btl_ugni_prepare.h
mcacomponentdir = $(opallibdir)

View file

@ -178,6 +178,13 @@ typedef struct mca_btl_ugni_component_t {
/* Number of mailboxes to allocate in each block */
unsigned int mbox_increment;
/* Indicate whether progress thread requested */
bool progress_thread_requested;
/* Indicate whether progress thread allowed */
bool progress_thread_enabled;
} mca_btl_ugni_component_t;
int mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
@ -311,5 +318,8 @@ static inline uint64_t mca_btl_ugni_proc_name_to_id (opal_process_name_t name) {
return ((uint64_t) (name.jobid & 0x7fffffff) << 32 | name.vpid);
}
int mca_btl_ugni_spawn_progress_thread(struct mca_btl_base_module_t* btl);
int mca_btl_ugni_kill_progress_thread(void);
#endif

View file

@ -37,6 +37,7 @@ int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
opal_proc_t *my_proc = opal_proc_local_get();
size_t i;
int rc;
void *mmap_start_addr;
if (false == ugni_module->initialized) {
@ -119,6 +120,46 @@ int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
return opal_common_rc_ugni_to_opal (rc);
}
if (mca_btl_ugni_component.progress_thread_enabled) {
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.local_cq_size,
0, GNI_CQ_BLOCKING, NULL, NULL, &ugni_module->rdma_local_irq_cq);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating local BTE/FMA CQ"));
return opal_common_rc_ugni_to_opal (rc);
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.remote_cq_size,
0, GNI_CQ_BLOCKING, NULL, NULL, &ugni_module->smsg_remote_irq_cq);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating remote SMSG CQ"));
return opal_common_rc_ugni_to_opal (rc);
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_EpCreate (ugni_module->device->dev_handle, ugni_module->rdma_local_cq,
&ugni_module->local_ep);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("error creating local ugni endpoint"));
return opal_common_rc_ugni_to_opal (rc);
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_EpBind (ugni_module->local_ep,
ugni_module->device->dev_addr,
getpid());
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("error binding local ugni endpoint"));
return opal_common_rc_ugni_to_opal (rc);
}
}
rc = mca_btl_ugni_setup_mpools (ugni_module);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("btl/ugni error setting up mpools/free lists"));
@ -131,6 +172,34 @@ int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
return rc;
}
/*
* If the progress thread is enabled, register a page of memory
* with the smsg_remote_irq_cq. This memory handle is passed
* to ranks which want to communicate with this rank. A rank which
* posts a GNI_PostCqWrite targeting this memory handle generates
* an IRQ at the target node, which ultimately causes the progress
* thread in the target rank to become schedulable.
*/
if (mca_btl_ugni_component.progress_thread_enabled) {
mmap_start_addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
if (NULL == mmap_start_addr) {
BTL_ERROR(("btl/ugni mmap returned error"));
return OPAL_ERR_OUT_OF_RESOURCE;
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_MemRegister(ugni_module->device->dev_handle,
(unsigned long)mmap_start_addr,
4096,
ugni_module->smsg_remote_irq_cq,
GNI_MEM_READWRITE,
-1,
&ugni_module->device->smsg_irq_mhndl);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
mca_btl_ugni_spawn_progress_thread(btl);
}
ugni_module->initialized = true;
}
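
To illustrate the comment above about the registered IRQ page: a hedged sketch, not part of this commit, of the peer-side call that triggers the wake-up. Names such as wake_remote_progress_thread and ep_to_target are illustrative; only uGNI calls and descriptor fields that already appear in this diff are used.

#include <string.h>
#include "gni_pub.h"

/* Illustrative only: wake the progress thread on the rank that owns
 * rmt_irq_mem_hndl (the handle that rank registered with GNI_MemRegister above
 * and exported through its SMSG mailbox attributes). */
static gni_return_t wake_remote_progress_thread (gni_ep_handle_t ep_to_target,
                                                 gni_mem_handle_t rmt_irq_mem_hndl)
{
    gni_post_descriptor_t desc;

    memset (&desc, 0, sizeof (desc));
    desc.type            = GNI_POST_CQWRITE;
    desc.cqwrite_value   = 0xdead;            /* payload unused; only the interrupt matters */
    desc.cq_mode         = GNI_CQMODE_GLOBAL_EVENT;
    desc.dlvr_mode       = GNI_DLVMODE_IN_ORDER;
    desc.remote_mem_hndl = rmt_irq_mem_hndl;

    /* The write lands in the target's smsg_remote_irq_cq, raising an IRQ that
     * unblocks its GNI_CqVectorMonitor() so the progress thread can call
     * opal_progress(). */
    return GNI_PostCqWrite (ep_to_target, &desc);
}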

View file

@ -189,6 +189,16 @@ btl_ugni_component_register(void)
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.smsg_page_size);
mca_btl_ugni_component.progress_thread_requested = 0;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"request_progress_thread",
"Enable to request ugni btl progress thread - requires MPI_THREAD_MULTIPLE support",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.progress_thread_requested);
/* btl/ugni can only support a fixed set of mpools (these mpools have compatible resource
* structures) */
rc = mca_base_var_enum_create ("btl_ugni_mpool", mpool_values, &new_enum);
@ -293,6 +303,10 @@ mca_btl_ugni_component_init (int *num_btl_modules,
mca_btl_ugni_component.ugni_fma_limit = 65536;
}
if (enable_mpi_threads && mca_btl_ugni_component.progress_thread_requested) {
mca_btl_ugni_component.progress_thread_enabled = 1;
}
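
Usage note (hedged): the diff registers the variable as "request_progress_thread" but does not spell out the full MCA parameter name; assuming the usual framework_component_variable convention it would resolve to btl_ugni_request_progress_thread. It only takes effect when enable_mpi_threads is also true, i.e. the application initialized with full thread support. A minimal sketch:

/* Hypothetical launch line (parameter name inferred, not stated in the diff):
 *   mpirun --mca btl_ugni_request_progress_thread 1 -n 2 ./async_test
 */
#include <mpi.h>
#include <stdio.h>

int main (int argc, char **argv)
{
    int provided;

    /* the progress thread is only enabled when the job was initialized
     * with MPI_THREAD_MULTIPLE, so request it explicitly */
    MPI_Init_thread (&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    if (MPI_THREAD_MULTIPLE != provided) {
        fprintf (stderr, "MPI_THREAD_MULTIPLE not available; progress thread stays off\n");
    }

    /* ... nonblocking communication that can now progress asynchronously ... */

    MPI_Finalize ();
    return 0;
}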
/* Initialize ugni library and create communication domain */
rc = opal_common_ugni_init();
if (OPAL_SUCCESS != rc) {
@ -564,7 +578,9 @@ static int mca_btl_ugni_component_progress (void)
count += mca_btl_ugni_progress_local_smsg (ugni_module);
count += mca_btl_ugni_progress_remote_smsg (ugni_module);
count += mca_btl_ugni_progress_rdma (ugni_module, 0);
if (mca_btl_ugni_component.progress_thread_enabled) {
count += mca_btl_ugni_progress_rdma (ugni_module, 1);
}
}
return count;

View file

@ -81,6 +81,7 @@ typedef struct mca_btl_ugni_base_frag_t {
mca_btl_base_endpoint_t *endpoint;
mca_btl_ugni_reg_t *registration;
ompi_free_list_t *my_list;
frag_cb_t cbfunc;
} mca_btl_ugni_base_frag_t;
typedef struct mca_btl_ugni_base_frag_t mca_btl_ugni_smsg_frag_t;

View file

@ -145,21 +145,35 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
rc = opal_hash_table_get_next_key_uint64 (&ugni_module->id_to_endpoint, &key, (void **) &ep, node, &node);
}
if (mca_btl_ugni_component.progress_thread_enabled) {
mca_btl_ugni_kill_progress_thread();
}
/* destroy all cqs */
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqDestroy (ugni_module->rdma_local_cq);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error tearing down local BTE/FMA CQ"));
BTL_ERROR(("error tearing down local BTE/FMA CQ - %s",gni_err_str[rc]));
}
rc = GNI_CqDestroy (ugni_module->smsg_local_cq);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error tearing down local SMSG CQ"));
BTL_ERROR(("error tearing down TX SMSG CQ - %s",gni_err_str[rc]));
}
rc = GNI_CqDestroy (ugni_module->smsg_remote_cq);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error tearing down remote SMSG CQ"));
BTL_ERROR(("error tearing down RX SMSG CQ - %s",gni_err_str[rc]));
}
rc = GNI_CqDestroy (ugni_module->rdma_local_irq_cq);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error tearing down local BTE/FMA CQ - %s",gni_err_str[rc]));
}
rc = GNI_CqDestroy (ugni_module->smsg_remote_irq_cq);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error tearing down remote SMSG CQ - %s",gni_err_str[rc]));
}
/* cancel wildcard post */
@ -173,7 +187,7 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
/* tear down wildcard endpoint */
rc = GNI_EpDestroy (ugni_module->wildcard_ep);
if (GNI_RC_SUCCESS != rc) {
BTL_VERBOSE(("btl/ugni error destroying endpoint"));
BTL_VERBOSE(("btl/ugni error destroying endpoint - %s",gni_err_str[rc]));
}
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
}

197
opal/mca/btl/ugni/btl_ugni_progress_thread.c Normal file
View file

@ -0,0 +1,197 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "btl_ugni.h"
#include "btl_ugni_frag.h"
#include "btl_ugni_smsg.h"
#include "opal/include/opal/align.h"
static pthread_t mca_btl_ugni_progress_thread_id;
static pthread_mutex_t progress_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t progress_cond = PTHREAD_COND_INITIALIZER;
static volatile int stop_progress_thread = 0;
static volatile int progress_thread_done = 0;
static int thread_wakeups = 0;
static void *mca_btl_ugni_prog_thread_fn(void * data)
{
int rc,ret = OPAL_SUCCESS;
uint32_t which;
gni_return_t status;
gni_cq_handle_t cq_vec[2];
struct mca_btl_ugni_module_t *btl = (mca_btl_ugni_module_t *)data;
/*
* need to block signals
*/
cq_vec[0] = btl->smsg_remote_irq_cq;
cq_vec[1] = btl->rdma_local_irq_cq;
while (stop_progress_thread == 0) {
/*
* this ugni call doesn't need a lock
*/
status = GNI_CqVectorMonitor(cq_vec,
2,
-1,
&which);
if (status == GNI_RC_NOT_DONE) continue;
if ((status == GNI_RC_SUCCESS) && (stop_progress_thread == 0)) {
thread_wakeups++;
opal_progress();
}
}
/* Send a signal to the main thread saying we are done */
rc = pthread_mutex_lock(&progress_mutex);
if (0 != rc) {
BTL_ERROR(("btl/ugni pthread_mutex_lock returned %s ",strerror(rc)));
ret = OPAL_ERROR;
goto fn_exit;
}
progress_thread_done = 1;
rc = pthread_mutex_unlock(&progress_mutex);
if (0 != rc) {
BTL_ERROR(("btl/ugni pthread_mutex_unlock returned %s ",strerror(rc)));
ret = OPAL_ERROR;
goto fn_exit;
}
rc = pthread_cond_signal(&progress_cond);
if (0 != rc) {
BTL_ERROR(("btl/ugni pthread_cond_signal returned %s ",strerror(rc)));
ret = OPAL_ERROR;
}
fn_exit:
return ret;
}
int mca_btl_ugni_spawn_progress_thread(struct mca_btl_base_module_t *btl)
{
int rc, ret=OPAL_SUCCESS;
pthread_attr_t attr;
pthread_attr_init(&attr);
rc = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
if (0 != rc) {
BTL_ERROR(("btl/ugni pthread_attr_setdetachstate returned %s ",strerror(rc)));
ret = OPAL_ERROR;
goto fn_exit;
}
rc = pthread_create(&mca_btl_ugni_progress_thread_id,
&attr, mca_btl_ugni_prog_thread_fn, (void *)btl);
if (0 != rc) {
BTL_ERROR(("btl/ugni pthread_create returned %s ",strerror(rc)));
ret = OPAL_ERROR;
goto fn_exit;
}
rc = pthread_attr_destroy(&attr);
if (0 != rc) {
BTL_ERROR(("btl/ugni pthread_attr_destory returned %s ",strerror(rc)));
ret = OPAL_ERROR;
}
fn_exit:
return ret;
}
int mca_btl_ugni_kill_progress_thread(void)
{
int rc, ret=OPAL_SUCCESS;
gni_return_t status;
static mca_btl_ugni_base_frag_t cq_write_frag;
stop_progress_thread = 1;
/*
* post a CQ to myself to wake my thread up
*/
cq_write_frag.post_desc.base.type = GNI_POST_CQWRITE;
cq_write_frag.post_desc.base.cqwrite_value = 0xdead; /* up to 48 bytes here, not used for now */
cq_write_frag.post_desc.base.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
cq_write_frag.post_desc.base.dlvr_mode = GNI_DLVMODE_IN_ORDER;
cq_write_frag.post_desc.base.src_cq_hndl = mca_btl_ugni_component.modules[0].rdma_local_cq;
cq_write_frag.post_desc.base.remote_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;
cq_write_frag.post_desc.tries = 0;
cq_write_frag.cbfunc = NULL;
OPAL_THREAD_LOCK(&mca_btl_ugni_component.modules[0].device->dev_lock);
status = GNI_PostCqWrite(mca_btl_ugni_component.modules[0].local_ep,
&cq_write_frag.post_desc.base);
OPAL_THREAD_UNLOCK(&mca_btl_ugni_component.modules[0].device->dev_lock);
/*
* TODO: if error returned, need to kill off thread manually
*/
if (GNI_RC_SUCCESS != status) {
BTL_ERROR(("GNI_PostCqWrite returned error - %s",gni_err_str[status]));
ret = opal_common_rc_ugni_to_opal(status);
goto fn_exit;
}
rc = pthread_mutex_lock(&progress_mutex);
if (0 != rc) {
BTL_ERROR(("btl/ugni pthread_mutex_lock returned %s ",strerror(rc)));
ret = OPAL_ERROR;
goto fn_exit;
}
while (!progress_thread_done) {
pthread_cond_wait(&progress_cond, &progress_mutex);
if (0 != rc) {
BTL_ERROR(("btl/ugni pthread_cond_wait returned %s ",strerror(rc)));
ret = OPAL_ERROR;
goto fn_exit;
}
}
rc = pthread_mutex_unlock(&progress_mutex);
if (0 != rc) {
BTL_ERROR(("btl/ugni pthread_mutex_unlock returned %s ",strerror(rc)));
ret = OPAL_ERROR;
goto fn_exit;
}
/*
* destroy the local_ep
*/
OPAL_THREAD_LOCK(&mca_btl_ugni_component.modules[0].device->dev_lock);
status = GNI_EpDestroy (mca_btl_ugni_component.modules[0].local_ep);
OPAL_THREAD_UNLOCK(&mca_btl_ugni_component.modules[0].device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != status)) {
BTL_ERROR(("GNI_EpDestroy returned error - %s", gni_err_str[status]));
ret = opal_common_rc_ugni_to_opal(status);
goto fn_exit;
}
fn_exit:
return ret;
}
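
The shutdown handshake in mca_btl_ugni_kill_progress_thread() can be isolated from uGNI: because the worker is created detached it cannot be joined, so the killer waits on a condition variable for a "done" flag instead. A minimal standalone sketch of that same pattern, assuming plain POSIX threads and nothing Cray-specific:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  done_cond  = PTHREAD_COND_INITIALIZER;
static volatile int stop_requested = 0;
static int worker_done = 0;

static void *worker (void *arg)
{
    (void) arg;
    while (!stop_requested) {
        usleep (1000);               /* stand-in for GNI_CqVectorMonitor + opal_progress */
    }
    /* announce completion under the mutex, then signal the waiter */
    pthread_mutex_lock (&done_mutex);
    worker_done = 1;
    pthread_mutex_unlock (&done_mutex);
    pthread_cond_signal (&done_cond);
    return NULL;
}

int main (void)
{
    pthread_t tid;
    pthread_attr_t attr;

    pthread_attr_init (&attr);
    pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
    pthread_create (&tid, &attr, worker, NULL);
    pthread_attr_destroy (&attr);

    stop_requested = 1;              /* the real code also posts a CqWrite to unblock the CQ wait */
    pthread_mutex_lock (&done_mutex);
    while (!worker_done) {
        pthread_cond_wait (&done_cond, &done_mutex);
    }
    pthread_mutex_unlock (&done_mutex);
    puts ("worker finished");
    return 0;
}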

View file

@ -36,9 +36,7 @@ static inline void init_gni_post_desc (mca_btl_ugni_base_frag_t *frag,
frag->post_desc.base.remote_addr = (uint64_t) rem_addr;
frag->post_desc.base.remote_mem_hndl = rem_mdh;
frag->post_desc.base.length = bufsize;
#if 0
frag->post_desc.base.rdma_mode = GNI_RDMAMODE_FENCE;
#endif
frag->post_desc.base.rdma_mode = 0;
frag->post_desc.base.rdma_mode = 0;
frag->post_desc.base.src_cq_hndl = cq_hndl;
frag->post_desc.tries = 0;
@ -70,9 +68,15 @@ static inline int mca_btl_ugni_post_bte (mca_btl_ugni_base_frag_t *frag, gni_pos
gni_return_t status;
/* Post descriptor */
init_gni_post_desc (frag, op_type, lcl_seg->base.seg_addr.lval, lcl_seg->memory_handle,
rem_seg->base.seg_addr.lval, rem_seg->memory_handle, lcl_seg->base.seg_len,
frag->endpoint->btl->rdma_local_cq);
if (mca_btl_ugni_component.progress_thread_enabled) {
init_gni_post_desc (frag, op_type, lcl_seg->base.seg_addr.lval, lcl_seg->memory_handle,
rem_seg->base.seg_addr.lval, rem_seg->memory_handle, lcl_seg->base.seg_len,
frag->endpoint->btl->rdma_local_irq_cq);
} else {
init_gni_post_desc (frag, op_type, lcl_seg->base.seg_addr.lval, lcl_seg->memory_handle,
rem_seg->base.seg_addr.lval, rem_seg->memory_handle, lcl_seg->base.seg_len,
frag->endpoint->btl->rdma_local_cq);
}
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
status = GNI_PostRdma (frag->endpoint->rdma_ep_handle, &frag->post_desc.base);

View file

@ -27,17 +27,8 @@ static void mca_btl_ugni_smsg_mbox_construct (mca_btl_ugni_smsg_mbox_t *mbox) {
mbox->attr.smsg_attr.msg_buffer = base_reg->base;
mbox->attr.smsg_attr.buff_size = mca_btl_ugni_component.smsg_mbox_size;
mbox->attr.smsg_attr.mem_hndl = ugni_reg->memory_hdl;
#if 0
fprintf(stderr,"ugni_reg->memory_hdl 0x%lx 0x%lx\n",
ugni_reg->memory_hdl.qword1,ugni_reg->memory_hdl.qword2);
#endif
mbox->attr.proc_id = mca_btl_ugni_proc_name_to_id (OPAL_PROC_MY_NAME);
mbox->attr.rmt_irq_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;
#if 0
fprintf(stderr,"Invoked mca_btl_ugni_smsg_mbox_construct with mbox->attr.rmt_irq_mem_hndl = 0x%lx 0x%lx\n",
mbox->attr.rmt_irq_mem_hndl.qword1,mbox->attr.rmt_irq_mem_hndl.qword2);
#endif
}
OBJ_CLASS_INSTANCE(mca_btl_ugni_smsg_mbox_t, ompi_free_list_item_t,
@ -91,8 +82,7 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
}
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
fprintf (stderr, "Unhandled Smsg error: %s\n", gni_err_str[rc]);
assert (0);
BTL_ERROR(("GNI_SmsgGetNextWTag returned error %s", gni_err_str[rc]));
return OPAL_ERROR;
}

View file

@ -82,12 +82,22 @@ static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_
return 1;
}
static void mca_btl_ugni_cqwrite_complete (struct mca_btl_ugni_base_frag_t *frag, int rc)
{
frag->flags |= MCA_BTL_UGNI_FRAG_COMPLETE;
BTL_VERBOSE(("cqwrite frag complete"));
mca_btl_ugni_frag_return (frag);
}
static inline int opal_mca_btl_ugni_smsg_send (mca_btl_ugni_base_frag_t *frag,
void *hdr, size_t hdr_len,
void *payload, size_t payload_len,
mca_btl_ugni_smsg_tag_t tag)
{
int rc;
gni_return_t grc;
mca_btl_ugni_base_frag_t *cq_write_frag = NULL;
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
grc = GNI_SmsgSendWTag (frag->endpoint->smsg_ep_handle, hdr, hdr_len,
@ -98,6 +108,33 @@ static inline int opal_mca_btl_ugni_smsg_send (mca_btl_ugni_base_frag_t *frag,
/* increment the active send counter */
opal_atomic_add_32(&frag->endpoint->btl->active_send_count,1);
if (mca_btl_ugni_component.progress_thread_enabled) {
if (frag->base.des_flags & MCA_BTL_DES_FLAGS_SIGNAL) {
rc = mca_btl_ugni_frag_alloc(frag->endpoint,
&frag->endpoint->btl->rdma_frags,
&cq_write_frag);
if (rc == OPAL_SUCCESS) {
cq_write_frag->base.des_flags = MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
cq_write_frag->registration = NULL;
cq_write_frag->endpoint = frag->endpoint;
cq_write_frag->post_desc.base.type = GNI_POST_CQWRITE;
cq_write_frag->post_desc.base.cqwrite_value = 0xdead; /* up to 48 bytes here, not used for now */
cq_write_frag->post_desc.base.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
cq_write_frag->post_desc.base.dlvr_mode = GNI_DLVMODE_IN_ORDER;
cq_write_frag->post_desc.base.src_cq_hndl = frag->endpoint->btl->rdma_local_cq;
cq_write_frag->post_desc.base.remote_mem_hndl = frag->endpoint->rmt_irq_mem_hndl;
cq_write_frag->post_desc.tries = 0;
cq_write_frag->cbfunc = mca_btl_ugni_cqwrite_complete;
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
grc = GNI_PostCqWrite(frag->endpoint->rdma_ep_handle, &cq_write_frag->post_desc.base);
OPAL_THREAD_UNLOCK(&frag->endpoint->common->dev->dev_lock);
if (grc == GNI_RC_ERROR_RESOURCE) { /* errors for PostCqWrite treated as non-fatal */
mca_btl_ugni_frag_return (cq_write_frag);
}
}
}
}
(void) mca_btl_ugni_progress_local_smsg ((mca_btl_ugni_module_t *) frag->endpoint->btl);
return OPAL_SUCCESS;
}