
If the SM BTL cannot send a fragment because the cyclic buffer is full, put the
fragment on the pending list and send it later instead of spinning on
opal_progress().

This commit was SVN r16537.
Gleb Natapov 2007-10-22 12:07:22 +00:00
parent 42d6cf27c3
commit 63dde87076
5 changed files with 89 additions and 19 deletions

View file

@@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -201,7 +202,8 @@ int mca_btl_sm_add_procs(
}
peer->peer_smp_rank = n_local_procs +
mca_btl_sm_component.num_smp_procs;
OBJ_CONSTRUCT(&peer->pending_sends, opal_list_t);
#if OMPI_ENABLE_PROGRESS_THREADS == 1
sprintf(path, "%s"OPAL_PATH_SEP"sm_fifo.%lu", orte_process_info.job_session_dir,
(unsigned long)procs[proc]->proc_name.vpid);
@@ -575,6 +577,11 @@ int mca_btl_sm_add_procs(
mca_btl_sm_component.sm_free_list_inc,
NULL);
opal_free_list_init(&mca_btl_sm_component.pending_send_fl,
sizeof(btl_sm_pending_send_item_t),
OBJ_CLASS(opal_free_list_item_t),
16, -1, 32);
/* set up mca_btl_sm_component.list_smp_procs */
mca_btl_sm_component.list_smp_procs=(int *)
malloc(mca_btl_sm_component.sm_max_procs*sizeof(int));
@@ -799,17 +806,21 @@ int mca_btl_sm_send(
mca_btl_base_tag_t tag)
{
mca_btl_sm_frag_t* frag = (mca_btl_sm_frag_t*)descriptor;
int rc;
/* availble header space */
/* available header space */
frag->hdr->len = frag->segment.seg_len;
/* type of message, pt-2-pt, one-sided, etc */
frag->hdr->tag = tag;
frag->endpoint = endpoint;
/*
* post the descriptor in the queue - post with the relative
* address
*/
MCA_BTL_SM_FIFO_WRITE(endpoint, endpoint->my_smp_rank, endpoint->peer_smp_rank, frag->hdr);
MCA_BTL_SM_FIFO_WRITE(endpoint, endpoint->my_smp_rank,
endpoint->peer_smp_rank, frag->hdr, false, rc);
return OMPI_SUCCESS;
}

View file

@@ -10,6 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -35,6 +36,7 @@
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include "opal/class/opal_free_list.h"
#include "ompi/class/ompi_free_list.h"
#include "ompi/class/ompi_bitmap.h"
#include "ompi/class/ompi_fifo.h"
@@ -124,6 +126,8 @@ struct mca_btl_sm_component_t {
awaiting resources */
struct mca_btl_base_endpoint_t **sm_peers;
opal_free_list_t pending_send_fl;
#if OMPI_ENABLE_PROGRESS_THREADS == 1
char sm_fifo_path[PATH_MAX]; /**< path to fifo used to signal this process */
int sm_fifo_fd; /**< file descriptor corresponding to opened fifo */
@@ -133,6 +137,13 @@ struct mca_btl_sm_component_t {
typedef struct mca_btl_sm_component_t mca_btl_sm_component_t;
OMPI_MODULE_DECLSPEC extern mca_btl_sm_component_t mca_btl_sm_component;
struct btl_sm_pending_send_item_t
{
opal_free_list_item_t super;
void *data;
};
typedef struct btl_sm_pending_send_item_t btl_sm_pending_send_item_t;
/**
* Register shared memory module parameters with the MCA framework
*/

View file

@@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -178,6 +179,7 @@ int mca_btl_sm_component_open(void)
OBJ_CONSTRUCT(&mca_btl_sm_component.sm_frags, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_btl_sm_component.sm_frags1, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_btl_sm_component.sm_frags2, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_btl_sm_component.pending_send_fl, opal_free_list_t);
return OMPI_SUCCESS;
}
@@ -322,6 +324,48 @@ void mca_btl_sm_component_event_thread(opal_object_t* thread)
}
#endif
void
btl_sm_add_pending(struct mca_btl_base_endpoint_t *ep, void *data, bool resend)
{
int rc;
btl_sm_pending_send_item_t *si;
opal_free_list_item_t *i;
OPAL_FREE_LIST_GET(&mca_btl_sm_component.pending_send_fl, i, rc);
/* don't handle error for now */
assert(i != NULL && rc == OMPI_SUCCESS);
si = (btl_sm_pending_send_item_t*)i;
si->data = data;
/* if data was on pending send list then prepend it to the list to
* minimize reordering */
if(resend)
opal_list_prepend(&ep->pending_sends, (opal_list_item_t*)si);
else
opal_list_append(&ep->pending_sends, (opal_list_item_t*)si);
}
static int process_pending_send(struct mca_btl_base_endpoint_t *ep)
{
btl_sm_pending_send_item_t *si;
void *data;
opal_list_item_t *i = opal_list_remove_first(&ep->pending_sends);
int rc;
if(NULL == i) return OMPI_ERROR;
si = (btl_sm_pending_send_item_t*)i;
data = si->data;
OPAL_FREE_LIST_RETURN(&mca_btl_sm_component.pending_send_fl, i);
MCA_BTL_SM_FIFO_WRITE(ep, ep->my_smp_rank, ep->peer_smp_rank, data,
true, rc);
return rc;
}
int mca_btl_sm_component_progress(void)
{
/* local variables */
@@ -330,16 +374,10 @@ int mca_btl_sm_component_progress(void)
mca_btl_sm_frag_t Frag;
ompi_fifo_t *fifo = NULL;
mca_btl_sm_hdr_t *hdr;
int my_smp_rank=mca_btl_sm_component.my_smp_rank;
int my_smp_rank = mca_btl_sm_component.my_smp_rank;
int proc;
int rc = 0;
/* send progress is made by the PML */
/*
* receive progress
*/
/* poll each fifo */
for(proc = 0; proc < mca_btl_sm_component.num_smp_procs - 1; proc++) {
peer_smp_rank = mca_btl_sm_component.list_smp_procs[proc];
@@ -377,13 +415,15 @@ int mca_btl_sm_component_progress(void)
/* completion callback */
frag->base.des_cbfunc(&mca_btl_sm.super, frag->endpoint,
&frag->base, status?OMPI_ERROR:OMPI_SUCCESS);
if(opal_list_get_size(&frag->endpoint->pending_sends))
process_pending_send(frag->endpoint);
break;
}
case MCA_BTL_SM_FRAG_SEND:
{
mca_btl_sm_recv_reg_t* reg;
/* change the address from address relative to the shared
* memory address, to a true virtual address */
* memory address, to a true virtual address */
hdr = (mca_btl_sm_hdr_t *)((char *)hdr +
mca_btl_sm_component.sm_offset[peer_smp_rank]);
/* recv upcall */
@@ -397,7 +437,7 @@ int mca_btl_sm_component_progress(void)
reg->cbdata);
MCA_BTL_SM_FIFO_WRITE(
mca_btl_sm_component.sm_peers[peer_smp_rank],
my_smp_rank, peer_smp_rank, hdr->frag);
my_smp_rank, peer_smp_rank, hdr->frag, false, rc);
break;
}
default:
@@ -406,7 +446,7 @@ int mca_btl_sm_component_progress(void)
MCA_BTL_SM_FRAG_STATUS_MASK);
MCA_BTL_SM_FIFO_WRITE(
mca_btl_sm_component.sm_peers[peer_smp_rank],
my_smp_rank, peer_smp_rank, hdr);
my_smp_rank, peer_smp_rank, hdr, false, rc);
break;
}
rc++;

View file

@@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -39,7 +40,9 @@ struct mca_btl_base_endpoint_t {
#if OMPI_ENABLE_PROGRESS_THREADS == 1
int fifo_fd; /**< pipe/fifo used to signal endpoint that data is queued */
#endif
opal_list_t pending_sends; /**< pending data to send */
};
void
btl_sm_add_pending(struct mca_btl_base_endpoint_t *ep, void *data, bool resend);
#endif

View file

@@ -4,7 +4,8 @@
#include "btl_sm.h"
#include "btl_sm_endpoint.h"
#define MCA_BTL_SM_FIFO_WRITE(endpoint_peer, my_smp_rank,peer_smp_rank,hdr) \
#define MCA_BTL_SM_FIFO_WRITE(endpoint_peer, my_smp_rank, \
peer_smp_rank, hdr, resend, rc) \
do { \
ompi_fifo_t* fifo; \
fifo=&(mca_btl_sm_component.fifo[peer_smp_rank][my_smp_rank]); \
@@ -13,10 +14,14 @@ do { \
if(opal_using_threads()) \
opal_atomic_lock(fifo->head_lock); \
/* post fragment */ \
while(ompi_fifo_write_to_head(hdr, fifo, \
mca_btl_sm_component.sm_mpool) != OMPI_SUCCESS) \
opal_progress(); \
MCA_BTL_SM_SIGNAL_PEER(endpoint_peer); \
if(ompi_fifo_write_to_head(hdr, fifo, mca_btl_sm_component.sm_mpool) \
!= OMPI_SUCCESS) { \
btl_sm_add_pending(endpoint_peer, hdr, resend); \
rc = OMPI_ERR_TEMP_OUT_OF_RESOURCE; \
} else { \
MCA_BTL_SM_SIGNAL_PEER(endpoint_peer); \
rc = OMPI_SUCCESS; \
} \
if(opal_using_threads()) \
opal_atomic_unlock(fifo->head_lock); \
} while(0)