2005-05-09 23:37:10 +04:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
|
|
|
* All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
|
|
|
* All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
|
|
|
|
|
|
|
|
#include "ompi_config.h"
|
2005-08-13 01:42:07 +04:00
|
|
|
#include "ompi/include/constants.h"
|
2005-05-09 23:37:10 +04:00
|
|
|
#include "mca/pml/pml.h"
|
|
|
|
#include "mca/ptl/ptl.h"
|
|
|
|
#include "pml_teg.h"
|
|
|
|
#include "pml_teg_proc.h"
|
|
|
|
#include "pml_teg_sendreq.h"
|
|
|
|
#include "pml_teg_recvreq.h"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static int mca_pml_teg_send_request_fini(struct ompi_request_t** request)
|
|
|
|
{
|
|
|
|
MCA_PML_TEG_FINI(request);
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mca_pml_teg_send_request_free(struct ompi_request_t** request)
|
|
|
|
{
|
|
|
|
MCA_PML_TEG_FREE(request);
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mca_pml_teg_send_request_cancel(struct ompi_request_t* request, int complete)
|
|
|
|
{
|
|
|
|
/* we dont cancel send requests by now */
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Object constructor for mca_pml_teg_send_request_t (registered through
 * OBJ_CLASS_INSTANCE in this file).
 *
 * Wires the file-local fini/free/cancel callbacks into the embedded request
 * and marks the request as not cached.
 *
 * @param req (IN/OUT)  Send request being constructed
 */
static void mca_pml_teg_send_request_construct(mca_pml_teg_send_request_t *req)
{
    /* request callbacks implemented in this file */
    req->req_send.req_base.req_ompi.req_cancel = mca_pml_teg_send_request_cancel;
    req->req_send.req_base.req_ompi.req_free   = mca_pml_teg_send_request_free;
    req->req_send.req_base.req_ompi.req_fini   = mca_pml_teg_send_request_fini;

    /* a freshly constructed request starts out flagged as not cached */
    req->req_cached = false;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Object destructor for mca_pml_teg_send_request_t (registered through
 * OBJ_CLASS_INSTANCE in this file).
 *
 * Intentionally empty: the constructor only stores function pointers and a
 * flag, so there is nothing to undo here.
 *
 * @param req (IN)  Send request being destroyed (unused)
 */
static void mca_pml_teg_send_request_destruct(mca_pml_teg_send_request_t *req)
{
    (void) req;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Class instance for the TEG send request: names mca_ptl_base_send_request_t
 * as the parent class and registers the constructor/destructor defined above.
 */
OBJ_CLASS_INSTANCE(mca_pml_teg_send_request_t,
                   mca_ptl_base_send_request_t,
                   mca_pml_teg_send_request_construct,
                   mca_pml_teg_send_request_destruct);
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Schedule message delivery across potentially multiple PTLs.
|
|
|
|
*
|
|
|
|
* @param request (IN) Request to schedule
|
|
|
|
* @return status Error status
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
int mca_pml_teg_send_request_schedule(mca_ptl_base_send_request_t* req)
|
|
|
|
{
|
2005-05-20 02:39:04 +04:00
|
|
|
ompi_proc_t *proc;
|
2005-08-12 06:41:14 +04:00
|
|
|
mca_pml_teg_proc_t* proc_pml;
|
2005-05-09 23:37:10 +04:00
|
|
|
int send_count = 0;
|
|
|
|
size_t bytes_remaining;
|
|
|
|
size_t num_ptl_avail;
|
|
|
|
size_t num_ptl;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Only allow one thread in this routine for a given request.
|
|
|
|
* However, we cannot block callers on a mutex, so simply keep track
|
|
|
|
* of the number of times the routine has been called and run through
|
|
|
|
* the scheduling logic once for every call.
|
|
|
|
*/
|
2005-07-04 02:45:48 +04:00
|
|
|
if(OPAL_THREAD_ADD32(&req->req_lock,1) == 1) {
|
2005-05-20 02:39:04 +04:00
|
|
|
proc = ompi_comm_peer_lookup(req->req_send.req_base.req_comm, req->req_send.req_base.req_peer);
|
2005-08-12 06:41:14 +04:00
|
|
|
proc_pml = (mca_pml_teg_proc_t*) proc->proc_pml;
|
2005-05-09 23:37:10 +04:00
|
|
|
do {
|
|
|
|
/* allocate remaining bytes to PTLs */
|
|
|
|
bytes_remaining = req->req_send.req_bytes_packed - req->req_offset;
|
|
|
|
num_ptl_avail = proc_pml->proc_ptl_next.ptl_size;
|
|
|
|
num_ptl = 0;
|
|
|
|
while(bytes_remaining > 0 && num_ptl++ < num_ptl_avail) {
|
|
|
|
mca_ptl_proc_t* ptl_proc = mca_ptl_array_get_next(&proc_pml->proc_ptl_next);
|
|
|
|
mca_ptl_base_module_t* ptl = ptl_proc->ptl;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
/* if this is the last PTL that is available to use, or the number of
|
|
|
|
* bytes remaining in the message is less than the PTLs minimum fragment
|
|
|
|
* size, then go ahead and give the rest of the message to this PTL.
|
|
|
|
*/
|
|
|
|
size_t bytes_to_frag;
|
|
|
|
if(num_ptl == num_ptl_avail || bytes_remaining < ptl->ptl_min_frag_size) {
|
|
|
|
bytes_to_frag = bytes_remaining;
|
|
|
|
|
|
|
|
/* otherwise attempt to give the PTL a percentage of the message
|
|
|
|
* based on a weighting factor. for simplicity calculate this as
|
|
|
|
* a percentage of the overall message length (regardless of amount
|
|
|
|
* previously assigned)
|
|
|
|
*/
|
|
|
|
} else {
|
|
|
|
bytes_to_frag = (ptl_proc->ptl_weight * bytes_remaining) / 100;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* makes sure that we don't exceed ptl_max_frag_size */
|
|
|
|
if(ptl->ptl_max_frag_size != 0 && bytes_to_frag > ptl->ptl_max_frag_size)
|
|
|
|
bytes_to_frag = ptl->ptl_max_frag_size;
|
|
|
|
|
|
|
|
rc = ptl->ptl_put(ptl, ptl_proc->ptl_peer, req, req->req_offset, bytes_to_frag, 0);
|
|
|
|
if(rc == OMPI_SUCCESS) {
|
|
|
|
send_count++;
|
|
|
|
bytes_remaining = req->req_send.req_bytes_packed - req->req_offset;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* unable to complete send - queue for later */
|
|
|
|
if(send_count == 0) {
|
2005-07-04 02:45:48 +04:00
|
|
|
OPAL_THREAD_LOCK(&mca_pml_teg.teg_lock);
|
2005-07-03 20:22:16 +04:00
|
|
|
opal_list_append(&mca_pml_teg.teg_send_pending, (opal_list_item_t*)req);
|
2005-07-04 02:45:48 +04:00
|
|
|
OPAL_THREAD_UNLOCK(&mca_pml_teg.teg_lock);
|
2005-05-09 23:37:10 +04:00
|
|
|
req->req_lock = 0;
|
|
|
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* fragments completed while scheduling - so retry */
|
2005-07-04 02:45:48 +04:00
|
|
|
} while(OPAL_THREAD_ADD32(&req->req_lock,-1) > 0);
|
2005-05-09 23:37:10 +04:00
|
|
|
|
|
|
|
/* free the request if completed while in the scheduler */
|
|
|
|
if (req->req_send.req_base.req_free_called && req->req_send.req_base.req_pml_complete) {
|
|
|
|
MCA_PML_TEG_FREE((ompi_request_t**)&req);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Update the status of the send request to reflect the number of bytes
|
|
|
|
* "actually" sent (and acknowledged). This should be called by the
|
|
|
|
* lower layer PTL after the fragment is actually delivered and has been
|
|
|
|
* acknowledged (if required). Note that this routine should NOT be called
|
|
|
|
* directly by the PTL, a function pointer is setup on the PTL at init to
|
|
|
|
* enable upcalls into the PML w/out directly linking to a specific PML
|
|
|
|
* implementation.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void mca_pml_teg_send_request_progress(
|
|
|
|
struct mca_ptl_base_module_t* ptl,
|
|
|
|
mca_ptl_base_send_request_t* req,
|
|
|
|
size_t bytes_sent)
|
|
|
|
{
|
|
|
|
bool schedule = false;
|
|
|
|
|
2005-07-04 02:45:48 +04:00
|
|
|
OPAL_THREAD_LOCK(&ompi_request_lock);
|
2005-05-09 23:37:10 +04:00
|
|
|
req->req_bytes_sent += bytes_sent;
|
|
|
|
if (req->req_bytes_sent >= req->req_send.req_bytes_packed) {
|
|
|
|
req->req_send.req_base.req_pml_complete = true;
|
|
|
|
if (req->req_send.req_base.req_ompi.req_complete == false) {
|
|
|
|
req->req_send.req_base.req_ompi.req_status.MPI_SOURCE = req->req_send.req_base.req_comm->c_my_rank;
|
|
|
|
req->req_send.req_base.req_ompi.req_status.MPI_TAG = req->req_send.req_base.req_tag;
|
|
|
|
req->req_send.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
|
|
|
|
req->req_send.req_base.req_ompi.req_status._count = req->req_bytes_sent;
|
|
|
|
req->req_send.req_base.req_ompi.req_complete = true;
|
|
|
|
if(ompi_request_waiting) {
|
2005-07-04 02:45:48 +04:00
|
|
|
opal_condition_broadcast(&ompi_request_cond);
|
2005-05-09 23:37:10 +04:00
|
|
|
}
|
|
|
|
} else if(req->req_send.req_base.req_free_called) {
|
|
|
|
/* don't free the request if in the scheduler */
|
|
|
|
if(req->req_lock == 0) {
|
|
|
|
MCA_PML_TEG_FREE((ompi_request_t**)&req);
|
|
|
|
}
|
|
|
|
} else if (req->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) {
|
|
|
|
mca_pml_base_bsend_request_fini((ompi_request_t*)req);
|
|
|
|
}
|
|
|
|
/* test to see if we have scheduled the entire request */
|
|
|
|
} else if (req->req_offset < req->req_send.req_bytes_packed) {
|
|
|
|
schedule = true;
|
|
|
|
}
|
2005-07-04 02:45:48 +04:00
|
|
|
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
2005-05-09 23:37:10 +04:00
|
|
|
|
|
|
|
/* schedule remaining fragments of this request */
|
|
|
|
if(schedule) {
|
|
|
|
mca_pml_teg_send_request_schedule(req);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check for pending requests that need to be progressed */
|
2005-07-03 20:22:16 +04:00
|
|
|
while(opal_list_get_size(&mca_pml_teg.teg_send_pending) != 0) {
|
2005-07-04 02:45:48 +04:00
|
|
|
OPAL_THREAD_LOCK(&mca_pml_teg.teg_lock);
|
2005-07-03 20:22:16 +04:00
|
|
|
req = (mca_ptl_base_send_request_t*)opal_list_remove_first(&mca_pml_teg.teg_send_pending);
|
2005-07-04 02:45:48 +04:00
|
|
|
OPAL_THREAD_UNLOCK(&mca_pml_teg.teg_lock);
|
2005-05-09 23:37:10 +04:00
|
|
|
if(req == NULL)
|
|
|
|
break;
|
|
|
|
if(mca_pml_teg_send_request_schedule(req) != OMPI_SUCCESS)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|