first cut at ack/retrans protocol
This commit was SVN r8570.
Этот коммит содержится в:
родитель
b06d79d4fe
Коммит
8c1027d974
@ -23,8 +23,6 @@ ob1_sources = \
|
||||
pml_dr_comm.h \
|
||||
pml_dr_component.c \
|
||||
pml_dr_component.h \
|
||||
pml_dr_endpoint.c \
|
||||
pml_dr_endpoint.h \
|
||||
pml_dr_hdr.h \
|
||||
pml_dr_iprobe.c \
|
||||
pml_dr_irecv.c \
|
||||
@ -38,7 +36,8 @@ ob1_sources = \
|
||||
pml_dr_recvreq.h \
|
||||
pml_dr_sendreq.c \
|
||||
pml_dr_sendreq.h \
|
||||
pml_dr_start.c
|
||||
pml_dr_start.c \
|
||||
pml_dr_vfrag.c
|
||||
|
||||
if OMPI_BUILD_pml_dr_DSO
|
||||
component_noinst =
|
||||
|
@ -74,7 +74,7 @@ int mca_pml_dr_add_comm(ompi_communicator_t* comm)
|
||||
if (NULL == pml_comm) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
mca_pml_dr_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count);
|
||||
mca_pml_dr_comm_init(pml_comm, comm);
|
||||
comm->c_pml_comm = pml_comm;
|
||||
comm->c_pml_procs = (mca_pml_proc_t**)malloc(
|
||||
comm->c_remote_group->grp_proc_count * sizeof(mca_pml_proc_t));
|
||||
|
@ -53,6 +53,9 @@ struct mca_pml_dr_t {
|
||||
size_t send_pipeline_depth;
|
||||
bool enabled;
|
||||
|
||||
time_t tout_ack;
|
||||
time_t tout_watch_dog;
|
||||
|
||||
/* lock queue access */
|
||||
opal_mutex_t lock;
|
||||
|
||||
@ -64,6 +67,7 @@ struct mca_pml_dr_t {
|
||||
ompi_free_list_t send_requests;
|
||||
ompi_free_list_t recv_requests;
|
||||
ompi_free_list_t recv_frags;
|
||||
ompi_free_list_t vfrags;
|
||||
ompi_free_list_t buffers;
|
||||
};
|
||||
typedef struct mca_pml_dr_t mca_pml_dr_t;
|
||||
|
@ -27,6 +27,7 @@
|
||||
static void mca_pml_dr_comm_proc_construct(mca_pml_dr_comm_proc_t* proc)
|
||||
{
|
||||
proc->expected_sequence = 1;
|
||||
proc->vfrag_id = 1;
|
||||
proc->send_sequence = 0;
|
||||
OBJ_CONSTRUCT(&proc->frags_cant_match, opal_list_t);
|
||||
OBJ_CONSTRUCT(&proc->specific_receives, opal_list_t);
|
||||
@ -78,19 +79,21 @@ OBJ_CLASS_INSTANCE(
|
||||
mca_pml_dr_comm_destruct);
|
||||
|
||||
|
||||
int mca_pml_dr_comm_init_size(mca_pml_dr_comm_t* comm, size_t size)
|
||||
int mca_pml_dr_comm_init(mca_pml_dr_comm_t* dr_comm, ompi_communicator_t* ompi_comm)
|
||||
{
|
||||
size_t i;
|
||||
size_t size = ompi_comm->c_remote_group->grp_proc_count;
|
||||
|
||||
/* send message sequence-number support - sender side */
|
||||
comm->procs = malloc(sizeof(mca_pml_dr_comm_proc_t)*size);
|
||||
if(NULL == comm->procs) {
|
||||
dr_comm->procs = malloc(sizeof(mca_pml_dr_comm_proc_t)*size);
|
||||
if(NULL == dr_comm->procs) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
for(i=0; i<size; i++) {
|
||||
OBJ_CONSTRUCT(comm->procs+i, mca_pml_dr_comm_proc_t);
|
||||
OBJ_CONSTRUCT(dr_comm->procs+i, mca_pml_dr_comm_proc_t);
|
||||
dr_comm->procs[i].ompi_proc = ompi_comm->c_remote_group->grp_proc_pointers[i];
|
||||
}
|
||||
comm->num_procs = size;
|
||||
dr_comm->num_procs = size;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -25,6 +25,8 @@
|
||||
#include "opal/threads/condition.h"
|
||||
#include "mca/ptl/ptl.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
@ -33,6 +35,7 @@ extern "C" {
|
||||
struct mca_pml_dr_comm_proc_t {
|
||||
opal_object_t super;
|
||||
uint16_t expected_sequence; /**< send message sequence number - receiver side */
|
||||
uint32_t vfrag_id; /**< virtual fragment identifier */
|
||||
#if OMPI_HAVE_THREAD_SUPPORT
|
||||
volatile int32_t send_sequence; /**< send side sequence number */
|
||||
#else
|
||||
@ -41,6 +44,7 @@ struct mca_pml_dr_comm_proc_t {
|
||||
opal_list_t frags_cant_match; /**< out-of-order fragment queues */
|
||||
opal_list_t specific_receives; /**< queues of unmatched specific receives */
|
||||
opal_list_t unexpected_frags; /**< unexpected fragment queues */
|
||||
ompi_proc_t* ompi_proc; /**< back pointer to ompi_proc_t */
|
||||
};
|
||||
typedef struct mca_pml_dr_comm_proc_t mca_pml_dr_comm_proc_t;
|
||||
|
||||
@ -69,12 +73,12 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_pml_dr_comm_t);
|
||||
/**
|
||||
* Initialize an instance of mca_pml_dr_comm_t based on the communicator size.
|
||||
*
|
||||
* @param comm Instance of mca_pml_dr_comm_t
|
||||
* @param size Size of communicator
|
||||
* @param dr_comm Instance of mca_pml_dr_comm_t
|
||||
* @param pml_comm Communicator
|
||||
* @return OMPI_SUCCESS or error status on failure.
|
||||
*/
|
||||
|
||||
OMPI_DECLSPEC extern int mca_pml_dr_comm_init_size(mca_pml_dr_comm_t* comm, size_t size);
|
||||
OMPI_DECLSPEC extern int mca_pml_dr_comm_init(mca_pml_dr_comm_t* dr_comm, ompi_communicator_t* ompi_comm);
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
|
@ -125,6 +125,16 @@ int mca_pml_dr_component_open(void)
|
||||
mca_pml_dr.free_list_inc,
|
||||
NULL);
|
||||
|
||||
OBJ_CONSTRUCT(&mca_pml_dr.vfrags, ompi_free_list_t);
|
||||
ompi_free_list_init(
|
||||
&mca_pml_dr.vfrags,
|
||||
sizeof(mca_pml_dr_vfrag_t),
|
||||
OBJ_CLASS(mca_pml_dr_vfrag_t),
|
||||
mca_pml_dr.free_list_num,
|
||||
mca_pml_dr.free_list_max,
|
||||
mca_pml_dr.free_list_inc,
|
||||
NULL);
|
||||
|
||||
OBJ_CONSTRUCT(&mca_pml_dr.send_pending, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mca_pml_dr.acks_pending, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mca_pml_dr.buffers, ompi_free_list_t);
|
||||
|
@ -1,35 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
#ifndef MCA_PML_DR_ENDPOINT_H
|
||||
#define MCA_PML_DR_ENDPOINT_H
|
||||
|
||||
#include "opal/util/output.h"
|
||||
#include "mca/btl/btl.h"
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
@ -31,15 +31,13 @@
|
||||
|
||||
#define MCA_PML_DR_HDR_TYPE_MATCH 1
|
||||
#define MCA_PML_DR_HDR_TYPE_RNDV 2
|
||||
#define MCA_PML_DR_HDR_TYPE_ACK 4
|
||||
#define MCA_PML_DR_HDR_TYPE_NACK 5
|
||||
#define MCA_PML_DR_HDR_TYPE_FRAG 6
|
||||
#define MCA_PML_DR_HDR_TYPE_MAX 10
|
||||
#define MCA_PML_DR_HDR_TYPE_ACK 3
|
||||
#define MCA_PML_DR_HDR_TYPE_NACK 4
|
||||
#define MCA_PML_DR_HDR_TYPE_FRAG 5
|
||||
|
||||
#define MCA_PML_DR_HDR_FLAGS_ACK 1 /* is an ack required */
|
||||
#define MCA_PML_DR_HDR_FLAGS_NBO 2 /* is the hdr in network byte order */
|
||||
#define MCA_PML_DR_HDR_FLAGS_PIN 4 /* is user buffer pinned */
|
||||
#define MCA_PML_DR_HDR_FLAGS_CONTIG 8 /* is user buffer contiguous */
|
||||
#define MCA_PML_DR_HDR_FLAGS_MATCH 4 /* is the ack in response to a match */
|
||||
|
||||
|
||||
/*
|
||||
@ -88,6 +86,7 @@ static inline uint64_t ntoh64(uint64_t val)
|
||||
struct mca_pml_dr_common_hdr_t {
|
||||
uint8_t hdr_type; /**< type of envelope */
|
||||
uint8_t hdr_flags; /**< flags indicating how fragment should be processed */
|
||||
uint16_t hdr_csum; /**< checksum over header */
|
||||
};
|
||||
typedef struct mca_pml_dr_common_hdr_t mca_pml_dr_common_hdr_t;
|
||||
|
||||
@ -100,10 +99,13 @@ typedef struct mca_pml_dr_common_hdr_t mca_pml_dr_common_hdr_t;
|
||||
*/
|
||||
struct mca_pml_dr_match_hdr_t {
|
||||
mca_pml_dr_common_hdr_t hdr_common; /**< common attributes */
|
||||
uint32_t hdr_vid; /**< vfrag id */
|
||||
uint16_t hdr_ctx; /**< communicator index */
|
||||
uint16_t hdr_seq; /**< message sequence number */
|
||||
int32_t hdr_src; /**< source rank */
|
||||
int32_t hdr_tag; /**< user tag */
|
||||
uint16_t hdr_seq; /**< message sequence number */
|
||||
uint32_t hdr_csum; /**< checksum over data */
|
||||
ompi_ptr_t hdr_src_req; /**< pointer to source request - returned in ack */
|
||||
};
|
||||
typedef struct mca_pml_dr_match_hdr_t mca_pml_dr_match_hdr_t;
|
||||
|
||||
@ -133,7 +135,6 @@ typedef struct mca_pml_dr_match_hdr_t mca_pml_dr_match_hdr_t;
|
||||
struct mca_pml_dr_rendezvous_hdr_t {
|
||||
mca_pml_dr_match_hdr_t hdr_match;
|
||||
uint64_t hdr_msg_length; /**< message length */
|
||||
ompi_ptr_t hdr_src_req; /**< pointer to source request - returned in ack */
|
||||
};
|
||||
typedef struct mca_pml_dr_rendezvous_hdr_t mca_pml_dr_rendezvous_hdr_t;
|
||||
|
||||
@ -154,9 +155,13 @@ typedef struct mca_pml_dr_rendezvous_hdr_t mca_pml_dr_rendezvous_hdr_t;
|
||||
*/
|
||||
struct mca_pml_dr_frag_hdr_t {
|
||||
mca_pml_dr_common_hdr_t hdr_common; /**< common attributes */
|
||||
uint64_t hdr_frag_offset; /**< offset into message */
|
||||
ompi_ptr_t hdr_src_req; /**< pointer to source request */
|
||||
ompi_ptr_t hdr_dst_req; /**< pointer to matched receive */
|
||||
uint32_t hdr_vid; /**< virtual frag id */
|
||||
uint16_t hdr_vlen; /**< length of entire vfrag */
|
||||
uint16_t hdr_frag_idx; /**< bit index of this frag w/in vfrag */
|
||||
uint32_t hdr_frag_csum; /**< checksum over data */
|
||||
uint64_t hdr_frag_offset; /**< absolute offset of this fragment */
|
||||
ompi_ptr_t hdr_src_req; /**< pointer to source req */
|
||||
ompi_ptr_t hdr_dst_req; /**< pointer to receive req */
|
||||
};
|
||||
typedef struct mca_pml_dr_frag_hdr_t mca_pml_dr_frag_hdr_t;
|
||||
|
||||
@ -179,6 +184,8 @@ typedef struct mca_pml_dr_frag_hdr_t mca_pml_dr_frag_hdr_t;
|
||||
|
||||
struct mca_pml_dr_ack_hdr_t {
|
||||
mca_pml_dr_common_hdr_t hdr_common; /**< common attributes */
|
||||
uint32_t hdr_vid; /**< virtual fragment id */
|
||||
uint64_t hdr_vmask; /**< acknowledged frags */
|
||||
ompi_ptr_t hdr_src_req; /**< source request */
|
||||
ompi_ptr_t hdr_dst_req; /**< matched receive request */
|
||||
};
|
||||
|
@ -25,7 +25,6 @@
|
||||
#include "communicator/communicator.h"
|
||||
#include "group/group.h"
|
||||
#include "proc/proc.h"
|
||||
#include "pml_dr_endpoint.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
@ -40,57 +39,6 @@ struct mca_pml_dr_proc_t {
|
||||
typedef struct mca_pml_dr_proc_t mca_pml_dr_proc_t;
|
||||
OMPI_COMP_EXPORT extern opal_class_t mca_pml_dr_proc_t_class;
|
||||
|
||||
/**
|
||||
* Return the mca_pml_proc_t instance cached in the communicators local group.
|
||||
*
|
||||
* @param comm Communicator
|
||||
* @param rank Peer rank
|
||||
* @return mca_pml_proc_t instance
|
||||
*/
|
||||
|
||||
/* static inline mca_pml_dr_proc_t* mca_pml_dr_proc_lookup_local(ompi_communicator_t* comm, int rank) */
|
||||
/* { */
|
||||
/* return (mca_pml_dr_proc_t*) comm->c_local_group->grp_proc_pointers[rank]->proc_pml; */
|
||||
/* } */
|
||||
|
||||
/* /\** */
|
||||
/* * Return the mca_pml_proc_t instance cached on the communicators remote group. */
|
||||
/* * */
|
||||
/* * @param comm Communicator */
|
||||
/* * @param rank Peer rank */
|
||||
/* * @return mca_pml_proc_t instance */
|
||||
/* *\/ */
|
||||
|
||||
/* static inline mca_pml_dr_proc_t* mca_pml_dr_proc_lookup_remote(ompi_communicator_t* comm, int rank) */
|
||||
/* { */
|
||||
/* return (mca_pml_dr_proc_t*) comm->c_pml_procs[rank]; */
|
||||
/* } */
|
||||
|
||||
/* /\** */
|
||||
/* * Return the mca_btl_peer_t instance corresponding to the process/btl combination. */
|
||||
/* * */
|
||||
/* * @param comm Communicator */
|
||||
/* * @param rank Peer rank */
|
||||
/* * @return mca_pml_proc_t instance */
|
||||
/* *\/ */
|
||||
|
||||
/* static inline struct mca_btl_base_endpoint_t* mca_pml_dr_proc_lookup_remote_endpoint( */
|
||||
/* ompi_communicator_t* comm, */
|
||||
/* int rank, */
|
||||
/* struct mca_btl_base_module_t* btl) */
|
||||
/* { */
|
||||
/* mca_pml_dr_proc_t* proc = (mca_pml_dr_proc_t*) comm->c_pml_procs[rank]; */
|
||||
/* size_t i, size = mca_pml_dr_ep_array_get_size(&proc->btl_eager); */
|
||||
/* mca_pml_dr_endpoint_t* endpoint = proc->btl_eager.arr_endpoints; */
|
||||
/* for(i = 0; i < size; i++) { */
|
||||
/* if(endpoint->btl == btl) { */
|
||||
/* return endpoint->btl_endpoint; */
|
||||
/* } */
|
||||
/* endpoint++; */
|
||||
/* } */
|
||||
/* return NULL; */
|
||||
/* } */
|
||||
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
|
@ -50,6 +50,19 @@ OBJ_CLASS_INSTANCE(
|
||||
NULL
|
||||
);
|
||||
|
||||
/*
|
||||
* Release resources.
|
||||
*/
|
||||
|
||||
static void mca_pml_dr_ctl_completion(
|
||||
mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
int status)
|
||||
{
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
|
||||
MCA_BML_BASE_BTL_DES_RETURN(bml_btl, des);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
@ -79,8 +92,14 @@ void mca_pml_dr_recv_frag_callback(
|
||||
{
|
||||
mca_pml_dr_send_request_t* sendreq = (mca_pml_dr_send_request_t*)
|
||||
hdr->hdr_ack.hdr_src_req.pval;
|
||||
sendreq->req_recv = hdr->hdr_ack.hdr_dst_req;
|
||||
MCA_PML_DR_SEND_REQUEST_ADVANCE(sendreq);
|
||||
mca_pml_dr_send_request_acked(sendreq, &hdr->hdr_ack);
|
||||
break;
|
||||
}
|
||||
case MCA_PML_DR_HDR_TYPE_NACK:
|
||||
{
|
||||
mca_pml_dr_send_request_t* sendreq = (mca_pml_dr_send_request_t*)
|
||||
hdr->hdr_ack.hdr_src_req.pval;
|
||||
mca_pml_dr_send_request_nacked(sendreq, &hdr->hdr_ack);
|
||||
break;
|
||||
}
|
||||
case MCA_PML_DR_HDR_TYPE_FRAG:
|
||||
@ -382,7 +401,7 @@ static bool mca_pml_dr_check_cantmatch_for_match(
|
||||
* - fragments may be corrupt
|
||||
* - this routine may be called simultaneously by more than one thread
|
||||
*/
|
||||
int mca_pml_dr_recv_frag_match(
|
||||
bool mca_pml_dr_recv_frag_match(
|
||||
mca_btl_base_module_t *btl,
|
||||
mca_pml_dr_match_hdr_t *hdr,
|
||||
mca_btl_base_segment_t* segments,
|
||||
@ -396,6 +415,7 @@ int mca_pml_dr_recv_frag_match(
|
||||
mca_pml_dr_comm_proc_t *proc;
|
||||
bool additional_match=false;
|
||||
opal_list_t additional_matches;
|
||||
ompi_proc_t* ompi_proc;
|
||||
int rc;
|
||||
|
||||
/* communicator pointer */
|
||||
@ -405,6 +425,7 @@ int mca_pml_dr_recv_frag_match(
|
||||
/* source sequence number */
|
||||
frag_msg_seq = hdr->hdr_seq;
|
||||
proc = comm->procs + hdr->hdr_src;
|
||||
ompi_proc = proc->ompi_proc;
|
||||
|
||||
/* get next expected message sequence number - if threaded
|
||||
* run, lock to make sure that if another thread is processing
|
||||
@ -427,6 +448,7 @@ int mca_pml_dr_recv_frag_match(
|
||||
|
||||
/* We're now expecting the next sequence number. */
|
||||
(proc->expected_sequence)++;
|
||||
rematch:
|
||||
|
||||
/*
|
||||
* figure out what sort of matching logic to use, if need to
|
||||
@ -459,15 +481,12 @@ int mca_pml_dr_recv_frag_match(
|
||||
*/
|
||||
if( (match->req_recv.req_base.req_type == MCA_PML_REQUEST_PROBE) ) {
|
||||
|
||||
/* Match a probe, rollback the next expected sequence number */
|
||||
(proc->expected_sequence)--;
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
|
||||
/* complete the probe */
|
||||
mca_pml_dr_recv_request_matched_probe(match,btl,segments,num_segments);
|
||||
|
||||
/* attempt to match actual request */
|
||||
return mca_pml_dr_recv_frag_match(btl,hdr,segments,num_segments);
|
||||
/* retry the matchh */
|
||||
match = NULL;
|
||||
goto rematch;
|
||||
}
|
||||
} else {
|
||||
|
||||
@ -478,7 +497,7 @@ int mca_pml_dr_recv_frag_match(
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
return rc;
|
||||
}
|
||||
MCA_PML_DR_RECV_FRAG_INIT(frag,hdr,segments,num_segments,btl);
|
||||
MCA_PML_DR_RECV_FRAG_INIT(frag,proc->ompi_proc,hdr,segments,num_segments,btl);
|
||||
opal_list_append( &proc->unexpected_frags, (opal_list_item_t *)frag );
|
||||
}
|
||||
|
||||
@ -503,9 +522,8 @@ int mca_pml_dr_recv_frag_match(
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
return rc;
|
||||
}
|
||||
MCA_PML_DR_RECV_FRAG_INIT(frag,hdr,segments,num_segments,btl);
|
||||
MCA_PML_DR_RECV_FRAG_INIT(frag,proc->ompi_proc,hdr,segments,num_segments,btl);
|
||||
opal_list_append(&proc->frags_cant_match, (opal_list_item_t *)frag);
|
||||
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
|
||||
@ -522,7 +540,7 @@ int mca_pml_dr_recv_frag_match(
|
||||
MCA_PML_DR_RECV_FRAG_RETURN(frag);
|
||||
}
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
return (match != NULL);
|
||||
}
|
||||
|
||||
|
||||
@ -550,7 +568,6 @@ static bool mca_pml_dr_check_cantmatch_for_match(
|
||||
int match_found;
|
||||
uint16_t next_msg_seq_expected, frag_seq;
|
||||
mca_pml_dr_recv_frag_t *frag;
|
||||
mca_pml_dr_recv_request_t *match = NULL;
|
||||
bool match_made = false;
|
||||
|
||||
/*
|
||||
@ -582,6 +599,7 @@ static bool mca_pml_dr_check_cantmatch_for_match(
|
||||
*/
|
||||
frag_seq=frag->hdr.hdr_match.hdr_seq;
|
||||
if (frag_seq == next_msg_seq_expected) {
|
||||
mca_pml_dr_recv_request_t *match = NULL;
|
||||
mca_pml_dr_match_hdr_t* hdr = &frag->hdr.hdr_match;
|
||||
|
||||
/* We're now expecting the next sequence number. */
|
||||
@ -596,6 +614,7 @@ static bool mca_pml_dr_check_cantmatch_for_match(
|
||||
opal_list_remove_item(&proc->frags_cant_match,
|
||||
(opal_list_item_t *)frag);
|
||||
|
||||
rematch:
|
||||
/*
|
||||
* figure out what sort of matching logic to use, if need to
|
||||
* look only at "specific" receives, or "wild" receives,
|
||||
@ -623,6 +642,20 @@ static bool mca_pml_dr_check_cantmatch_for_match(
|
||||
/* if match found, process data */
|
||||
if (match) {
|
||||
|
||||
/*
|
||||
* If this was a probe need to queue fragment on unexpected list
|
||||
*/
|
||||
if( (match->req_recv.req_base.req_type == MCA_PML_REQUEST_PROBE) ) {
|
||||
|
||||
/* complete the probe */
|
||||
mca_pml_dr_recv_request_matched_probe(match,frag->btl,frag->segments,frag->num_segments);
|
||||
|
||||
/* retry the match */
|
||||
match = NULL;
|
||||
goto rematch;
|
||||
|
||||
} else {
|
||||
|
||||
/* associate the receive descriptor with the fragment
|
||||
* descriptor */
|
||||
frag->request=match;
|
||||
@ -635,6 +668,7 @@ static bool mca_pml_dr_check_cantmatch_for_match(
|
||||
OBJ_CONSTRUCT(additional_matches, opal_list_t);
|
||||
}
|
||||
opal_list_append(additional_matches, (opal_list_item_t *)frag);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
@ -656,3 +690,45 @@ static bool mca_pml_dr_check_cantmatch_for_match(
|
||||
return match_made;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Generate an acknowledgement to the request
|
||||
*/
|
||||
|
||||
void mca_pml_dr_recv_frag_ack(mca_pml_dr_recv_frag_t* frag)
|
||||
{
|
||||
mca_pml_dr_ack_hdr_t* ack;
|
||||
mca_btl_base_descriptor_t* des;
|
||||
mca_bml_base_endpoint_t* bml_endpoint;
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
int rc;
|
||||
|
||||
/* lookup btl */
|
||||
bml_endpoint = (mca_bml_base_endpoint_t*)frag->proc->proc_pml;
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager);
|
||||
|
||||
/* allocate descriptor */
|
||||
MCA_PML_DR_DES_ALLOC(bml_btl, des, sizeof(mca_pml_dr_ack_hdr_t));
|
||||
if(NULL == des) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* fill out header */
|
||||
ack = (mca_pml_dr_ack_hdr_t*)des->des_src->seg_addr.pval;
|
||||
ack->hdr_common.hdr_type = MCA_PML_DR_HDR_TYPE_ACK;
|
||||
ack->hdr_common.hdr_flags = 0;
|
||||
ack->hdr_vmask = 1;
|
||||
ack->hdr_vid = frag->hdr.hdr_match.hdr_vid;
|
||||
ack->hdr_src_req = frag->hdr.hdr_match.hdr_src_req;
|
||||
ack->hdr_dst_req.pval = NULL;
|
||||
|
||||
/* initialize descriptor */
|
||||
des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
|
||||
des->des_cbfunc = mca_pml_dr_ctl_completion;
|
||||
|
||||
rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML);
|
||||
if(rc != OMPI_SUCCESS) {
|
||||
mca_bml_base_free(bml_btl, des);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -41,7 +41,9 @@ struct mca_pml_dr_recv_frag_t {
|
||||
mca_pml_dr_hdr_t hdr;
|
||||
struct mca_pml_dr_recv_request_t* request;
|
||||
size_t num_segments;
|
||||
uint32_t csum;
|
||||
mca_btl_base_module_t* btl;
|
||||
ompi_proc_t* proc;
|
||||
mca_btl_base_segment_t segments[MCA_BTL_DES_MAX_SEGMENTS];
|
||||
mca_pml_dr_buffer_t* buffers[MCA_BTL_DES_MAX_SEGMENTS];
|
||||
};
|
||||
@ -58,7 +60,7 @@ do { \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define MCA_PML_DR_RECV_FRAG_INIT(frag, hdr,segs,cnt,btl) \
|
||||
#define MCA_PML_DR_RECV_FRAG_INIT(frag,oproc,hdr,segs,cnt,btl) \
|
||||
do { \
|
||||
size_t i; \
|
||||
mca_btl_base_segment_t* macro_segments = frag->segments; \
|
||||
@ -68,6 +70,9 @@ do { \
|
||||
frag->btl = btl; \
|
||||
frag->hdr = *(mca_pml_dr_hdr_t*)hdr; \
|
||||
frag->num_segments = cnt; \
|
||||
frag->csum = 0; \
|
||||
frag->proc = oproc; \
|
||||
\
|
||||
/* copy over data */ \
|
||||
for(i=0; i<cnt; i++) { \
|
||||
opal_list_item_t* item; \
|
||||
@ -81,6 +86,7 @@ do { \
|
||||
segs[i].seg_addr.pval, \
|
||||
segs[i].seg_len); \
|
||||
} \
|
||||
mca_pml_dr_recv_frag_ack(frag); \
|
||||
\
|
||||
} while(0)
|
||||
|
||||
@ -110,8 +116,7 @@ OMPI_DECLSPEC void mca_pml_dr_recv_frag_callback(
|
||||
mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata
|
||||
);
|
||||
void* cbdata);
|
||||
|
||||
/**
|
||||
* Match incoming recv_frags against posted receives.
|
||||
@ -123,12 +128,19 @@ OMPI_DECLSPEC void mca_pml_dr_recv_frag_callback(
|
||||
* @param additional_matches (OUT) List of additional matches
|
||||
* @return OMPI_SUCCESS or error status on failure.
|
||||
*/
|
||||
OMPI_DECLSPEC int mca_pml_dr_recv_frag_match(
|
||||
OMPI_DECLSPEC bool mca_pml_dr_recv_frag_match(
|
||||
mca_btl_base_module_t* btl,
|
||||
mca_pml_dr_match_hdr_t *hdr,
|
||||
mca_btl_base_segment_t* segments,
|
||||
size_t num_segments);
|
||||
|
||||
/**
|
||||
* Generate an acknowledgment for an unexpected/out-of-order fragment
|
||||
*/
|
||||
|
||||
OMPI_DECLSPEC void mca_pml_dr_recv_frag_ack(
|
||||
mca_pml_dr_recv_frag_t* frag);
|
||||
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
|
@ -79,8 +79,8 @@ static int mca_pml_dr_recv_request_cancel(struct ompi_request_t* ompi_request, i
|
||||
ompi_request->req_status._cancelled = true;
|
||||
ompi_request->req_complete = true; /* mark it as completed so all the test/wait functions
|
||||
* on this particular request will finish */
|
||||
/* Now we have a problem if we are in a multi-threaded environment. We shou ld
|
||||
* broadcast the condition on the request in order to allow the other threa ds
|
||||
/* Now we have a problem if we are in a multi-threaded environment. We should
|
||||
* broadcast the condition on the request in order to allow the other threads
|
||||
* to complete their test/wait functions.
|
||||
*/
|
||||
ompi_request_completed++;
|
||||
@ -97,10 +97,16 @@ static void mca_pml_dr_recv_request_construct(mca_pml_dr_recv_request_t* request
|
||||
request->req_recv.req_base.req_ompi.req_fini = mca_pml_dr_recv_request_fini;
|
||||
request->req_recv.req_base.req_ompi.req_free = mca_pml_dr_recv_request_free;
|
||||
request->req_recv.req_base.req_ompi.req_cancel = mca_pml_dr_recv_request_cancel;
|
||||
OBJ_CONSTRUCT(&request->req_vfrag0, mca_pml_dr_vfrag_t);
|
||||
OBJ_CONSTRUCT(&request->req_vfrags, opal_list_t);
|
||||
OBJ_CONSTRUCT(&request->req_mutex, opal_mutex_t);
|
||||
}
|
||||
|
||||
static void mca_pml_dr_recv_request_destruct(mca_pml_dr_recv_request_t* request)
|
||||
{
|
||||
OBJ_DESTRUCT(&request->req_vfrag0);
|
||||
OBJ_DESTRUCT(&request->req_vfrags);
|
||||
OBJ_DESTRUCT(&request->req_mutex);
|
||||
}
|
||||
|
||||
|
||||
@ -127,15 +133,15 @@ static void mca_pml_dr_ctl_completion(
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
* Generate an ack to the peer after first fragment is matched.
|
||||
*/
|
||||
|
||||
static void mca_pml_dr_recv_request_ack(
|
||||
static void mca_pml_dr_recv_request_matched(
|
||||
mca_pml_dr_recv_request_t* recvreq,
|
||||
mca_pml_dr_rendezvous_hdr_t* hdr,
|
||||
size_t bytes_received)
|
||||
uint8_t type)
|
||||
{
|
||||
ompi_proc_t* proc = (ompi_proc_t*) recvreq->req_proc;
|
||||
ompi_proc_t* proc = recvreq->req_proc;
|
||||
mca_bml_base_endpoint_t* bml_endpoint = NULL;
|
||||
mca_btl_base_descriptor_t* des;
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
@ -160,9 +166,11 @@ static void mca_pml_dr_recv_request_ack(
|
||||
|
||||
/* fill out header */
|
||||
ack = (mca_pml_dr_ack_hdr_t*)des->des_src->seg_addr.pval;
|
||||
ack->hdr_common.hdr_type = MCA_PML_DR_HDR_TYPE_ACK;
|
||||
ack->hdr_common.hdr_flags = 0;
|
||||
ack->hdr_src_req = hdr->hdr_src_req;
|
||||
ack->hdr_common.hdr_type = type;
|
||||
ack->hdr_common.hdr_flags = MCA_PML_DR_HDR_FLAGS_MATCH;
|
||||
ack->hdr_vid = hdr->hdr_match.hdr_vid;
|
||||
ack->hdr_vmask = 0x1;
|
||||
ack->hdr_src_req = hdr->hdr_match.hdr_src_req;
|
||||
ack->hdr_dst_req.pval = recvreq;
|
||||
|
||||
/* initialize descriptor */
|
||||
@ -185,6 +193,94 @@ retry:
|
||||
opal_list_append(&mca_pml_dr.acks_pending, (opal_list_item_t*)frag);
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate an ack to the peer after first fragment is matched.
|
||||
*/
|
||||
|
||||
static void mca_pml_dr_recv_request_nack(
|
||||
mca_pml_dr_recv_request_t* recvreq,
|
||||
mca_pml_dr_frag_hdr_t* hdr)
|
||||
{
|
||||
ompi_proc_t* proc = recvreq->req_proc;
|
||||
mca_bml_base_endpoint_t* bml_endpoint = NULL;
|
||||
mca_btl_base_descriptor_t* des;
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
mca_pml_dr_ack_hdr_t* nack;
|
||||
int rc;
|
||||
|
||||
bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml;
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager);
|
||||
|
||||
/* allocate descriptor */
|
||||
MCA_PML_DR_DES_ALLOC(bml_btl, des, sizeof(mca_pml_dr_ack_hdr_t));
|
||||
if(NULL == des) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* fill out header */
|
||||
nack = (mca_pml_dr_ack_hdr_t*)des->des_src->seg_addr.pval;
|
||||
nack->hdr_common.hdr_type = MCA_PML_DR_HDR_TYPE_NACK;
|
||||
nack->hdr_common.hdr_flags = MCA_PML_DR_HDR_FLAGS_MATCH;
|
||||
nack->hdr_vid = hdr->hdr_vid;
|
||||
nack->hdr_vmask = 1 << hdr->hdr_frag_idx;
|
||||
nack->hdr_src_req = hdr->hdr_src_req;
|
||||
nack->hdr_dst_req.pval = recvreq;
|
||||
|
||||
/* initialize descriptor */
|
||||
des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
|
||||
des->des_cbfunc = mca_pml_dr_ctl_completion;
|
||||
|
||||
rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML);
|
||||
if(rc != OMPI_SUCCESS) {
|
||||
mca_bml_base_free(bml_btl, des);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Generate an ack w/ the current vfrag status.
|
||||
*/
|
||||
|
||||
static void mca_pml_dr_recv_request_vfrag_ack(
|
||||
mca_pml_dr_recv_request_t* recvreq,
|
||||
mca_pml_dr_vfrag_t* vfrag)
|
||||
{
|
||||
ompi_proc_t* proc = recvreq->req_proc;
|
||||
mca_bml_base_endpoint_t* bml_endpoint = NULL;
|
||||
mca_btl_base_descriptor_t* des;
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
mca_pml_dr_ack_hdr_t* ack;
|
||||
int rc;
|
||||
|
||||
bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml;
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager);
|
||||
|
||||
/* allocate descriptor */
|
||||
MCA_PML_DR_DES_ALLOC(bml_btl, des, sizeof(mca_pml_dr_ack_hdr_t));
|
||||
if(NULL == des) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* fill out header */
|
||||
ack = (mca_pml_dr_ack_hdr_t*)des->des_src->seg_addr.pval;
|
||||
ack->hdr_common.hdr_type = MCA_PML_DR_HDR_TYPE_ACK;
|
||||
ack->hdr_common.hdr_flags = 0;
|
||||
ack->hdr_vid = vfrag->vf_id;
|
||||
ack->hdr_vmask = vfrag->vf_ack;
|
||||
ack->hdr_src_req = recvreq->req_vfrag0.vf_send;
|
||||
ack->hdr_dst_req.pval = recvreq;
|
||||
|
||||
/* initialize descriptor */
|
||||
des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
|
||||
des->des_cbfunc = mca_pml_dr_ctl_completion;
|
||||
|
||||
rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML);
|
||||
if(rc != OMPI_SUCCESS) {
|
||||
mca_bml_base_free(bml_btl, des);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Update the recv request status to reflect the number of bytes
|
||||
@ -202,6 +298,7 @@ void mca_pml_dr_recv_request_progress(
|
||||
size_t data_offset = 0;
|
||||
mca_pml_dr_hdr_t* hdr = (mca_pml_dr_hdr_t*)segments->seg_addr.pval;
|
||||
size_t i;
|
||||
uint32_t csum = 0;
|
||||
|
||||
for(i=0; i<num_segments; i++)
|
||||
bytes_received += segments[i].seg_len;
|
||||
@ -219,16 +316,16 @@ void mca_pml_dr_recv_request_progress(
|
||||
sizeof(mca_pml_dr_match_hdr_t),
|
||||
data_offset,
|
||||
bytes_received,
|
||||
bytes_delivered);
|
||||
bytes_delivered,
|
||||
csum);
|
||||
break;
|
||||
|
||||
case MCA_PML_DR_HDR_TYPE_RNDV:
|
||||
|
||||
bytes_received -= sizeof(mca_pml_dr_rendezvous_hdr_t);
|
||||
recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length;
|
||||
recvreq->req_send = hdr->hdr_rndv.hdr_src_req;
|
||||
recvreq->req_vfrag0.vf_send = hdr->hdr_match.hdr_src_req;
|
||||
MCA_PML_DR_RECV_REQUEST_MATCHED(recvreq,&hdr->hdr_match);
|
||||
mca_pml_dr_recv_request_ack(recvreq, &hdr->hdr_rndv, bytes_received);
|
||||
MCA_PML_DR_RECV_REQUEST_UNPACK(
|
||||
recvreq,
|
||||
segments,
|
||||
@ -236,7 +333,10 @@ void mca_pml_dr_recv_request_progress(
|
||||
sizeof(mca_pml_dr_rendezvous_hdr_t),
|
||||
data_offset,
|
||||
bytes_received,
|
||||
bytes_delivered);
|
||||
bytes_delivered,
|
||||
csum);
|
||||
mca_pml_dr_recv_request_matched(recvreq, &hdr->hdr_rndv,
|
||||
(csum == hdr->hdr_match.hdr_csum) ? MCA_PML_DR_HDR_TYPE_ACK : MCA_PML_DR_HDR_TYPE_NACK);
|
||||
break;
|
||||
|
||||
case MCA_PML_DR_HDR_TYPE_FRAG:
|
||||
@ -250,7 +350,30 @@ void mca_pml_dr_recv_request_progress(
|
||||
sizeof(mca_pml_dr_frag_hdr_t),
|
||||
data_offset,
|
||||
bytes_received,
|
||||
bytes_delivered);
|
||||
bytes_delivered,
|
||||
csum);
|
||||
|
||||
/* if checksum fails - immediately nack this fragment */
|
||||
if(csum != hdr->hdr_frag.hdr_frag_csum) {
|
||||
bytes_received = bytes_delivered = 0;
|
||||
mca_pml_dr_recv_request_nack(recvreq, &hdr->hdr_frag);
|
||||
} else {
|
||||
mca_pml_dr_vfrag_t* vfrag;
|
||||
uint64_t bit = ((uint64_t)1 << hdr->hdr_frag.hdr_frag_idx);
|
||||
|
||||
/* update vfrag status */
|
||||
MCA_PML_DR_RECV_REQUEST_VFRAG_LOOKUP(recvreq, &hdr->hdr_frag, vfrag);
|
||||
if((vfrag->vf_ack & bit) == 0) {
|
||||
vfrag->vf_ack |= bit;
|
||||
if((vfrag->vf_ack & vfrag->vf_mask) == vfrag->vf_mask) {
|
||||
/* done w/ this vfrag - ack it */
|
||||
mca_pml_dr_recv_request_vfrag_ack(recvreq, vfrag);
|
||||
}
|
||||
} else {
|
||||
/* duplicate fragment - send an ack w/ the frags completed */
|
||||
mca_pml_dr_recv_request_vfrag_ack(recvreq, vfrag);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -468,3 +591,4 @@ static mca_pml_dr_recv_frag_t* mca_pml_dr_recv_request_match_specific_proc(
|
||||
return frag;
|
||||
}
|
||||
|
||||
|
||||
|
@ -21,11 +21,14 @@
|
||||
#ifndef OMPI_PML_DR_RECV_REQUEST_H
|
||||
#define OMPI_PML_DR_RECV_REQUEST_H
|
||||
|
||||
#include "pml_dr.h"
|
||||
#include "pml_dr_proc.h"
|
||||
#include "ompi_config.h"
|
||||
#include "mca/mpool/base/base.h"
|
||||
#include "mca/pml/base/pml_base_recvreq.h"
|
||||
|
||||
#include "pml_dr.h"
|
||||
#include "pml_dr_proc.h"
|
||||
#include "pml_dr_vfrag.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
@ -34,7 +37,6 @@ extern "C" {
|
||||
struct mca_pml_dr_recv_request_t {
|
||||
mca_pml_base_recv_request_t req_recv;
|
||||
struct ompi_proc_t *req_proc;
|
||||
ompi_ptr_t req_send;
|
||||
#if OMPI_HAVE_THREAD_SUPPORT
|
||||
volatile int32_t req_lock;
|
||||
#else
|
||||
@ -43,6 +45,11 @@ struct mca_pml_dr_recv_request_t {
|
||||
size_t req_pipeline_depth;
|
||||
size_t req_bytes_received;
|
||||
size_t req_bytes_delivered;
|
||||
|
||||
mca_pml_dr_vfrag_t *req_vfrag;
|
||||
mca_pml_dr_vfrag_t req_vfrag0;
|
||||
opal_list_t req_vfrags;
|
||||
opal_mutex_t req_mutex;
|
||||
};
|
||||
typedef struct mca_pml_dr_recv_request_t mca_pml_dr_recv_request_t;
|
||||
|
||||
@ -105,6 +112,16 @@ do { \
|
||||
*/
|
||||
#define MCA_PML_DR_RECV_REQUEST_RETURN(recvreq) \
|
||||
do { \
|
||||
opal_list_item_t* item; \
|
||||
\
|
||||
/* return vfrags */ \
|
||||
OPAL_THREAD_LOCK(&(recvreq)->req_mutex); \
|
||||
while(NULL != (item = opal_list_remove_first(&(recvreq)->req_vfrags))) { \
|
||||
OMPI_FREE_LIST_RETURN(&mca_pml_dr.vfrags, item); \
|
||||
} \
|
||||
OPAL_THREAD_UNLOCK(&(recvreq)->req_mutex); \
|
||||
\
|
||||
/* decrement reference counts */ \
|
||||
MCA_PML_BASE_RECV_REQUEST_FINI(&(recvreq)->req_recv); \
|
||||
OMPI_FREE_LIST_RETURN(&mca_pml_dr.recv_requests, (opal_list_item_t*)(recvreq)); \
|
||||
} while(0)
|
||||
@ -141,6 +158,7 @@ do {
|
||||
(request)->req_recv.req_base.req_pml_complete = false; \
|
||||
(request)->req_recv.req_base.req_ompi.req_complete = false; \
|
||||
(request)->req_recv.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \
|
||||
(request)->req_vfrag = &(request)->req_vfrag0; \
|
||||
\
|
||||
/* always set the req_status.MPI_TAG to ANY_TAG before starting the \
|
||||
* request. This field is used if cancelled to find out if the request \
|
||||
@ -197,7 +215,8 @@ do {
|
||||
seg_offset, \
|
||||
data_offset, \
|
||||
bytes_received, \
|
||||
bytes_delivered) \
|
||||
bytes_delivered, \
|
||||
csum) \
|
||||
do { \
|
||||
if(request->req_recv.req_bytes_packed > 0) { \
|
||||
struct iovec iov[MCA_BTL_DES_MAX_SEGMENTS]; \
|
||||
@ -259,6 +278,49 @@ void mca_pml_dr_recv_request_matched_probe(
|
||||
void mca_pml_dr_recv_request_schedule(
|
||||
mca_pml_dr_recv_request_t* req);
|
||||
|
||||
/*
|
||||
*
|
||||
*/
|
||||
|
||||
#define MCA_PML_DR_RECV_REQUEST_VFRAG_LOOKUP(recvreq,hdr,vfrag) \
|
||||
do { \
|
||||
if((recvreq)->req_vfrag->vf_id == (hdr)->hdr_vid) { \
|
||||
vfrag = (recvreq)->req_vfrag; \
|
||||
} else if ((hdr)->hdr_frag_offset == 0) { \
|
||||
vfrag = &(recvreq)->req_vfrag0; \
|
||||
} else { \
|
||||
opal_list_item_t* item; \
|
||||
int rc; \
|
||||
\
|
||||
vfrag = NULL; \
|
||||
OPAL_THREAD_LOCK(&(recvreq)->req_mutex); \
|
||||
for(item = opal_list_get_first(&(recvreq)->req_vfrags); \
|
||||
item != opal_list_get_end(&(recvreq)->req_vfrags); \
|
||||
item = opal_list_get_next(item)) { \
|
||||
mca_pml_dr_vfrag_t* vf = (mca_pml_dr_vfrag_t*)item; \
|
||||
if(vf->vf_id == (hdr)->hdr_vid) { \
|
||||
vfrag = vf; \
|
||||
break; \
|
||||
} \
|
||||
} \
|
||||
if(NULL == vfrag) { \
|
||||
MCA_PML_DR_VFRAG_ALLOC(vfrag,rc); \
|
||||
if(NULL != vfrag) { \
|
||||
(vfrag)->vf_id = (hdr)->hdr_vid; \
|
||||
(vfrag)->vf_len = (hdr)->hdr_vlen; \
|
||||
(vfrag)->vf_ack = 0; \
|
||||
if((hdr)->hdr_vlen == 64) { \
|
||||
(vfrag)->vf_mask = ~(uint64_t)0; \
|
||||
} else { \
|
||||
(vfrag)->vf_mask = (((uint64_t)1 << (hdr)->hdr_vlen)-1); \
|
||||
} \
|
||||
opal_list_append(&(recvreq)->req_vfrags, (opal_list_item_t*)vfrag); \
|
||||
(recvreq)->req_vfrag = vfrag; \
|
||||
} \
|
||||
} \
|
||||
OPAL_THREAD_UNLOCK(&(recvreq)->req_mutex); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
|
@ -20,6 +20,7 @@
|
||||
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "opal/util/crc.h"
|
||||
#include "ompi/include/constants.h"
|
||||
#include "mca/pml/pml.h"
|
||||
#include "mca/btl/btl.h"
|
||||
@ -30,7 +31,6 @@
|
||||
#include "pml_dr_proc.h"
|
||||
#include "pml_dr_sendreq.h"
|
||||
#include "pml_dr_recvreq.h"
|
||||
#include "pml_dr_endpoint.h"
|
||||
#include "mca/bml/base/base.h"
|
||||
|
||||
|
||||
@ -73,14 +73,26 @@ static int mca_pml_dr_send_request_cancel(struct ompi_request_t* request, int co
|
||||
|
||||
static void mca_pml_dr_send_request_construct(mca_pml_dr_send_request_t* req)
|
||||
{
|
||||
req->req_vfrag0.vf_len = 1;
|
||||
req->req_vfrag0.vf_idx = 1;
|
||||
req->req_vfrag0.vf_mask = 1;
|
||||
req->req_send.req_base.req_type = MCA_PML_REQUEST_SEND;
|
||||
req->req_send.req_base.req_ompi.req_fini = mca_pml_dr_send_request_fini;
|
||||
req->req_send.req_base.req_ompi.req_free = mca_pml_dr_send_request_free;
|
||||
req->req_send.req_base.req_ompi.req_cancel = mca_pml_dr_send_request_cancel;
|
||||
|
||||
OBJ_CONSTRUCT(&req->req_vfrag0, mca_pml_dr_vfrag_t);
|
||||
OBJ_CONSTRUCT(&req->req_pending, opal_list_t);
|
||||
OBJ_CONSTRUCT(&req->req_retrans, opal_list_t);
|
||||
OBJ_CONSTRUCT(&req->req_mutex, opal_mutex_t);
|
||||
}
|
||||
|
||||
static void mca_pml_dr_send_request_destruct(mca_pml_dr_send_request_t* req)
|
||||
{
|
||||
OBJ_DESTRUCT(&req->req_vfrag0);
|
||||
OBJ_DESTRUCT(&req->req_pending);
|
||||
OBJ_DESTRUCT(&req->req_retrans);
|
||||
OBJ_DESTRUCT(&req->req_mutex);
|
||||
}
|
||||
|
||||
|
||||
@ -115,7 +127,9 @@ void mca_pml_dr_match_completion_cache(
|
||||
|
||||
/* signal request completion */
|
||||
OPAL_THREAD_LOCK(&ompi_request_lock);
|
||||
if(sendreq->req_num_acks == sendreq->req_num_vfrags) {
|
||||
MCA_PML_DR_SEND_REQUEST_PML_COMPLETE(sendreq);
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
||||
}
|
||||
|
||||
@ -144,7 +158,9 @@ void mca_pml_dr_match_completion_free(
|
||||
|
||||
/* signal request completion */
|
||||
OPAL_THREAD_LOCK(&ompi_request_lock);
|
||||
if(sendreq->req_num_acks == sendreq->req_num_vfrags) {
|
||||
MCA_PML_DR_SEND_REQUEST_PML_COMPLETE(sendreq);
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
||||
}
|
||||
|
||||
@ -211,7 +227,8 @@ static void mca_pml_dr_frag_completion(
|
||||
/* count bytes of user data actually delivered */
|
||||
MCA_PML_DR_SEND_REQUEST_SET_BYTES_DELIVERED(sendreq,descriptor,sizeof(mca_pml_dr_frag_hdr_t));
|
||||
if (OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,-1) == 0 &&
|
||||
sendreq->req_bytes_delivered == sendreq->req_send.req_bytes_packed) {
|
||||
sendreq->req_bytes_delivered == sendreq->req_send.req_bytes_packed &&
|
||||
sendreq->req_num_acks == sendreq->req_num_vfrags) {
|
||||
MCA_PML_DR_SEND_REQUEST_PML_COMPLETE(sendreq);
|
||||
schedule = false;
|
||||
} else {
|
||||
@ -272,20 +289,10 @@ int mca_pml_dr_send_request_start_buffered(
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* build rendezvous header */
|
||||
hdr = (mca_pml_dr_hdr_t*)segment->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_DR_HDR_TYPE_RNDV;
|
||||
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
|
||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||
|
||||
/* update lengths */
|
||||
segment->seg_len = sizeof(mca_pml_dr_rendezvous_hdr_t) + max_data;
|
||||
sendreq->req_send_offset = max_data;
|
||||
sendreq->req_vfrag0.vf_size = max_data;
|
||||
|
||||
descriptor->des_cbfunc = mca_pml_dr_rndv_completion;
|
||||
descriptor->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
|
||||
@ -311,6 +318,21 @@ int mca_pml_dr_send_request_start_buffered(
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* build rendezvous header */
|
||||
hdr = (mca_pml_dr_hdr_t*)segment->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_csum = 0;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_DR_HDR_TYPE_RNDV;
|
||||
hdr->hdr_match.hdr_vid = sendreq->req_vfrag0.vf_id;
|
||||
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
|
||||
hdr->hdr_match.hdr_csum = sendreq->req_send.req_convertor.checksum;
|
||||
hdr->hdr_match.hdr_src_req.pval = sendreq;
|
||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||
hdr->hdr_common.hdr_csum = opal_csum(hdr, sizeof(mca_pml_dr_rendezvous_hdr_t));
|
||||
|
||||
/* re-init convertor for packed data */
|
||||
ompi_convertor_prepare_for_send(
|
||||
&sendreq->req_send.req_convertor,
|
||||
@ -376,15 +398,19 @@ int mca_pml_dr_send_request_start_copy(
|
||||
/* build match header */
|
||||
hdr = (mca_pml_dr_hdr_t*)segment->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_csum = 0;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_DR_HDR_TYPE_MATCH;
|
||||
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
|
||||
hdr->hdr_match.hdr_csum = sendreq->req_send.req_convertor.checksum;
|
||||
hdr->hdr_common.hdr_csum = opal_csum(hdr, sizeof(mca_pml_dr_match_hdr_t));
|
||||
|
||||
/* update lengths */
|
||||
segment->seg_len = sizeof(mca_pml_dr_match_hdr_t) + max_data;
|
||||
sendreq->req_send_offset = max_data;
|
||||
sendreq->req_vfrag0.vf_size = max_data;
|
||||
|
||||
/* short message */
|
||||
descriptor->des_cbfunc = mca_pml_dr_match_completion_free;
|
||||
@ -435,11 +461,14 @@ int mca_pml_dr_send_request_start_prepare(
|
||||
/* build match header */
|
||||
hdr = (mca_pml_dr_hdr_t*)segment->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_csum = 0;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_DR_HDR_TYPE_MATCH;
|
||||
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
|
||||
hdr->hdr_match.hdr_csum = sendreq->req_send.req_convertor.checksum;
|
||||
hdr->hdr_common.hdr_csum = opal_csum(hdr, sizeof(mca_pml_dr_match_hdr_t));
|
||||
|
||||
/* short message */
|
||||
descriptor->des_cbfunc = mca_pml_dr_match_completion_free;
|
||||
@ -448,6 +477,7 @@ int mca_pml_dr_send_request_start_prepare(
|
||||
|
||||
/* update lengths */
|
||||
sendreq->req_send_offset = size;
|
||||
sendreq->req_vfrag0.vf_size = size;
|
||||
|
||||
/* send */
|
||||
rc = mca_bml_base_send(bml_btl, descriptor, MCA_BTL_TAG_PML);
|
||||
@ -504,14 +534,15 @@ int mca_pml_dr_send_request_start_rndv(
|
||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
|
||||
hdr->hdr_match.hdr_src_req.pval = sendreq;
|
||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||
|
||||
/* first fragment of a long message */
|
||||
des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
|
||||
des->des_cbdata = sendreq;
|
||||
des->des_cbfunc = mca_pml_dr_rndv_completion;
|
||||
sendreq->req_send_offset = size;
|
||||
sendreq->req_vfrag0.vf_size = size;
|
||||
|
||||
/* send */
|
||||
rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML);
|
||||
@ -537,7 +568,6 @@ int mca_pml_dr_send_request_schedule(mca_pml_dr_send_request_t* sendreq)
|
||||
*/
|
||||
|
||||
mca_bml_base_endpoint_t* bml_endpoint = sendreq->req_endpoint;
|
||||
|
||||
if(OPAL_THREAD_ADD32(&sendreq->req_lock,1) == 1) {
|
||||
do {
|
||||
/* allocate remaining bytes to BTLs */
|
||||
@ -547,58 +577,36 @@ int mca_pml_dr_send_request_schedule(mca_pml_dr_send_request_t* sendreq)
|
||||
|
||||
mca_pml_dr_frag_hdr_t* hdr;
|
||||
mca_btl_base_descriptor_t* des;
|
||||
int rc;
|
||||
size_t size;
|
||||
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_send);
|
||||
size_t num_btl_avail = bml_endpoint->btl_send.arr_size;
|
||||
mca_pml_dr_vfrag_t* vfrag = sendreq->req_vfrag;
|
||||
size_t size = bytes_remaining;
|
||||
size_t offset = sendreq->req_send_offset - vfrag->vf_offset;
|
||||
int rc;
|
||||
|
||||
if(num_btl_avail == 1 || bytes_remaining < bml_btl->btl_min_send_size) {
|
||||
size = bytes_remaining;
|
||||
|
||||
/* otherwise attempt to give the BTL a percentage of the message
|
||||
* based on a weighting factor. for simplicity calculate this as
|
||||
* a percentage of the overall message length (regardless of amount
|
||||
* previously assigned)
|
||||
*/
|
||||
} else {
|
||||
size = (size_t)(bml_btl->btl_weight * bytes_remaining);
|
||||
/* do we need to allocate a new vfrag */
|
||||
if(vfrag->vf_size == offset) {
|
||||
MCA_PML_DR_VFRAG_ALLOC(vfrag,rc);
|
||||
if(NULL == vfrag) {
|
||||
OPAL_THREAD_LOCK(&mca_pml_dr.lock);
|
||||
opal_list_append(&mca_pml_dr.send_pending, (opal_list_item_t*)sendreq);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_dr.lock);
|
||||
break;
|
||||
}
|
||||
MCA_PML_DR_SEND_REQUEST_VFRAG_INIT(sendreq,bml_endpoint,bytes_remaining,vfrag);
|
||||
offset = 0;
|
||||
sendreq->req_num_vfrags++;
|
||||
}
|
||||
|
||||
/* makes sure that we don't exceed BTL max send size */
|
||||
if (bml_btl->btl_max_send_size != 0 &&
|
||||
size > bml_btl->btl_max_send_size - sizeof(mca_pml_dr_frag_hdr_t)) {
|
||||
size = bml_btl->btl_max_send_size - sizeof(mca_pml_dr_frag_hdr_t);
|
||||
|
||||
/* very expensive - however for buffered sends we need to send on a
|
||||
* boundary that the receiver will be able to unpack completely
|
||||
* using the native datatype
|
||||
*/
|
||||
if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) {
|
||||
ompi_convertor_t convertor;
|
||||
size_t position = sendreq->req_send_offset + size;
|
||||
/*
|
||||
* We need this local convertor in order to correctly compute
|
||||
* the correct position. Therefore we have to correctly construct and
|
||||
* destruct it.
|
||||
*/
|
||||
OBJ_CONSTRUCT( &convertor, ompi_convertor_t );
|
||||
ompi_convertor_copy_and_prepare_for_send(
|
||||
&sendreq->req_send.req_convertor,
|
||||
sendreq->req_send.req_base.req_datatype,
|
||||
sendreq->req_send.req_base.req_count,
|
||||
sendreq->req_send.req_base.req_addr,
|
||||
&convertor);
|
||||
ompi_convertor_set_position(&convertor, &position);
|
||||
OBJ_DESTRUCT( &convertor );
|
||||
size = position - sendreq->req_send_offset;
|
||||
/* makes sure that we don't exceed vfrag size */
|
||||
if (size > vfrag->vf_max_send_size) {
|
||||
size = vfrag->vf_max_send_size;
|
||||
}
|
||||
if (size > vfrag->vf_size - offset) {
|
||||
size = vfrag->vf_size - offset;
|
||||
}
|
||||
|
||||
|
||||
/* pack into a descriptor */
|
||||
ompi_convertor_set_position(&sendreq->req_send.req_convertor,
|
||||
&sendreq->req_send_offset);
|
||||
|
||||
ompi_convertor_set_position(&sendreq->req_send.req_convertor, &sendreq->req_send_offset);
|
||||
mca_bml_base_prepare_src(
|
||||
bml_btl,
|
||||
NULL,
|
||||
@ -607,7 +615,6 @@ int mca_pml_dr_send_request_schedule(mca_pml_dr_send_request_t* sendreq)
|
||||
&size,
|
||||
&des
|
||||
);
|
||||
|
||||
if(des == NULL) {
|
||||
OPAL_THREAD_LOCK(&mca_pml_dr.lock);
|
||||
opal_list_append(&mca_pml_dr.send_pending, (opal_list_item_t*)sendreq);
|
||||
@ -620,15 +627,25 @@ int mca_pml_dr_send_request_schedule(mca_pml_dr_send_request_t* sendreq)
|
||||
/* setup header */
|
||||
hdr = (mca_pml_dr_frag_hdr_t*)des->des_src->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_csum = 0;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_DR_HDR_TYPE_FRAG;
|
||||
hdr->hdr_vid = vfrag->vf_id;
|
||||
hdr->hdr_vlen = vfrag->vf_len;
|
||||
hdr->hdr_frag_idx = vfrag->vf_idx;
|
||||
hdr->hdr_frag_csum = sendreq->req_send.req_convertor.checksum;
|
||||
hdr->hdr_frag_offset = sendreq->req_send_offset;
|
||||
hdr->hdr_src_req.pval = sendreq;
|
||||
hdr->hdr_dst_req = sendreq->req_recv;
|
||||
hdr->hdr_dst_req = sendreq->req_vfrag0.vf_recv;
|
||||
hdr->hdr_common.hdr_csum = opal_csum(hdr, sizeof(mca_pml_dr_frag_hdr_t));
|
||||
|
||||
/* update state */
|
||||
vfrag->vf_idx++;
|
||||
sendreq->req_send_offset += size;
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,1);
|
||||
|
||||
/* start vfrag watchdog timer */
|
||||
MCA_PML_DR_VFRAG_WDOG_START(vfrag);
|
||||
|
||||
/* initiate send - note that this may complete before the call returns */
|
||||
rc = mca_bml_base_send( bml_btl, des, MCA_BTL_TAG_PML);
|
||||
|
||||
@ -645,8 +662,173 @@ int mca_pml_dr_send_request_schedule(mca_pml_dr_send_request_t* sendreq)
|
||||
}
|
||||
mca_pml_dr_progress();
|
||||
}
|
||||
|
||||
/*
|
||||
* VFrags w/ nacks or that timed out
|
||||
*/
|
||||
while(opal_list_get_size(&sendreq->req_retrans) &&
|
||||
sendreq->req_pipeline_depth < mca_pml_dr.send_pipeline_depth) {
|
||||
mca_pml_dr_vfrag_t* vfrag = (mca_pml_dr_vfrag_t*)opal_list_get_first(&sendreq->req_retrans);
|
||||
|
||||
/*
|
||||
* Retransmit fragments that have not been acked.
|
||||
*/
|
||||
while(vfrag->vf_idx < vfrag->vf_len &&
|
||||
sendreq->req_pipeline_depth < mca_pml_dr.send_pipeline_depth) {
|
||||
if(((1 << vfrag->vf_idx) & vfrag->vf_ack) == 0) {
|
||||
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_send);
|
||||
mca_pml_dr_frag_hdr_t* hdr;
|
||||
mca_btl_base_descriptor_t* des;
|
||||
size_t offset = vfrag->vf_offset + (vfrag->vf_max_send_size * vfrag->vf_idx);
|
||||
size_t size;
|
||||
int rc;
|
||||
|
||||
if(vfrag->vf_idx == vfrag->vf_len - 1) {
|
||||
size = vfrag->vf_size - offset;
|
||||
} else {
|
||||
size = vfrag->vf_max_send_size;
|
||||
}
|
||||
|
||||
/* pack into a descriptor */
|
||||
ompi_convertor_set_position(&sendreq->req_send.req_convertor, &offset);
|
||||
mca_bml_base_prepare_src(
|
||||
bml_btl,
|
||||
NULL,
|
||||
&sendreq->req_send.req_convertor,
|
||||
sizeof(mca_pml_dr_frag_hdr_t),
|
||||
&size,
|
||||
&des
|
||||
);
|
||||
if(des == NULL) {
|
||||
OPAL_THREAD_LOCK(&mca_pml_dr.lock);
|
||||
opal_list_append(&mca_pml_dr.send_pending, (opal_list_item_t*)sendreq);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_dr.lock);
|
||||
break;
|
||||
}
|
||||
des->des_cbfunc = mca_pml_dr_frag_completion;
|
||||
des->des_cbdata = sendreq;
|
||||
|
||||
/* setup header */
|
||||
hdr = (mca_pml_dr_frag_hdr_t*)des->des_src->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_csum = 0;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_DR_HDR_TYPE_FRAG;
|
||||
hdr->hdr_vid = vfrag->vf_id;
|
||||
hdr->hdr_vlen = vfrag->vf_len;
|
||||
hdr->hdr_frag_idx = vfrag->vf_idx;
|
||||
hdr->hdr_frag_csum = sendreq->req_send.req_convertor.checksum;
|
||||
hdr->hdr_frag_offset = sendreq->req_send_offset;
|
||||
hdr->hdr_src_req.pval = sendreq;
|
||||
hdr->hdr_dst_req = sendreq->req_vfrag0.vf_recv;
|
||||
hdr->hdr_common.hdr_csum = opal_csum(hdr, sizeof(mca_pml_dr_frag_hdr_t));
|
||||
|
||||
/* update state */
|
||||
vfrag->vf_idx++;
|
||||
sendreq->req_send_offset += size;
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,1);
|
||||
|
||||
/* start vfrag watchdog timer */
|
||||
MCA_PML_DR_VFRAG_WDOG_START(vfrag);
|
||||
|
||||
/* initiate send - note that this may complete before the call returns */
|
||||
rc = mca_bml_base_send( bml_btl, des, MCA_BTL_TAG_PML);
|
||||
|
||||
if(rc == OMPI_SUCCESS) {
|
||||
bytes_remaining -= size;
|
||||
} else {
|
||||
vfrag->vf_idx--;
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,-1);
|
||||
mca_bml_base_free(bml_btl,des);
|
||||
OPAL_THREAD_LOCK(&mca_pml_dr.lock);
|
||||
opal_list_append(&mca_pml_dr.send_pending, (opal_list_item_t*)sendreq);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_dr.lock);
|
||||
break;
|
||||
}
|
||||
}
|
||||
vfrag->vf_idx++;
|
||||
}
|
||||
|
||||
/* move from retrans to pending list */
|
||||
if(vfrag->vf_idx == vfrag->vf_len) {
|
||||
OPAL_THREAD_LOCK(&senddreq->req_mutex);
|
||||
opal_list_remove_item(&sendreq->req_retrans, (opal_list_item_t*)vfrag);
|
||||
opal_list_append(&sendreq->req_pending, (opal_list_item_t*)vfrag);
|
||||
OPAL_THREAD_UNLOCK(&sendreq->req_mutex);
|
||||
}
|
||||
}
|
||||
} while (OPAL_THREAD_ADD32(&sendreq->req_lock,-1) > 0);
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Acknowledgment of vfrag
|
||||
*/
|
||||
void mca_pml_dr_send_request_acked(
|
||||
mca_pml_dr_send_request_t* sendreq,
|
||||
mca_pml_dr_ack_hdr_t* ack)
|
||||
{
|
||||
if(ack->hdr_common.hdr_flags & MCA_PML_DR_HDR_FLAGS_MATCH) {
|
||||
sendreq->req_vfrag0.vf_recv = ack->hdr_dst_req;
|
||||
MCA_PML_DR_SEND_REQUEST_ADVANCE(sendreq);
|
||||
} else {
|
||||
mca_pml_dr_vfrag_t* vfrag;
|
||||
MCA_PML_DR_SEND_REQUEST_VFRAG_PENDING(sendreq, ack, vfrag);
|
||||
if(NULL == vfrag) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* add in acknowledged fragments */
|
||||
vfrag->vf_ack |= ack->hdr_vmask;
|
||||
|
||||
/* have all fragments w/in this vfrag been acked? */
|
||||
if((vfrag->vf_ack & vfrag->vf_mask) == vfrag->vf_mask) {
|
||||
|
||||
/* return vfrag */
|
||||
if (vfrag != &sendreq->req_vfrag0) {
|
||||
MCA_PML_DR_VFRAG_RETURN(vfrag);
|
||||
}
|
||||
|
||||
/* are we done with this request */
|
||||
OPAL_THREAD_LOCK(&ompi_request_lock);
|
||||
sendreq->req_num_acks++;
|
||||
if(sendreq->req_bytes_delivered == sendreq->req_send.req_bytes_packed &&
|
||||
sendreq->req_num_acks == sendreq->req_num_vfrags) {
|
||||
MCA_PML_DR_SEND_REQUEST_PML_COMPLETE(sendreq);
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
||||
|
||||
} else {
|
||||
/* retransmit missing fragments */
|
||||
OPAL_THREAD_LOCK(&sendreq->req_mutex);
|
||||
vfrag->vf_idx = 0;
|
||||
opal_list_append(&sendreq->req_retrans, (opal_list_item_t*)vfrag);
|
||||
OPAL_THREAD_UNLOCK(&sendreq->req_mutex);
|
||||
mca_pml_dr_send_request_schedule(sendreq);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void mca_pml_dr_send_request_nacked(
|
||||
mca_pml_dr_send_request_t* sendreq,
|
||||
mca_pml_dr_ack_hdr_t* ack)
|
||||
{
|
||||
mca_pml_dr_vfrag_t* vfrag;
|
||||
MCA_PML_DR_SEND_REQUEST_VFRAG_PENDING(sendreq, ack, vfrag);
|
||||
if(NULL == vfrag) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* removed nacked bits from acknowledged fragments */
|
||||
vfrag->vf_idx = 0;
|
||||
vfrag->vf_ack &= ~ack->hdr_vmask;
|
||||
|
||||
/* retransmit missing fragments */
|
||||
OPAL_THREAD_LOCK(&sendreq->req_mutex);
|
||||
opal_list_append(&sendreq->req_retrans, (opal_list_item_t*)vfrag);
|
||||
OPAL_THREAD_UNLOCK(&sendreq->req_mutex);
|
||||
mca_pml_dr_send_request_schedule(sendreq);
|
||||
}
|
||||
|
||||
|
@ -21,14 +21,17 @@
|
||||
#ifndef OMPI_PML_DR_SEND_REQUEST_H
|
||||
#define OMPI_PML_DR_SEND_REQUEST_H
|
||||
|
||||
#include "mca/btl/btl.h"
|
||||
#include "mca/pml/base/pml_base_sendreq.h"
|
||||
#include "mca/mpool/base/base.h"
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/datatype/convertor.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "ompi/mca/pml/base/pml_base_sendreq.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/bml/bml.h"
|
||||
|
||||
#include "pml_dr_proc.h"
|
||||
#include "pml_dr_comm.h"
|
||||
#include "pml_dr_hdr.h"
|
||||
#include "datatype/convertor.h"
|
||||
#include "mca/bml/bml.h"
|
||||
#include "pml_dr_vfrag.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
@ -39,17 +42,24 @@ struct mca_pml_dr_send_request_t {
|
||||
mca_pml_base_send_request_t req_send;
|
||||
ompi_proc_t* req_proc;
|
||||
mca_bml_base_endpoint_t* req_endpoint;
|
||||
ompi_ptr_t req_recv;
|
||||
#if OMPI_HAVE_THREAD_SUPPORT
|
||||
volatile int32_t req_state;
|
||||
volatile int32_t req_lock;
|
||||
#else
|
||||
volatile int32_t req_state;
|
||||
volatile int32_t req_lock;
|
||||
int32_t req_state;
|
||||
int32_t req_lock;
|
||||
#endif
|
||||
size_t req_pipeline_depth;
|
||||
size_t req_bytes_delivered;
|
||||
size_t req_send_offset;
|
||||
|
||||
mca_pml_dr_vfrag_t* req_vfrag;
|
||||
mca_pml_dr_vfrag_t req_vfrag0;
|
||||
opal_list_t req_pending;
|
||||
opal_list_t req_retrans;
|
||||
opal_mutex_t req_mutex;
|
||||
size_t req_num_acks;
|
||||
size_t req_num_vfrags;
|
||||
};
|
||||
typedef struct mca_pml_dr_send_request_t mca_pml_dr_send_request_t;
|
||||
|
||||
@ -108,6 +118,7 @@ OBJ_CLASS_DECLARATION(mca_pml_dr_send_request_t);
|
||||
#define MCA_PML_DR_SEND_REQUEST_START(sendreq, rc) \
|
||||
do { \
|
||||
mca_pml_dr_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; \
|
||||
mca_pml_dr_comm_proc_t* proc = comm->procs + sendreq->req_send.req_base.req_peer; \
|
||||
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*)sendreq->req_proc->proc_pml; \
|
||||
mca_bml_base_btl_t* bml_btl; \
|
||||
size_t size = sendreq->req_send.req_bytes_packed; \
|
||||
@ -126,9 +137,12 @@ do {
|
||||
sendreq->req_send.req_base.req_ompi.req_complete = false; \
|
||||
sendreq->req_send.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \
|
||||
sendreq->req_send.req_base.req_ompi.req_status._cancelled = 0; \
|
||||
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32( \
|
||||
&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1); \
|
||||
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32(&proc->send_sequence,1); \
|
||||
sendreq->req_endpoint = endpoint; \
|
||||
sendreq->req_vfrag0.vf_id = OPAL_THREAD_ADD32(&proc->vfrag_id,1); \
|
||||
sendreq->req_vfrag = &sendreq->req_vfrag0; \
|
||||
sendreq->req_num_acks = 0; \
|
||||
sendreq->req_num_vfrags = 0; \
|
||||
\
|
||||
/* select a btl */ \
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); \
|
||||
@ -282,12 +296,103 @@ do {
|
||||
*/
|
||||
|
||||
#define MCA_PML_DR_SEND_REQUEST_RETURN(sendreq) \
|
||||
{ \
|
||||
do { \
|
||||
/* Let the base handle the reference counts */ \
|
||||
MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send)); \
|
||||
OMPI_FREE_LIST_RETURN( \
|
||||
&mca_pml_dr.send_requests, (opal_list_item_t*)sendreq); \
|
||||
}
|
||||
} while(0)
|
||||
|
||||
/*
|
||||
* Lookup/allocate a vfrag for the pending send
|
||||
*/
|
||||
|
||||
#define MCA_PML_DR_SEND_REQUEST_VFRAG_INIT(sendreq, endpoint, size, vfrag) \
|
||||
do { \
|
||||
mca_pml_dr_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; \
|
||||
mca_pml_dr_comm_proc_t* proc = comm->procs + sendreq->req_send.req_base.req_peer; \
|
||||
size_t max_send_size = endpoint->btl_max_send_size - sizeof(mca_pml_dr_frag_hdr_t); \
|
||||
size_t div = size / max_send_size; \
|
||||
\
|
||||
if(div == 0) { \
|
||||
vfrag->vf_len = 1; \
|
||||
vfrag->vf_size = size; \
|
||||
vfrag->vf_mask = 1; \
|
||||
} else if(div > 64) { \
|
||||
vfrag->vf_len = 64; \
|
||||
vfrag->vf_size = (max_send_size << 6); /* size * 64 */ \
|
||||
vfrag->vf_mask = ~(uint64_t)0; \
|
||||
} else if (div == 64) { \
|
||||
size_t mod = size % max_send_size; \
|
||||
vfrag->vf_len = 64; \
|
||||
vfrag->vf_size = (mod ? (size - mod) : size); \
|
||||
vfrag->vf_mask = ~(uint64_t)0; \
|
||||
} else { \
|
||||
size_t mod = size % max_send_size; \
|
||||
vfrag->vf_len = div + (mod ? 1 : 0); \
|
||||
vfrag->vf_size = size; \
|
||||
vfrag->vf_mask = (((uint64_t)1 << vfrag->vf_len) - (uint64_t)1); \
|
||||
} \
|
||||
\
|
||||
vfrag->vf_id = OPAL_THREAD_ADD32(&proc->vfrag_id,1); \
|
||||
vfrag->vf_ack = 0; \
|
||||
vfrag->vf_offset = sendreq->req_send_offset; \
|
||||
vfrag->vf_idx = 0; \
|
||||
vfrag->vf_max_send_size = max_send_size; \
|
||||
opal_list_append(&sendreq->req_pending, (opal_list_item_t*)vfrag); \
|
||||
sendreq->req_vfrag = vfrag; \
|
||||
} while(0)
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
*/
|
||||
|
||||
#define MCA_PML_DR_SEND_REQUEST_VFRAG_PENDING(sendreq,hdr,vfrag) \
|
||||
do { \
|
||||
if ((hdr)->hdr_vid == (sendreq)->req_vfrag0.vf_id) { \
|
||||
vfrag = &(sendreq)->req_vfrag0; \
|
||||
} else if((sendreq)->req_vfrag->vf_id == (hdr)->hdr_vid) { \
|
||||
vfrag = (sendreq)->req_vfrag; \
|
||||
opal_list_remove_item(&(sendreq)->req_pending,(opal_list_item_t*)vfrag); \
|
||||
} else { \
|
||||
opal_list_item_t* item; \
|
||||
vfrag = NULL; \
|
||||
OPAL_THREAD_LOCK(&(sendreq)->req_mutex); \
|
||||
for(item = opal_list_get_first(&(sendreq)->req_pending); \
|
||||
item != opal_list_get_end(&(sendreq)->req_pending); \
|
||||
item = opal_list_get_next(item)) { \
|
||||
mca_pml_dr_vfrag_t* vf = (mca_pml_dr_vfrag_t*)item; \
|
||||
if(vf->vf_id == (hdr)->hdr_vid) { \
|
||||
opal_list_remove_item(&(sendreq)->req_pending,item); \
|
||||
vfrag = vf; \
|
||||
break; \
|
||||
} \
|
||||
} \
|
||||
OPAL_THREAD_UNLOCK(&(sendreq)->req_mutex); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
/*
|
||||
*
|
||||
*/
|
||||
|
||||
#define MCA_PML_DR_SEND_REQUEST_VFRAG_RETRANS(sendreq,hdr,vfrag) \
|
||||
do { \
|
||||
opal_list_item_t* item; \
|
||||
vfrag = NULL; \
|
||||
OPAL_THREAD_LOCK(&(sendreq)->req_mutex); \
|
||||
for(item = opal_list_get_first(&(sendreq)->req_retrans); \
|
||||
item != opal_list_get_end(&(sendreq)->req_retrans); \
|
||||
item = opal_list_get_next(item)) { \
|
||||
mca_pml_dr_vfrag_t* vf = (mca_pml_dr_vfrag_t*)item; \
|
||||
if(vf->vf_id == (hdr)->hdr_vid) { \
|
||||
vfrag = vf; \
|
||||
break; \
|
||||
} \
|
||||
} \
|
||||
OPAL_THREAD_UNLOCK(&(sendreq)->req_mutex); \
|
||||
} while(0)
|
||||
|
||||
|
||||
/*
|
||||
@ -357,6 +462,21 @@ int mca_pml_dr_send_request_start_rndv(
|
||||
int mca_pml_dr_send_request_schedule(
|
||||
mca_pml_dr_send_request_t* sendreq);
|
||||
|
||||
int mca_pml_dr_send_request_reschedule(
|
||||
mca_pml_dr_send_request_t* sendreq,
|
||||
mca_pml_dr_vfrag_t* vfrag);
|
||||
|
||||
/**
|
||||
* Acknowledgment of vfrag
|
||||
*/
|
||||
void mca_pml_dr_send_request_acked(
|
||||
mca_pml_dr_send_request_t* sendreq,
|
||||
mca_pml_dr_ack_hdr_t*);
|
||||
|
||||
void mca_pml_dr_send_request_nacked(
|
||||
mca_pml_dr_send_request_t* sendreq,
|
||||
mca_pml_dr_ack_hdr_t*);
|
||||
|
||||
/**
|
||||
* Completion callback on match header
|
||||
* Cache descriptor.
|
||||
|
@ -17,10 +17,27 @@
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "mca/pml/pml.h"
|
||||
#include "pml_dr_endpoint.h"
|
||||
#include "pml_dr_vfrag.h"
|
||||
|
||||
|
||||
static void mca_pml_dr_vfrag_construct(mca_pml_dr_vfrag_t* vfrag)
|
||||
{
|
||||
memset(&vfrag->vf_event, 0, sizeof(vfrag->vf_event));
|
||||
}
|
||||
|
||||
|
||||
static void mca_pml_dr_vfrag_destruct(mca_pml_dr_vfrag_t* vfrag)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
mca_pml_dr_vfrag_t,
|
||||
opal_list_item_t,
|
||||
mca_pml_dr_vfrag_construct,
|
||||
mca_pml_dr_vfrag_destruct
|
||||
);
|
||||
|
||||
|
||||
|
79
ompi/mca/pml/dr/pml_dr_vfrag.h
Обычный файл
79
ompi/mca/pml/dr/pml_dr_vfrag.h
Обычный файл
@ -0,0 +1,79 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
#ifndef OMPI_PML_DR_VFRAG_H_
|
||||
#define OMPI_PML_DR_VFRAG_H_
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "opal/event/event.h"
|
||||
#include "pml_dr.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
struct mca_pml_dr_vfrag_t {
|
||||
opal_list_item_t super;
|
||||
ompi_ptr_t vf_send;
|
||||
ompi_ptr_t vf_recv;
|
||||
uint32_t vf_id;
|
||||
uint16_t vf_idx;
|
||||
uint16_t vf_len;
|
||||
size_t vf_offset;
|
||||
size_t vf_size;
|
||||
size_t vf_max_send_size;
|
||||
uint64_t vf_ack;
|
||||
uint64_t vf_mask;
|
||||
opal_event_t vf_event;
|
||||
};
|
||||
typedef struct mca_pml_dr_vfrag_t mca_pml_dr_vfrag_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_pml_dr_vfrag_t);
|
||||
|
||||
#define MCA_PML_DR_VFRAG_ALLOC(vfrag,rc) \
|
||||
do { \
|
||||
opal_list_item_t* item; \
|
||||
OMPI_FREE_LIST_WAIT(&mca_pml_dr.vfrags, item, rc); \
|
||||
vfrag = (mca_pml_dr_vfrag_t*)item; \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define MCA_PML_DR_VFRAG_RETURN(vfrag) \
|
||||
do { \
|
||||
OMPI_FREE_LIST_RETURN(&mca_pml_dr.vfrags, (opal_list_item_t*)vfrag); \
|
||||
} while(0)
|
||||
|
||||
#define MCA_PML_DR_VFRAG_WDOG_START(vfrag) \
|
||||
do { \
|
||||
\
|
||||
} while(0)
|
||||
|
||||
#define MCA_PML_DR_VFRAG_WDOG_STOP(vfrag) \
|
||||
do { \
|
||||
\
|
||||
} while(0)
|
||||
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user