1
1

* Split the single event queue (EQ) size setting into separate send and
  receive event queue sizes

* Check the available send slot count before checking the flow-control
  state when starting a send, to prevent a race during restart
* Return the pending request fragment to its free list once the send
  request has completed
* Add branch-prediction hints (OPAL_LIKELY / OPAL_UNLIKELY) to a number of
  error-condition checks

This commit was SVN r26430.
Этот коммит содержится в:
Brian Barrett 2012-05-10 21:43:48 +00:00
родитель 2d78728d38
Коммит 2e52374847
9 изменённых файлов: 143 добавлений и 124 удалений

Просмотреть файл

@ -43,8 +43,10 @@ struct mca_mtl_portals4_module_t {
size_t recv_short_size;
/** Number of short message blocks which should be created during startup */
int recv_short_num;
/** Length of both the receive and send event queues */
int queue_size;
/** Length of the send event queues */
int send_queue_size;
/** Length of the receive event queues */
int recv_queue_size;
/** Protocol for long message transfer */
enum { eager, rndv } protocol;

Просмотреть файл

@ -47,7 +47,7 @@ ompi_mtl_portals4_cancel(struct mca_mtl_base_module_t* mtl,
if (PTL_INVALID_HANDLE != recvreq->me_h) {
ret = PtlMEUnlink(recvreq->me_h);
if (PTL_OK == ret) {
if (OPAL_UNLIKELY(PTL_OK == ret)) {
recvreq->super.super.ompi_req->req_status._cancelled = true;
recvreq->super.super.completion_callback(&recvreq->super.super);
}

Просмотреть файл

@ -100,13 +100,22 @@ ompi_mtl_portals4_component_open(void)
ompi_mtl_portals4.recv_short_size = tmp;
mca_base_param_reg_int(&mca_mtl_portals4_component.mtl_version,
"event_queue_size",
"Size of the event queue in entries",
"send_event_queue_size",
"Size of the send event queue in entries",
false,
false,
1024,
&tmp);
ompi_mtl_portals4.queue_size = tmp;
ompi_mtl_portals4.send_queue_size = tmp;
mca_base_param_reg_int(&mca_mtl_portals4_component.mtl_version,
"recv_event_queue_size",
"Size of the recv event queue in entries",
false,
false,
1024,
&tmp);
ompi_mtl_portals4.recv_queue_size = tmp;
mca_base_param_reg_string(&mca_mtl_portals4_component.mtl_version,
"long_protocol",
@ -140,7 +149,9 @@ ompi_mtl_portals4_component_open(void)
"Short receive blocks: %d",
ompi_mtl_portals4.recv_short_num);
opal_output_verbose(1, ompi_mtl_base_output,
"Queue size: %d", ompi_mtl_portals4.queue_size);
"Send queue size: %d", ompi_mtl_portals4.send_queue_size);
opal_output_verbose(1, ompi_mtl_base_output,
"Recv queue size: %d", ompi_mtl_portals4.recv_queue_size);
opal_output_verbose(1, ompi_mtl_base_output,
"Long protocol: %s",
(ompi_mtl_portals4.protocol == eager) ? "Eager" :
@ -230,7 +241,7 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
/* create event queues */
ret = PtlEQAlloc(ompi_mtl_portals4.ni_h,
ompi_mtl_portals4.queue_size,
ompi_mtl_portals4.send_queue_size,
&ompi_mtl_portals4.send_eq_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_output,
@ -239,7 +250,7 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
goto error;
}
ret = PtlEQAlloc(ompi_mtl_portals4.ni_h,
ompi_mtl_portals4.queue_size,
ompi_mtl_portals4.recv_queue_size,
&ompi_mtl_portals4.recv_eq_h);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_mtl_base_output,
@ -516,7 +527,8 @@ ompi_mtl_portals4_progress(void)
}
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
if (0 == count) {
if (OPAL_UNLIKELY(0 == count &&
0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) {
ompi_mtl_portals4_pending_list_progress();
}
#endif

Просмотреть файл

@ -34,8 +34,6 @@ ompi_mtl_portals4_flowctl_init(void)
ompi_mtl_portals4.flowctl.flowctl_active = false;
OBJ_CONSTRUCT(&ompi_mtl_portals4.flowctl.active_sends, opal_list_t);
OBJ_CONSTRUCT(&ompi_mtl_portals4.flowctl.pending_sends, opal_list_t);
OBJ_CONSTRUCT(&ompi_mtl_portals4.flowctl.pending_fl, opal_free_list_t);
@ -44,7 +42,10 @@ ompi_mtl_portals4_flowctl_init(void)
OBJ_CLASS(ompi_mtl_portals4_pending_request_t),
1, -1, 1);
ompi_mtl_portals4.flowctl.slots = (ompi_mtl_portals4.queue_size - 3) / 3;
ompi_mtl_portals4.flowctl.max_send_slots = (ompi_mtl_portals4.send_queue_size - 3) / 3;
ompi_mtl_portals4.flowctl.send_slots = ompi_mtl_portals4.flowctl.max_send_slots;
opal_output(ompi_mtl_base_output, "num send slots: %d", ompi_mtl_portals4.flowctl.max_send_slots);
ompi_mtl_portals4.flowctl.alert_req.type = portals4_req_flowctl;
ompi_mtl_portals4.flowctl.alert_req.event_callback = flowctl_alert_callback;
@ -59,7 +60,7 @@ ompi_mtl_portals4_flowctl_init(void)
ompi_mtl_portals4.send_eq_h,
REQ_FLOWCTL_TABLE_ID,
&ompi_mtl_portals4.flowctl_idx);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlPTAlloc failed: %d\n",
__FILE__, __LINE__, ret);
@ -68,7 +69,7 @@ ompi_mtl_portals4_flowctl_init(void)
ret = PtlCTAlloc(ompi_mtl_portals4.ni_h,
&ompi_mtl_portals4.flowctl.trigger_ct_h);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlCTAlloc failed: %d\n",
__FILE__, __LINE__, ret);
@ -99,7 +100,7 @@ ompi_mtl_portals4_flowctl_init(void)
PTL_PRIORITY_LIST,
NULL,
&ompi_mtl_portals4.flowctl.trigger_me_h);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMEAppend failed: %d\n",
__FILE__, __LINE__, ret);
@ -111,7 +112,7 @@ ompi_mtl_portals4_flowctl_init(void)
trigger */
ret = PtlCTAlloc(ompi_mtl_portals4.ni_h,
&ompi_mtl_portals4.flowctl.alert_ct_h);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlCTAlloc failed: %d\n",
__FILE__, __LINE__, ret);
@ -130,7 +131,7 @@ ompi_mtl_portals4_flowctl_init(void)
PTL_PRIORITY_LIST,
&ompi_mtl_portals4.flowctl.alert_req,
&ompi_mtl_portals4.flowctl.alert_me_h);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMEAppend failed: %d\n",
__FILE__, __LINE__, ret);
@ -140,7 +141,7 @@ ompi_mtl_portals4_flowctl_init(void)
/* Fanin CT/ME for receiving fan-in for restart */
ret = PtlCTAlloc(ompi_mtl_portals4.ni_h,
&ompi_mtl_portals4.flowctl.fanin_ct_h);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlCTAlloc failed: %d\n",
__FILE__, __LINE__, ret);
@ -160,7 +161,7 @@ ompi_mtl_portals4_flowctl_init(void)
PTL_PRIORITY_LIST,
NULL,
&ompi_mtl_portals4.flowctl.fanin_me_h);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMEAppend failed: %d\n",
__FILE__, __LINE__, ret);
@ -170,7 +171,7 @@ ompi_mtl_portals4_flowctl_init(void)
/* Fan-out CT/ME for sending restart messages after fan-in */
ret = PtlCTAlloc(ompi_mtl_portals4.ni_h,
&ompi_mtl_portals4.flowctl.fanout_ct_h);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlCTAlloc failed: %d\n",
__FILE__, __LINE__, ret);
@ -189,7 +190,7 @@ ompi_mtl_portals4_flowctl_init(void)
PTL_PRIORITY_LIST,
&ompi_mtl_portals4.flowctl.fanout_req,
&ompi_mtl_portals4.flowctl.fanout_me_h);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMEAppend failed: %d\n",
__FILE__, __LINE__, ret);
@ -272,7 +273,7 @@ ompi_mtl_portals4_flowctl_trigger(void)
0,
NULL,
0);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlPut failed: %d\n",
__FILE__, __LINE__, ret);
@ -309,9 +310,9 @@ start_recover(void)
ompi_mtl_portals4.flowctl.flowctl_active = true;
ompi_mtl_portals4.flowctl.epoch_counter++;
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
opal_output_verbose(1, ompi_mtl_base_output,
"Entering flowctl_start_recover %d",
ompi_mtl_portals4.flowctl.epoch_counter));
ompi_mtl_portals4.flowctl.epoch_counter);
/* re-arm trigger/alarm for next time */
ret = setup_alarm(ompi_mtl_portals4.flowctl.epoch_counter);
@ -332,7 +333,8 @@ start_recover(void)
}
/* drain all pending sends */
while (0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.active_sends)) {
while (ompi_mtl_portals4.flowctl.send_slots !=
ompi_mtl_portals4.flowctl.max_send_slots) {
opal_progress();
}
@ -370,7 +372,7 @@ start_recover(void)
0,
NULL,
0);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlPut failed: %d\n",
__FILE__, __LINE__, ret);
@ -410,7 +412,7 @@ setup_alarm(uint32_t epoch)
0,
ompi_mtl_portals4.flowctl.trigger_ct_h,
(epoch * ompi_mtl_portals4.flowctl.num_procs) + 1);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlTriggeredPut failed: %d\n",
__FILE__, __LINE__, ret);
@ -432,7 +434,7 @@ setup_alarm(uint32_t epoch)
0,
ompi_mtl_portals4.flowctl.alert_ct_h,
epoch + 1);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlTriggeredPut failed: %d\n",
__FILE__, __LINE__, ret);
@ -460,7 +462,7 @@ setup_barrier(uint32_t epoch)
ct,
ompi_mtl_portals4.flowctl.fanin_ct_h,
epoch * (ompi_mtl_portals4.flowctl.num_children + 1));
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlTriggeredCTSet failed: %d\n",
__FILE__, __LINE__, ret);
@ -479,7 +481,7 @@ setup_barrier(uint32_t epoch)
0,
ompi_mtl_portals4.flowctl.fanin_ct_h,
epoch * (ompi_mtl_portals4.flowctl.num_children + 1));
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlTriggeredPut failed: %d\n",
__FILE__, __LINE__, ret);
@ -498,7 +500,7 @@ setup_barrier(uint32_t epoch)
0,
ompi_mtl_portals4.flowctl.fanin_ct_h,
epoch * (ompi_mtl_portals4.flowctl.num_children + 1));
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlTriggeredPut failed: %d\n",
__FILE__, __LINE__, ret);
@ -519,7 +521,7 @@ setup_barrier(uint32_t epoch)
0,
ompi_mtl_portals4.flowctl.fanout_ct_h,
epoch);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlTriggeredPut failed: %d\n",
__FILE__, __LINE__, ret);
@ -549,7 +551,7 @@ flowctl_fanout_callback(ptl_event_t *ev,
ompi_mtl_portals4.flowctl.flowctl_active = false;
ret = PtlPTEnable(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlPTEnabled failed: %d\n",
__FILE__, __LINE__, ret);

Просмотреть файл

@ -35,10 +35,10 @@ OBJ_CLASS_DECLARATION(ompi_mtl_portals4_pending_request_t);
struct ompi_mtl_portals4_flowctl_t {
bool flowctl_active;
opal_list_t active_sends;
int32_t send_slots;
int32_t max_send_slots;
opal_list_t pending_sends;
opal_free_list_t pending_fl;
int32_t slots;
ompi_mtl_portals4_base_request_t alert_req;
ompi_mtl_portals4_base_request_t fanout_req;

Просмотреть файл

@ -36,7 +36,7 @@ completion_fn(ptl_event_t *ev, ompi_mtl_portals4_base_request_t *ptl_base_reques
"%s:%d: completion_fn: %d %d",
__FILE__, __LINE__, ev->type, ev->ni_fail_type);
if (ev->ni_fail_type == PTL_OK) {
if (OPAL_UNLIKELY(ev->ni_fail_type == PTL_OK)) {
ptl_request->found_match = 1;
ptl_request->status.MPI_SOURCE = MTL_PORTALS4_GET_SOURCE(ev->match_bits);
ptl_request->status.MPI_TAG = MTL_PORTALS4_GET_TAG(ev->match_bits);
@ -103,7 +103,7 @@ ompi_mtl_portals4_iprobe(struct mca_mtl_base_module_t* mtl,
&me,
PTL_SEARCH_ONLY,
&request);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMESearch failed: %d",
__FILE__, __LINE__, ret);
@ -177,7 +177,7 @@ ompi_mtl_portals4_improbe(struct mca_mtl_base_module_t *mtl,
&me,
PTL_SEARCH_DELETE,
&request);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMESearch failed: %d",
__FILE__, __LINE__, ret);

Просмотреть файл

@ -67,7 +67,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
MTL_PORTALS4_GET_SOURCE(ev->match_bits);
ptl_request->super.super.ompi_req->req_status.MPI_TAG =
MTL_PORTALS4_GET_TAG(ev->match_bits);
if (msg_length > ptl_request->delivery_len) {
if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) {
opal_output_verbose(1, ompi_mtl_base_output, "truncate expected: %ld %ld",
msg_length, ptl_request->delivery_len);
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
@ -90,7 +90,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
&md,
&ptl_request->md_h);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMDBind failed: %d",
@ -106,7 +106,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
ev->hdr_data,
ompi_mtl_portals4.eager_limit,
ptl_request);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlGet failed: %d",
__FILE__, __LINE__, ret);
@ -120,7 +120,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
ev->start,
ev->mlength);
if (OMPI_SUCCESS != ret) {
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: ompi_mtl_datatype_unpack failed: %d",
__FILE__, __LINE__, ret);
@ -138,7 +138,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %lu (0x%lx) got reply event",
ptl_request->opcount, ptl_request->hdr_data));
if (ev->ni_fail_type != PTL_NI_OK) {
if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d",
__FILE__, __LINE__, ev->ni_fail_type);
@ -161,7 +161,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
ptl_request->delivery_ptr,
ptl_request->super.super.ompi_req->req_status._ucount);
if (OMPI_SUCCESS != ret) {
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: ompi_mtl_datatype_unpack failed: %d",
__FILE__, __LINE__, ret);
@ -178,7 +178,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %lu (0x%lx) got put_overflow event",
ptl_request->opcount, ev->hdr_data));
if (ev->ni_fail_type != PTL_NI_OK) {
if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PTL_EVENT_PUT_OVERFLOW with ni_fail_type: %d",
__FILE__, __LINE__, ev->ni_fail_type);
@ -192,7 +192,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
MTL_PORTALS4_GET_SOURCE(ev->match_bits);
ptl_request->super.super.ompi_req->req_status.MPI_TAG =
MTL_PORTALS4_GET_TAG(ev->match_bits);
if (msg_length > ptl_request->delivery_len) {
if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) {
opal_output_verbose(1, ompi_mtl_base_output, "truncate unexpected: %ld %ld %d",
msg_length, ptl_request->delivery_len, MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits));
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
@ -218,7 +218,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
&iov, &iov_count,
&max_data );
if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
if (ret < 0) {
if (OPAL_UNLIKELY(ret < 0)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: opal_convertor_unpack failed: %d",
__FILE__, __LINE__, ret);
@ -240,7 +240,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
0,
NULL,
0);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlPut failed: %d",
__FILE__, __LINE__, ret);
@ -271,7 +271,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
&md,
&ptl_request->md_h);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMDBind failed: %d",
@ -289,7 +289,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
ev->hdr_data,
ev->mlength,
ptl_request);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlGet failed: %d",
__FILE__, __LINE__, ret);
@ -353,7 +353,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
src, tag);
ret = ompi_mtl_datatype_recv_buf(convertor, &start, &length, &free_after);
if (OMPI_SUCCESS != ret) {
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
@ -398,7 +398,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
PTL_PRIORITY_LIST,
ptl_request,
&ptl_request->me_h);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMEAppend failed: %d",
@ -412,7 +412,6 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
if (length > ompi_mtl_portals4.eager_limit) {
while (true != ptl_request->req_started) {
ompi_mtl_portals4_progress();
opal_atomic_rmb();
}
}
@ -436,12 +435,12 @@ ompi_mtl_portals4_imrecv(struct mca_mtl_base_module_t* mtl,
(ompi_mtl_portals4_message_t*) (*message)->req_ptr;
ret = ompi_mtl_datatype_recv_buf(convertor, &start, &length, &free_after);
if (OMPI_SUCCESS != ret) {
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
#if OPAL_ENABLE_DEBUG
ptl_request->opcount = opal_atomic_add_64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1);
ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*) &ompi_mtl_portals4.recv_opcount, 1);
ptl_request->hdr_data = 0;
#endif
ptl_request->super.type = portals4_req_recv;

Просмотреть файл

@ -43,7 +43,7 @@ ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev,
ompi_mtl_portals4_recv_short_block_t *block = ptl_request->block;
if (PTL_EVENT_AUTO_FREE == ev->type) {
if (block->release_on_free) {
if (OPAL_UNLIKELY(block->release_on_free)) {
opal_list_remove_item(&ompi_mtl_portals4.waiting_recv_short_blocks,
&block->base);
ret = ompi_mtl_portals4_recv_short_block_free(block);
@ -124,11 +124,6 @@ ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block)
PTL_ME_EVENT_LINK_DISABLE |
PTL_ME_MANAGE_LOCAL |
PTL_ME_MAY_ALIGN;
#if 0
#if !OPAL_ENABLE_DEBUG
me.options |= PTL_ME_EVENT_COMM_DISABLE;
#endif
#endif
me.match_id.phys.nid = PTL_NID_ANY;
me.match_id.phys.pid = PTL_PID_ANY;
me.match_bits = match_bits;
@ -140,7 +135,7 @@ ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block)
PTL_OVERFLOW_LIST,
&block->request,
&block->me_h);
if (ret == PTL_OK) {
if (OPAL_LIKELY(ret == PTL_OK)) {
ret = OMPI_SUCCESS;
opal_list_append(&ompi_mtl_portals4.active_recv_short_blocks,
&block->base);
@ -164,7 +159,7 @@ ompi_mtl_portals4_recv_short_init(void)
for (i = 0 ; i < ompi_mtl_portals4.recv_short_num ; ++i) {
ompi_mtl_portals4_recv_short_block_t *block =
ompi_mtl_portals4_recv_short_block_alloc(false);
if (NULL == block) {
if (OPAL_UNLIKELY(NULL == block)) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
opal_list_append(&ompi_mtl_portals4.waiting_recv_short_blocks,

Просмотреть файл

@ -40,11 +40,12 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
int retval = OMPI_SUCCESS, ret, val, add = 1;
ompi_mtl_portals4_isend_request_t* ptl_request =
(ompi_mtl_portals4_isend_request_t*) ptl_base_request;
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
ompi_mtl_portals4_pending_request_t *pending =
ptl_request->pending;
if (ev->ni_fail_type == PTL_NI_PT_DISABLED) {
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
if (OPAL_UNLIKELY(ev->ni_fail_type == PTL_NI_PT_DISABLED)) {
ompi_mtl_portals4_pending_request_t *pending =
ptl_request->pending;
OPAL_OUTPUT_VERBOSE((10, ompi_mtl_base_output,
"send %lu hit flow control",
ptl_request->opcount));
@ -59,17 +60,16 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
}
}
opal_list_remove_item(&ompi_mtl_portals4.flowctl.active_sends,
&pending->super.super);
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
&pending->super.super);
opal_atomic_add_32(&ompi_mtl_portals4.flowctl.slots, 1);
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
ompi_mtl_portals4_flowctl_trigger();
return OMPI_SUCCESS;
}
#endif
if (ev->ni_fail_type != PTL_NI_OK) {
if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: send callback ni_fail_type: %d",
__FILE__, __LINE__, ev->ni_fail_type);
@ -98,16 +98,8 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
}
add++;
}
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
/* once the ack is received, we're out of flow control problem
regions, so we can remove this list entry */
opal_list_remove_item(&ompi_mtl_portals4.flowctl.active_sends,
&pending->super.super);
OPAL_FREE_LIST_RETURN(&ompi_mtl_portals4.flowctl.pending_fl,
&pending->super);
#endif
}
val = opal_atomic_add_32((int32_t*)&ptl_request->event_count, add);
val = OPAL_THREAD_ADD32((int32_t*)&ptl_request->event_count, add);
if (val >= 3) {
if (NULL != ptl_request->buffer_ptr) {
@ -124,8 +116,13 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
ptl_request->opcount));
*complete = true;
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
opal_atomic_add_32(&ompi_mtl_portals4.flowctl.slots, 1);
ompi_mtl_portals4_pending_list_progress();
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
OPAL_FREE_LIST_RETURN(&ompi_mtl_portals4.flowctl.pending_fl,
&ptl_request->pending->super);
if (OPAL_UNLIKELY(0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) {
ompi_mtl_portals4_pending_list_progress();
}
#endif
}
@ -200,7 +197,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
&md,
&ptl_request->md_h);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMDBind failed: %d",
__FILE__, __LINE__, ret);
@ -228,7 +225,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
PTL_PRIORITY_LIST,
ptl_request,
&ptl_request->me_h);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMEAppend failed: %d",
__FILE__, __LINE__, ret);
@ -258,7 +255,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
0,
ptl_request,
hdr_data);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlPut failed: %d",
__FILE__, __LINE__, ret);
@ -299,7 +296,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
ret = PtlMDBind(ompi_mtl_portals4.ni_h,
&md,
&ptl_request->md_h);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMDBind failed: %d",
__FILE__, __LINE__, ret);
@ -326,7 +323,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
PTL_PRIORITY_LIST,
ptl_request,
&ptl_request->me_h);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMEAppend failed: %d",
__FILE__, __LINE__, ret);
@ -350,7 +347,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag,
0,
ptl_request,
hdr_data);
if (PTL_OK != ret) {
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlPut failed: %d",
__FILE__, __LINE__, ret);
@ -373,21 +370,19 @@ ompi_mtl_portals4_pending_list_progress()
while ((!ompi_mtl_portals4.flowctl.flowctl_active) &&
(0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) {
val = opal_atomic_add_32(&ompi_mtl_portals4.flowctl.slots, -1);
val = OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1);
if (val <= 0) {
opal_atomic_add_32(&ompi_mtl_portals4.flowctl.slots, 1);
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
return;
}
item = opal_list_remove_first(&ompi_mtl_portals4.flowctl.pending_sends);
if (NULL == item) {
opal_atomic_add_32(&ompi_mtl_portals4.flowctl.slots, 1);
if (OPAL_UNLIKELY(NULL == item)) {
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
return;
}
pending = (ompi_mtl_portals4_pending_request_t*) item;
opal_list_append(&ompi_mtl_portals4.flowctl.active_sends,
&pending->super.super);
if (pending->length <= ompi_mtl_portals4.eager_limit) {
ret = ompi_mtl_portals4_short_isend(pending->mode,
pending->start,
@ -406,11 +401,10 @@ ompi_mtl_portals4_pending_list_progress()
pending->endpoint,
pending->ptl_request);
}
if (OMPI_SUCCESS != ret) {
opal_list_remove_item(&ompi_mtl_portals4.flowctl.active_sends,
&pending->super.super);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
opal_list_prepend(&ompi_mtl_portals4.flowctl.pending_sends,
&pending->super.super);
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
}
}
}
@ -418,7 +412,7 @@ ompi_mtl_portals4_pending_list_progress()
static inline int
ompi_mtl_portals4_start_send(struct mca_mtl_base_module_t* mtl,
ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t* comm,
int dest,
int tag,
@ -426,18 +420,22 @@ ompi_mtl_portals4_start_send(struct mca_mtl_base_module_t* mtl,
mca_pml_base_send_mode_t mode,
ompi_mtl_portals4_isend_request_t* ptl_request)
{
int ret;
int ret= OMPI_SUCCESS;
void *start;
size_t length;
bool free_after;
ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, dest);
mca_mtl_base_endpoint_t *endpoint =
(mca_mtl_base_endpoint_t*) ompi_proc->proc_pml;
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
opal_free_list_item_t *item;
ompi_mtl_portals4_pending_request_t *pending;
#endif
ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after);
if (OMPI_SUCCESS != ret) return ret;
ptl_request->opcount = opal_atomic_add_64((int64_t*)&ompi_mtl_portals4.opcount, 1);
ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*)&ompi_mtl_portals4.opcount, 1);
ptl_request->buffer_ptr = (free_after) ? start : NULL;
ptl_request->event_count = 0;
@ -449,29 +447,42 @@ ompi_mtl_portals4_start_send(struct mca_mtl_base_module_t* mtl,
(int)length));
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
{
opal_free_list_item_t *item;
ompi_mtl_portals4_pending_request_t *pending;
OPAL_FREE_LIST_GET(&ompi_mtl_portals4.flowctl.pending_fl, item, ret);
if (NULL == item) return OMPI_ERR_OUT_OF_RESOURCE;
OPAL_FREE_LIST_GET(&ompi_mtl_portals4.flowctl.pending_fl, item, ret);
if (NULL == item) return OMPI_ERR_OUT_OF_RESOURCE;
pending = (ompi_mtl_portals4_pending_request_t*) item;
ptl_request->pending = pending;
pending->mode = mode;
pending->start = start;
pending->length = length;
pending->contextid = comm->c_contextid;
pending->tag = tag;
pending->my_rank = comm->c_my_rank;
pending->endpoint = endpoint;
pending->ptl_request = ptl_request;
pending = (ompi_mtl_portals4_pending_request_t*) item;
pending->mode = mode;
pending->start = start;
pending->length = length;
pending->contextid = comm->c_contextid;
pending->tag = tag;
pending->my_rank = comm->c_my_rank;
pending->endpoint = endpoint;
pending->ptl_request = ptl_request;
ptl_request->pending = pending;
if (OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) <= 0) {
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
&pending->super.super);
return OMPI_SUCCESS;
}
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
if (OPAL_UNLIKELY(0 != opal_list_get_size(&ompi_mtl_portals4.flowctl.pending_sends))) {
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
&pending->super.super);
ompi_mtl_portals4_pending_list_progress();
return OMPI_SUCCESS;
}
#else
if (OPAL_UNLIKELY(ompi_mtl_portals4.flowctl.flowctl_active)) {
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
&pending->super.super);
return OMPI_SUCCESS;
}
#endif
if (length <= ompi_mtl_portals4.eager_limit) {
ret = ompi_mtl_portals4_short_isend(mode,
start,
@ -490,7 +501,6 @@ ompi_mtl_portals4_start_send(struct mca_mtl_base_module_t* mtl,
endpoint,
ptl_request);
}
#endif
return ret;
}
@ -512,12 +522,11 @@ ompi_mtl_portals4_send(struct mca_mtl_base_module_t* mtl,
ptl_request.super.super.type = portals4_req_send;
ptl_request.super.super.event_callback = ompi_mtl_portals4_send_callback;
ret = ompi_mtl_portals4_start_send(mtl, comm, dest, tag,
ret = ompi_mtl_portals4_send_start(mtl, comm, dest, tag,
convertor, mode, &ptl_request.super);
if (OMPI_SUCCESS != ret) goto cleanup;
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) goto cleanup;
while (false == ptl_request.complete) {
opal_atomic_mb();
ompi_mtl_portals4_progress();
}
ret = ptl_request.retval;
@ -548,10 +557,10 @@ ompi_mtl_portals4_isend(struct mca_mtl_base_module_t* mtl,
ptl_request->super.type = portals4_req_isend;
ptl_request->super.event_callback = ompi_mtl_portals4_isend_callback;
ret = ompi_mtl_portals4_start_send(mtl, comm, dest, tag,
ret = ompi_mtl_portals4_send_start(mtl, comm, dest, tag,
convertor, mode, ptl_request);
if (OMPI_SUCCESS != ret && NULL != ptl_request->buffer_ptr) {
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret && NULL != ptl_request->buffer_ptr)) {
free(ptl_request->buffer_ptr);
}