#ifndef MCA_BTL_SMCUDA_FIFO_H #define MCA_BTL_SMCUDA_FIFO_H #include "btl_smcuda.h" #include "btl_smcuda_endpoint.h" static void add_pending(struct mca_btl_base_endpoint_t *ep, void *data, bool resend) { int rc; btl_smcuda_pending_send_item_t *si; opal_free_list_item_t *i; OPAL_FREE_LIST_GET(&mca_btl_smcuda_component.pending_send_fl, i, rc); /* don't handle error for now */ assert(i != NULL && rc == OPAL_SUCCESS); si = (btl_smcuda_pending_send_item_t*)i; si->data = data; OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_pending_sends, +1); /* if data was on pending send list then prepend it to the list to * minimize reordering */ OPAL_THREAD_LOCK(&ep->endpoint_lock); if (resend) opal_list_prepend(&ep->pending_sends, (opal_list_item_t*)si); else opal_list_append(&ep->pending_sends, (opal_list_item_t*)si); OPAL_THREAD_UNLOCK(&ep->endpoint_lock); } /* * FIFO_MAP(x) defines which FIFO on the receiver should be used * by sender rank x. The map is some many-to-one hash. * * FIFO_MAP_NUM(n) defines how many FIFOs the receiver has for * n senders. * * That is, * * for all 0 <= x < n: * * 0 <= FIFO_MAP(x) < FIFO_MAP_NUM(n) * * For example, using some power-of-two nfifos, we could have * * FIFO_MAP(x) = x & (nfifos-1) * FIFO_MAP_NUM(n) = min(nfifos,n) * * Interesting limits include: * * nfifos very large: In this case, each sender has its * own dedicated FIFO on each receiver and the receiver * has one FIFO per sender. * * nfifos == 1: In this case, all senders use the same * FIFO and each receiver has just one FIFO for all senders. */ #define FIFO_MAP(x) ((x) & (mca_btl_smcuda_component.nfifos - 1)) #define FIFO_MAP_NUM(n) ( (mca_btl_smcuda_component.nfifos) < (n) ? (mca_btl_smcuda_component.nfifos) : (n) ) #define MCA_BTL_SMCUDA_FIFO_WRITE(endpoint_peer, my_smp_rank, \ peer_smp_rank, hdr, resend, retry_pending_sends, rc) \ do { \ sm_fifo_t* fifo = &(mca_btl_smcuda_component.fifo[peer_smp_rank][FIFO_MAP(my_smp_rank)]); \ \ if ( retry_pending_sends ) { \ if ( 0 < opal_list_get_size(&endpoint_peer->pending_sends) ) { \ btl_smcuda_process_pending_sends(endpoint_peer); \ } \ } \ \ opal_atomic_lock(&(fifo->head_lock)); \ /* post fragment */ \ if(sm_fifo_write(hdr, fifo) != OPAL_SUCCESS) { \ add_pending(endpoint_peer, hdr, resend); \ rc = OPAL_ERR_RESOURCE_BUSY; \ } else { \ MCA_BTL_SMCUDA_SIGNAL_PEER(endpoint_peer); \ rc = OPAL_SUCCESS; \ } \ opal_atomic_unlock(&(fifo->head_lock)); \ } while(0) #endif