From 9d08b7c692aac5b1790555f8fc28ddb52ef28bb5 Mon Sep 17 00:00:00 2001
From: Michael Brown
Date: Fri, 14 Sep 2007 20:29:44 +0100
Subject: [PATCH] Starting to introduce an Infiniband device abstraction

---
 src/drivers/net/mlx_ipoib/mt25218.c | 144 ++++++++++++++++++++++++++++
 src/include/gpxe/infiniband.h       |  91 ++++++++++++++++++
 2 files changed, 235 insertions(+)

diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c
index 3cbca49a..c6015fb2 100644
--- a/src/drivers/net/mlx_ipoib/mt25218.c
+++ b/src/drivers/net/mlx_ipoib/mt25218.c
@@ -72,6 +72,7 @@ static uint8_t ib_broadcast[IB_ALEN] = { 0xff, };
 static int mlx_transmit ( struct net_device *netdev,
 			  struct io_buffer *iobuf ) {
 	struct mlx_nic *mlx = netdev->priv;
+	ud_av_t av = iobuf->data;
 	ud_send_wqe_t snd_wqe;
 	int rc;
 
@@ -222,6 +223,149 @@ static struct net_device_operations mlx_operations = {
 	.irq		= mlx_irq,
 };
 
+/**
+ * Allocate work queue entry
+ *
+ * @v wq		Work queue
+ * @v iobuf		I/O buffer
+ * @ret wqe_idx		Work queue entry index, or negative error
+ *
+ * The write pointer is wrapped by masking with ( num_wqes - 1 ),
+ * which assumes that the number of entries is a power of two.
+ */
+int ib_alloc_wqe ( struct ib_work_queue *wq, struct io_buffer *iobuf ) {
+	unsigned int wqe_idx;
+	unsigned int new_write_ptr;
+
+	/* Allocate queue entry */
+	wqe_idx = new_write_ptr = wq->write_ptr;
+	if ( wq->iobuf[wqe_idx] )
+		return -ENOBUFS;
+	wq->iobuf[wqe_idx] = iobuf;
+
+	/* Update write pointer */
+	new_write_ptr++;
+	new_write_ptr &= ( wq->num_wqes - 1 );
+	wq->write_ptr = new_write_ptr;
+
+	return wqe_idx;
+}
+
+/**
+ * Free work queue entry
+ *
+ * @v wq		Work queue
+ * @v wqe_idx		Work queue entry index
+ */
+static inline void ib_free_wqe ( struct ib_work_queue *wq, int wqe_idx ) {
+	assert ( wq->iobuf[wqe_idx] != NULL );
+	wq->iobuf[wqe_idx] = NULL;
+}
+
+/**
+ * Post send work queue entry
+ *
+ * @v ibdev		Infiniband device
+ * @v iobuf		I/O buffer
+ * @v av		Address vector
+ * @v qp		Queue pair
+ * @ret rc		Return status code
+ */
+static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf,
+			   struct ib_address_vector *av,
+			   struct ib_queue_pair *qp ) {
+	struct mlx *mlx = ibdev->priv;
+	struct ib_work_queue *wq = &qp->send;
+	struct mlx_work_queue *mlx_wq = wq->priv;
+	unsigned int wqe_idx_mask = ( wq->num_wqes - 1 );
+	unsigned int prev_wqe_idx;
+	struct ud_send_wqe_st *prev_wqe;
+	unsigned int wqe_idx;
+	struct ud_send_wqe_st *wqe;
+	struct ib_gid *gid;
+	size_t nds;
+	struct send_doorbell_st doorbell;
+
+	/* Allocate work queue entry */
+	prev_wqe_idx = wq->posted;
+	wqe_idx = ( prev_wqe_idx + 1 );
+	if ( wq->iobuf[wqe_idx & wqe_idx_mask] ) {
+		DBGC ( mlx, "MLX %p send queue full\n", mlx );
+		return -ENOBUFS;
+	}
+	prev_wqe = &mlx_wq->wqe[prev_wqe_idx & wqe_idx_mask];
+	wqe = &mlx_wq->wqe[wqe_idx & wqe_idx_mask];
+
+	/* Construct work queue entry */
+	memset ( &wqe->next.control, 0,
+		 sizeof ( wqe->next.control ) );
+	MLX_POPULATE_1 ( &wqe->next.control,
+			 arbelprm_wqe_segment_ctrl_send_st, 0,
+			 always1, 1 );
+	memset ( &wqe->udseg, 0, sizeof ( wqe->udseg ) );
+	MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 0,
+			 pd, GLOBAL_PD,
+			 port_number, mlx->port );
+	MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 1,
+			 rlid, av->remote_lid,
+			 g, av->gid_present );
+	MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 2,
+			 max_stat_rate, ( ( av->rate >= 3 ) ? 0 : 1 ),
+			 msg, 3 );
+	MLX_POPULATE_1 ( &wqe->udseg, arbelprm_ud_address_vector_st, 3,
+			 sl, av->sl );
+	gid = ( av->gid_present ? av->gid : &ib_no_gid );
+	memcpy ( ( ( ( void * ) &wqe->udseg ) + 16 ),
+		 gid, sizeof ( *gid ) );
+	MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 8,
+			 destination_qp, av->dest_qp );
+	MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 9,
+			 q_key, av->qkey );
+	wqe->mpointer[0].local_addr_l =
+		cpu_to_be32 ( virt_to_bus ( iobuf->data ) );
+	wqe->mpointer[0].byte_count = cpu_to_be32 ( iob_len ( iobuf ) );
+
+	/* Update previous work queue entry's "next" field */
+	nds = ( offsetof ( typeof ( *wqe ), mpointer ) +
+		sizeof ( wqe->mpointer[0] ) );
+	MLX_MODIFY_1 ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 0,
+		       nopcode, XDEV_NOPCODE_SEND );
+	MLX_POPULATE_3 ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 1,
+			 nds, nds,
+			 f, 1,
+			 always1, 1 );
+
+	/* Ring doorbell
+	 *
+	 * FIXME: doorbell index is a property of the queue pair.
+	 * The doorbell constructed below is not yet written to the
+	 * device.
+	 */
+	MLX_POPULATE_1 ( mlx_wq->send_uar_context, arbelprm_qp_db_record_st, 0,
+			 counter, ( wqe_idx & 0xffff ) );
+	memset ( &doorbell, 0, sizeof ( doorbell ) );
+	MLX_POPULATE_4 ( &doorbell, arbelprm_send_doorbell_st, 0,
+			 nopcode, XDEV_NOPCODE_SEND,
+			 f, 1,
+			 wqe_counter, ( prev_wqe_idx & 0xffff ),
+			 wqe_cnt, 1 );
+	MLX_POPULATE_2 ( &doorbell, arbelprm_send_doorbell_st, 1,
+			 nds, nds,
+			 qpn, qp->qpn );
+	barrier();
+
+	/* Record I/O buffer against the entry and update posted index */
+	wq->iobuf[wqe_idx & wqe_idx_mask] = iobuf;
+	wq->posted = wqe_idx;
+
+	return 0;
+}
+
+/** Mellanox Infiniband operations */
+static struct ib_device_operations mlx_ib_operations = {
+	.post_send	= mlx_post_send,
+};
+
 /**
  * Remove PCI device
  *
diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h
index 9f126b49..22a8a982 100644
--- a/src/include/gpxe/infiniband.h
+++ b/src/include/gpxe/infiniband.h
@@ -61,6 +61,97 @@ struct ibhdr {
 	uint16_t reserved;
 } __attribute__ (( packed ));
 
+struct ib_device;
+struct ib_gid;
+struct io_buffer;
+
+/** An Infiniband Work Queue */
+struct ib_work_queue {
+	/** Number of work queue entries */
+	unsigned int num_wqes;
+	/** Posted index
+	 *
+	 * This is the index of the most recently posted entry.
+	 */
+	unsigned int posted;
+	/** Write pointer
+	 *
+	 * This is the index of the next entry to be allocated by
+	 * ib_alloc_wqe().
+	 */
+	unsigned int write_ptr;
+	/** Driver-private data
+	 *
+	 * Typically used to hold the address of the work queue.
+	 */
+	void *priv;
+	/** I/O buffers assigned to work queue
+	 *
+	 * Points to an array of num_wqes entries.  This is a pointer
+	 * rather than a trailing zero-length array, since two work
+	 * queues are embedded by value in struct ib_queue_pair and a
+	 * trailing array on the send queue would overlap the receive
+	 * queue.
+	 */
+	struct io_buffer **iobuf;
+};
+
+/** An Infiniband Queue Pair */
+struct ib_queue_pair {
+	/** Queue Pair Number */
+	uint32_t qpn;
+	/** Send queue */
+	struct ib_work_queue send;
+	/** Receive queue */
+	struct ib_work_queue recv;
+};
+
+/** An Infiniband Address Vector
+ *
+ * NOTE(review): field types are provisional; confirm the widths.
+ */
+struct ib_address_vector {
+	/** Destination Queue Pair */
+	unsigned int dest_qp;
+	/** Queue key */
+	unsigned int qkey;
+	/** Remote LID */
+	unsigned int remote_lid;
+	/** Rate */
+	unsigned int rate;
+	/** Service level */
+	unsigned int sl;
+	/** GID is present */
+	unsigned int gid_present;
+	/** GID (used only if gid_present is set) */
+	struct ib_gid *gid;
+};
+
+/**
+ * Infiniband device operations
+ *
+ * These represent a subset of the Infiniband Verbs.
+ */
+struct ib_device_operations {
+	/** Post Send work queue entry
+	 *
+	 * @v ibdev		Infiniband device
+	 * @v iobuf		I/O buffer
+	 * @v av		Address vector
+	 * @v qp		Queue pair
+	 * @ret rc		Return status code
+	 *
+	 * If this method returns success, the I/O buffer remains
+	 * owned by the queue pair.  If this method returns failure,
+	 * the I/O buffer is immediately released; the failure is
+	 * interpreted as "failure to enqueue buffer".
+	 */
+	int ( * post_send ) ( struct ib_device *ibdev,
+			      struct io_buffer *iobuf,
+			      struct ib_address_vector *av,
+			      struct ib_queue_pair *qp );
+};
+
 extern struct ll_protocol infiniband_protocol;
 extern const char * ib_ntoa ( const void *ll_addr );
 