From 4e78a53cf26b85736123eee29d23d637b4a3883f Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 05:04:58 +0100 Subject: [PATCH] IPoIB code separated out to ipoib.c. --- src/drivers/net/ipoib.c | 411 +++++++++++++++++++++++++ src/drivers/net/mlx_ipoib/ib_driver.c | 5 +- src/drivers/net/mlx_ipoib/ib_driver.h | 2 +- src/drivers/net/mlx_ipoib/ib_mt25218.c | 2 +- src/drivers/net/mlx_ipoib/ipoib.c | 2 +- src/drivers/net/mlx_ipoib/mt25218.c | 143 ++++++++- src/include/gpxe/errfile.h | 2 + src/include/gpxe/infiniband.h | 155 +++++++--- src/include/gpxe/ipoib.h | 78 +++++ src/net/infiniband.c | 32 +- 10 files changed, 786 insertions(+), 46 deletions(-) create mode 100644 src/drivers/net/ipoib.c create mode 100644 src/include/gpxe/ipoib.h diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c new file mode 100644 index 00000000..9eed6b39 --- /dev/null +++ b/src/drivers/net/ipoib.c @@ -0,0 +1,411 @@ +/* + * Copyright (C) 2007 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** @file + * + * IP over Infiniband + */ + + + + + +extern unsigned long hack_ipoib_qkey; +extern struct ib_address_vector hack_ipoib_bcast_av; + + + +/** IPoIB MTU */ +#define IPOIB_MTU 2048 + +/** Number of IPoIB send work queue entries */ +#define IPOIB_NUM_SEND_WQES 8 + +/** Number of IPoIB receive work queue entries */ +#define IPOIB_NUM_RECV_WQES 8 + +/** Number of IPoIB completion entries */ +#define IPOIB_NUM_CQES 8 + +struct ipoib_device { + struct ib_device *ibdev; + struct ib_completion_queue *cq; + struct ib_queue_pair *qp; + unsigned int rx_fill; +}; + +/**************************************************************************** + * + * IPoIB link layer + * + **************************************************************************** + */ + +/** Broadcast IPoIB address */ +static struct ipoib_mac ipoib_broadcast = { + .gid = { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } }, +}; + +/** + * Transmit IPoIB packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v ll_dest Link-layer destination address + * + * Prepends the IPoIB link-layer header and transmits the packet. 
+ */ +static int ipoib_tx ( struct io_buffer *iobuf, struct net_device *netdev, + struct net_protocol *net_protocol, + const void *ll_dest ) { + struct ipoib_hdr *ipoib_hdr = + iob_push ( iobuf, sizeof ( *ipoib_hdr ) ); + + /* Build IPoIB header */ + memcpy ( &ipoib_hdr->pseudo.peer, ll_dest, + sizeof ( ipoib_hdr->pseudo.peer ) ); + ipoib_hdr->real.proto = net_protocol->net_proto; + ipoib_hdr->real.reserved = 0; + + /* Hand off to network device */ + return netdev_tx ( netdev, iobuf ); +} + +/** + * Process received IPoIB packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * + * Strips off the IPoIB link-layer header and passes up to the + * network-layer protocol. + */ +static int ipoib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { + struct ipoib_hdr *ipoib_hdr = iobuf->data; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) { + DBG ( "IPoIB packet too short (%d bytes)\n", + iob_len ( iobuf ) ); + free_iob ( iobuf ); + return -EINVAL; + } + + /* Strip off IPoIB header */ + iob_pull ( iobuf, sizeof ( *ipoib_hdr ) ); + + /* Hand off to network-layer protocol */ + return net_rx ( iobuf, netdev, ipoib_hdr->real.proto, + &ipoib_hdr->pseudo.peer ); +} + +/** + * Transcribe IPoIB address + * + * @v ll_addr Link-layer address + * @ret string Link-layer address in human-readable format + */ +const char * ipoib_ntoa ( const void *ll_addr ) { + static char buf[61]; + const uint8_t *ipoib_addr = ll_addr; + unsigned int i; + char *p = buf; + + for ( i = 0 ; i < IPOIB_ALEN ; i++ ) { + p += sprintf ( p, ":%02x", ipoib_addr[i] ); + } + return ( buf + 1 ); +} + +/** IPoIB protocol */ +struct ll_protocol ipoib_protocol __ll_protocol = { + .name = "IPoIB", + .ll_proto = htons ( ARPHRD_INFINIBAND ), + .ll_addr_len = IPOIB_ALEN, + .ll_header_len = IPOIB_HLEN, + .ll_broadcast = ( uint8_t * ) &ipoib_broadcast, + .tx = ipoib_tx, + .rx = ipoib_rx, + .ntoa = ipoib_ntoa, +}; + 
+/**************************************************************************** + * + * IPoIB network device + * + **************************************************************************** + */ + +/** + * Transmit packet via IPoIB network device + * + * @v netdev Network device + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int ipoib_transmit ( struct net_device *netdev, + struct io_buffer *iobuf ) { + struct ipoib_device *ipoib = netdev->priv; + struct ib_device *ibdev = ipoib->ibdev; + struct ipoib_pseudo_hdr *ipoib_pshdr = iobuf->data; + + if ( iob_len ( iobuf ) < sizeof ( *ipoib_pshdr ) ) { + DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib ); + return -EINVAL; + } + + iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) ); + return ib_post_send ( ibdev, ipoib->qp, + &hack_ipoib_bcast_av, iobuf ); +} + +/** + * Handle IPoIB send completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v completion Completion + * @v iobuf I/O buffer + */ +static void ipoib_complete_send ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->owner_priv; + + netdev_tx_complete_err ( netdev, iobuf, + ( completion->syndrome ? 
-EIO : 0 ) ); +} + +/** + * Handle IPoIB receive completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v completion Completion + * @v iobuf I/O buffer + */ +static void ipoib_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->owner_priv; + struct ipoib_device *ipoib = netdev->priv; + struct ib_global_route_header *grh = iobuf->data; + struct ipoib_pseudo_hdr *ipoib_pshdr; + + if ( completion->syndrome ) { + netdev_rx_err ( netdev, iobuf, -EIO ); + } else { + iob_put ( iobuf, completion->len ); + iob_pull ( iobuf, ( sizeof ( *grh ) - + sizeof ( *ipoib_pshdr ) ) ); + /* FIXME: fill in a MAC address for the sake of AoE! */ + netdev_rx ( netdev, iobuf ); + } + + ipoib->rx_fill--; +} + +/** + * Refill IPoIB receive ring + * + * @v ipoib IPoIB device + */ +static void ipoib_refill_recv ( struct ipoib_device *ipoib ) { + struct ib_device *ibdev = ipoib->ibdev; + struct io_buffer *iobuf; + int rc; + + while ( ipoib->rx_fill < IPOIB_NUM_RECV_WQES ) { + iobuf = alloc_iob ( IPOIB_MTU ); + if ( ! 
iobuf ) + break; + if ( ( rc = ib_post_recv ( ibdev, ipoib->qp, + iobuf ) ) != 0 ) { + free_iob ( iobuf ); + break; + } + ipoib->rx_fill++; + } +} + +/** + * Poll IPoIB network device + * + * @v netdev Network device + */ +static void ipoib_poll ( struct net_device *netdev ) { + struct ipoib_device *ipoib = netdev->priv; + struct ib_device *ibdev = ipoib->ibdev; + + ib_poll_cq ( ibdev, ipoib->cq, ipoib_complete_send, + ipoib_complete_recv ); + ipoib_refill_recv ( ipoib ); +} + +/** + * Enable/disable interrupts on IPoIB network device + * + * @v netdev Network device + * @v enable Interrupts should be enabled + */ +static void ipoib_irq ( struct net_device *netdev __unused, + int enable __unused ) { + /* No implementation */ +} + +/** + * Open IPoIB network device + * + * @v netdev Network device + * @ret rc Return status code + */ +static int ipoib_open ( struct net_device *netdev ) { + struct ipoib_device *ipoib = netdev->priv; + struct ib_device *ibdev = ipoib->ibdev; + int rc; + + /* Attach to broadcast multicast GID */ + if ( ( rc = ib_mcast_attach ( ibdev, ipoib->qp, + &ibdev->broadcast_gid ) ) != 0 ) { + DBG ( "Could not attach to broadcast GID: %s\n", + strerror ( rc ) ); + return rc; + } + + /* Fill receive ring */ + ipoib_refill_recv ( ipoib ); + + return 0; +} + +/** + * Close IPoIB network device + * + * @v netdev Network device + */ +static void ipoib_close ( struct net_device *netdev ) { + struct ipoib_device *ipoib = netdev->priv; + struct ib_device *ibdev = ipoib->ibdev; + + /* Detach from broadcast multicast GID */ + ib_mcast_detach ( ibdev, ipoib->qp, &ipoib_broadcast.gid ); + + /* FIXME: should probably flush the receive ring */ +} + +/** IPoIB network device operations */ +static struct net_device_operations ipoib_operations = { + .open = ipoib_open, + .close = ipoib_close, + .transmit = ipoib_transmit, + .poll = ipoib_poll, + .irq = ipoib_irq, +}; + +/** + * Probe IPoIB device + * + * @v ibdev Infiniband device + * @ret rc Return status code + 
*/ +int ipoib_probe ( struct ib_device *ibdev ) { + struct net_device *netdev; + struct ipoib_device *ipoib; + struct ipoib_mac *mac; + int rc; + + /* Allocate network device */ + netdev = alloc_ipoibdev ( sizeof ( *ipoib ) ); + if ( ! netdev ) + return -ENOMEM; + netdev_init ( netdev, &ipoib_operations ); + ipoib = netdev->priv; + ib_set_ownerdata ( ibdev, netdev ); + netdev->dev = ibdev->dev; + memset ( ipoib, 0, sizeof ( *ipoib ) ); + ipoib->ibdev = ibdev; + + /* Allocate completion queue */ + ipoib->cq = ib_create_cq ( ibdev, IPOIB_NUM_CQES ); + if ( ! ipoib->cq ) { + DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n", + ipoib ); + rc = -ENOMEM; + goto err_create_cq; + } + + /* Allocate queue pair */ + ipoib->qp = ib_create_qp ( ibdev, IPOIB_NUM_SEND_WQES, + ipoib->cq, IPOIB_NUM_RECV_WQES, + ipoib->cq, hack_ipoib_qkey ); + if ( ! ipoib->qp ) { + DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n", + ipoib ); + rc = -ENOMEM; + goto err_create_qp; + } + ipoib->qp->owner_priv = netdev; + + /* Construct MAC address */ + mac = ( ( struct ipoib_mac * ) netdev->ll_addr ); + mac->qpn = htonl ( ipoib->qp->qpn ); + memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) ); + + /* Register network device */ + if ( ( rc = register_netdev ( netdev ) ) != 0 ) + goto err_register_netdev; + + return 0; + + err_register_netdev: + ib_destroy_qp ( ibdev, ipoib->qp ); + err_create_qp: + ib_destroy_cq ( ibdev, ipoib->cq ); + err_create_cq: + netdev_nullify ( netdev ); + netdev_put ( netdev ); + return rc; +} + +/** + * Remove IPoIB device + * + * @v ibdev Infiniband device + */ +void ipoib_remove ( struct ib_device *ibdev ) { + struct net_device *netdev = ib_get_ownerdata ( ibdev ); + + unregister_netdev ( netdev ); + netdev_nullify ( netdev ); + netdev_put ( netdev ); +} diff --git a/src/drivers/net/mlx_ipoib/ib_driver.c b/src/drivers/net/mlx_ipoib/ib_driver.c index 590fb94d..34d4cbaa 100644 --- a/src/drivers/net/mlx_ipoib/ib_driver.c +++ 
b/src/drivers/net/mlx_ipoib/ib_driver.c @@ -63,6 +63,7 @@ static int wait_logic_link_up(__u8 port) } unsigned long ipoib_qkey; +unsigned long hack_ipoib_qkey; static int ib_driver_init(struct pci_device *pci, udqp_t * ipoib_qph_p) { @@ -149,7 +150,7 @@ static int ib_driver_init(struct pci_device *pci, udqp_t * ipoib_qph_p) qkey, mlid); } - ipoib_qkey = qkey; + hack_ipoib_qkey = ipoib_qkey = qkey; #if 0 rc = create_ipoib_qp(&ib_data.ipoib_qp, @@ -285,7 +286,7 @@ static int poll_cqe_tout(cq_t cqh, __u16 tout, void **wqe, int *good_p) end = currticks() + tout; do { - rc = ib_poll_cq(cqh, &ib_cqe, &num_cqes); + rc = ib_poll_cqx(cqh, &ib_cqe, &num_cqes); if (rc) return rc; diff --git a/src/drivers/net/mlx_ipoib/ib_driver.h b/src/drivers/net/mlx_ipoib/ib_driver.h index 6dca8d30..7fc57364 100644 --- a/src/drivers/net/mlx_ipoib/ib_driver.h +++ b/src/drivers/net/mlx_ipoib/ib_driver.h @@ -153,7 +153,7 @@ static int gw_read_cr(__u32 addr, __u32 * result); static int gw_write_cr(__u32 addr, __u32 data); static ud_av_t alloc_ud_av(void); static void free_ud_av(ud_av_t av); -static int ib_poll_cq(cq_t cq, struct ib_cqe_st *ib_cqe_p, __u8 * num_cqes); +static int ib_poll_cqx(cq_t cq, struct ib_cqe_st *ib_cqe_p, __u8 * num_cqes); static int add_qp_to_mcast_group(union ib_gid_u mcast_gid, __u8 add); static int clear_interrupt(void); static int poll_cqe_tout(cq_t cqh, __u16 tout, void **wqe, int *good_p); diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index ba1108a3..a5d251d4 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -1730,7 +1730,7 @@ static void dev2ib_cqe(struct ib_cqe_st *ib_cqe_p, union cqe_st *cqe_p) byte_cnt); } -static int ib_poll_cq(void *cqh, struct ib_cqe_st *ib_cqe_p, u8 * num_cqes) +static int ib_poll_cqx(void *cqh, struct ib_cqe_st *ib_cqe_p, u8 * num_cqes) { int rc; union cqe_st cqe; diff --git a/src/drivers/net/mlx_ipoib/ipoib.c b/src/drivers/net/mlx_ipoib/ipoib.c 
index d4124f21..d8dd6bf6 100644 --- a/src/drivers/net/mlx_ipoib/ipoib.c +++ b/src/drivers/net/mlx_ipoib/ipoib.c @@ -879,7 +879,7 @@ static int ipoib_read_packet(__u16 * prot_p, void *data, unsigned int *size_p, void *buf, *out_buf; __u16 prot_type; - rc = ib_poll_cq(ipoib_data.rcv_cqh, &ib_cqe, &num_cqes); + rc = ib_poll_cqx(ipoib_data.rcv_cqh, &ib_cqe, &num_cqes); if (rc) { return rc; } diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index aed6d208..6aa4e7fe 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -16,6 +16,7 @@ Skeleton NIC driver for Etherboot #include #include #include +#include /* to get some global routines like printf */ #include "etherboot.h" @@ -29,11 +30,18 @@ Skeleton NIC driver for Etherboot #include "arbel.h" +struct ib_address_vector hack_ipoib_bcast_av; + + + + static const struct ib_gid arbel_no_gid = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } }; +#if 0 + #define MLX_RX_MAX_FILL NUM_IPOIB_RCV_WQES struct mlx_nic { @@ -275,6 +283,7 @@ static void mlx_poll ( struct net_device *netdev ) { &static_ipoib_send_cq, #endif temp_complete_send, temp_complete_recv ); +#if 0 arbel_poll_cq ( &static_ibdev, #if CREATE_OWN mlx->own_recv_cq, @@ -282,6 +291,7 @@ static void mlx_poll ( struct net_device *netdev ) { &static_ipoib_recv_cq, #endif temp_complete_send, temp_complete_recv ); +#endif mlx_refill_rx ( netdev ); } @@ -308,6 +318,8 @@ static struct net_device_operations mlx_operations = { }; +#endif /* 0 */ + /*************************************************************************** @@ -1488,6 +1500,8 @@ static int arbel_get_port_gid ( struct arbel *arbel, struct ib_gid *gid ) { +#if 0 + /** * Probe PCI device * @@ -1576,14 +1590,17 @@ static int arbel_probe ( struct pci_device *pci, DBG ( "Could not create send CQ\n" ); return -EIO; } +#if 0 mlx->own_recv_cq = ib_create_cq ( ibdev, 32 ); if ( ! 
mlx->own_recv_cq ) { DBG ( "Could not create send CQ\n" ); return -EIO; } +#endif mlx->own_qp = ib_create_qp ( ibdev, NUM_IPOIB_SND_WQES, mlx->own_send_cq, NUM_IPOIB_RCV_WQES, - mlx->own_recv_cq, ipoib_qkey ); + //mlx->own_recv_cq, ipoib_qkey ); + mlx->own_send_cq, ipoib_qkey ); if ( ! mlx->own_qp ) { DBG ( "Could not create QP\n" ); return -EIO; @@ -1621,6 +1638,22 @@ static int arbel_probe ( struct pci_device *pci, } #endif + ibdev->dev = &pci->dev; + + + struct ud_av_st *bcast_av = mlx->bcast_av; + struct arbelprm_ud_address_vector *bav = + ( struct arbelprm_ud_address_vector * ) &bcast_av->av; + struct ib_address_vector *av = &hack_ipoib_bcast_av; + av->dest_qp = bcast_av->dest_qp; + av->qkey = bcast_av->qkey; + av->dlid = MLX_GET ( bav, rlid ); + av->rate = ( MLX_GET ( bav, max_stat_rate ) ? 1 : 4 ); + av->sl = MLX_GET ( bav, sl ); + av->gid_present = 1; + memcpy ( &av->gid, ( ( void * ) bav ) + 16, 16 ); + + /* Register network device */ if ( ( rc = register_netdev ( netdev ) ) != 0 ) goto err_register_netdev; @@ -1650,6 +1683,114 @@ static void arbel_remove ( struct pci_device *pci ) { netdev_put ( netdev ); } +#endif /* 0 */ + + + +/** + * Probe PCI device + * + * @v pci PCI device + * @v id PCI ID + * @ret rc Return status code + */ +static int arbel_probe ( struct pci_device *pci, + const struct pci_device_id *id __unused ) { + struct ib_device *ibdev; + struct arbelprm_query_dev_lim dev_lim; + struct arbel *arbel; + udqp_t qph; + int rc; + + /* Allocate Infiniband device */ + ibdev = alloc_ibdev ( sizeof ( *arbel ) ); + if ( ! 
ibdev ) + return -ENOMEM; + ibdev->op = &arbel_ib_operations; + pci_set_drvdata ( pci, ibdev ); + ibdev->dev = &pci->dev; + arbel = ibdev->dev_priv; + memset ( arbel, 0, sizeof ( *arbel ) ); + + /* Fix up PCI device */ + adjust_pci_device ( pci ); + + /* Initialise hardware */ + if ( ( rc = ib_driver_init ( pci, &qph ) ) != 0 ) + goto err_ib_driver_init; + + /* Hack up IB structures */ + arbel->config = memfree_pci_dev.cr_space; + arbel->mailbox_in = dev_buffers_p->inprm_buf; + arbel->mailbox_out = dev_buffers_p->outprm_buf; + arbel->uar = memfree_pci_dev.uar; + arbel->db_rec = dev_ib_data.uar_context_base; + arbel->reserved_lkey = dev_ib_data.mkey; + arbel->eqn = dev_ib_data.eq.eqn; + + /* Get device limits */ + if ( ( rc = arbel_cmd_query_dev_lim ( arbel, &dev_lim ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get device limits: %s\n", + arbel, strerror ( rc ) ); + goto err_query_dev_lim; + } + arbel->limits.reserved_uars = MLX_GET ( &dev_lim, num_rsvd_uars ); + arbel->limits.reserved_cqs = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) ); + arbel->limits.reserved_qps = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) ); + + /* Get port GID */ + if ( ( rc = arbel_get_port_gid ( arbel, &ibdev->port_gid ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine port GID: %s\n", + arbel, strerror ( rc ) ); + goto err_get_port_gid; + } + + struct ud_av_st *bcast_av = ib_data.bcast_av; + struct arbelprm_ud_address_vector *bav = + ( struct arbelprm_ud_address_vector * ) &bcast_av->av; + struct ib_address_vector *av = &hack_ipoib_bcast_av; + av->dest_qp = bcast_av->dest_qp; + av->qkey = bcast_av->qkey; + av->dlid = MLX_GET ( bav, rlid ); + av->rate = ( MLX_GET ( bav, max_stat_rate ) ? 
1 : 4 ); + av->sl = MLX_GET ( bav, sl ); + av->gid_present = 1; + memcpy ( &av->gid, ( ( void * ) bav ) + 16, 16 ); + + memcpy ( &ibdev->broadcast_gid, &ib_data.bcast_gid, 16 ); + + /* Add IPoIB device */ + if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not add IPoIB device: %s\n", + arbel, strerror ( rc ) ); + goto err_ipoib_probe; + } + + return 0; + + err_ipoib_probe: + err_get_port_gid: + err_query_dev_lim: + ib_driver_close ( 0 ); + err_ib_driver_init: + free_ibdev ( ibdev ); + return rc; +} + +/** + * Remove PCI device + * + * @v pci PCI device + */ +static void arbel_remove ( struct pci_device *pci ) { + struct ib_device *ibdev = pci_get_drvdata ( pci ); + + ipoib_remove ( ibdev ); + ib_driver_close ( 0 ); +} + static struct pci_device_id arbel_nics[] = { PCI_ROM ( 0x15b3, 0x6282, "MT25218", "MT25218 HCA driver" ), PCI_ROM ( 0x15b3, 0x6274, "MT25204", "MT25204 HCA driver" ), diff --git a/src/include/gpxe/errfile.h b/src/include/gpxe/errfile.h index 3413f9cf..325d2387 100644 --- a/src/include/gpxe/errfile.h +++ b/src/include/gpxe/errfile.h @@ -101,6 +101,8 @@ #define ERRFILE_via_rhine ( ERRFILE_DRIVER | 0x00440000 ) #define ERRFILE_via_velocity ( ERRFILE_DRIVER | 0x00450000 ) #define ERRFILE_w89c840 ( ERRFILE_DRIVER | 0x00460000 ) +#define ERRFILE_ipoib ( ERRFILE_DRIVER | 0x00470000 ) +#define ERRFILE_mt25218 ( ERRFILE_DRIVER | 0x00480000 ) #define ERRFILE_scsi ( ERRFILE_DRIVER | 0x00700000 ) diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 3f09808c..e9e0121d 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -8,7 +8,43 @@ */ #include -#include +#include + + + +#if 0 +/** Infiniband MAC address length */ +#define IB_ALEN 20 + +/** An Infiniband MAC address */ +struct ib_mac { + /** Queue pair number + * + * MSB must be zero; QPNs are only 24-bit. 
+ */ + uint32_t qpn; + /** Port GID */ + struct ib_gid gid; +} __attribute__ (( packed )); + +/** Infiniband link-layer header length */ +#define IB_HLEN 4 + +/** An Infiniband link-layer header */ +struct ibhdr { + /** Network-layer protocol */ + uint16_t proto; + /** Reserved, must be zero */ + uint16_t reserved; +} __attribute__ (( packed )); +#endif + + + + + + + /** An Infiniband Global Identifier */ struct ib_gid { @@ -36,33 +72,6 @@ struct ib_global_route_header { struct ib_gid dgid; } __attribute__ (( packed )); -/** Infiniband MAC address length */ -#define IB_ALEN 20 - -/** An Infiniband MAC address */ -struct ib_mac { - /** Queue pair number - * - * MSB must be zero; QPNs are only 24-bit. - */ - uint32_t qpn; - /** Port GID */ - struct ib_gid gid; -} __attribute__ (( packed )); - -/** Infiniband link-layer header length */ -#define IB_HLEN 4 - -/** An Infiniband link-layer header */ -struct ibhdr { - /** Network-layer protocol */ - uint16_t proto; - /** Reserved, must be zero */ - uint16_t reserved; -} __attribute__ (( packed )); - - - struct ib_device; struct ib_queue_pair; struct ib_completion_queue; @@ -223,8 +232,7 @@ struct ib_device_operations { struct ib_queue_pair *qp, struct ib_address_vector *av, struct io_buffer *iobuf ); - /** - * Post receive work queue entry + /** Post receive work queue entry * * @v ibdev Infiniband device * @v qp Queue pair @@ -252,8 +260,7 @@ struct ib_device_operations { struct ib_completion_queue *cq, ib_completer_t complete_send, ib_completer_t complete_recv ); - /** - * Attach to multicast group + /** Attach to multicast group * * @v ibdev Infiniband device * @v qp Queue pair @@ -263,8 +270,7 @@ struct ib_device_operations { int ( * mcast_attach ) ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_gid *gid ); - /** - * Detach from multicast group + /** Detach from multicast group * * @v ibdev Infiniband device * @v qp Queue pair @@ -276,13 +282,19 @@ struct ib_device_operations { }; /** An Infiniband 
device */ -struct ib_device { +struct ib_device { /** Port GID */ struct ib_gid port_gid; + /** Broadcast GID */ + struct ib_gid broadcast_gid; + /** Underlying device */ + struct device *dev; /** Infiniband operations */ struct ib_device_operations *op; /** Device private data */ void *dev_priv; + /** Owner private data */ + void *owner_priv; }; extern struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev, @@ -297,6 +309,52 @@ extern void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ); extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, unsigned long qpn, int is_send ); +extern struct ib_device * alloc_ibdev ( size_t priv_size ); +extern void free_ibdev ( struct ib_device *ibdev ); + +/** + * Post send work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v av Address vector + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static inline __attribute__ (( always_inline )) int +ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_address_vector *av, struct io_buffer *iobuf ) { + return ibdev->op->post_send ( ibdev, qp, av, iobuf ); +} + +/** + * Post receive work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static inline __attribute__ (( always_inline )) int +ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct io_buffer *iobuf ) { + return ibdev->op->post_recv ( ibdev, qp, iobuf ); +} + +/** + * Poll completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + * @v complete_send Send completion handler + * @v complete_recv Receive completion handler + */ +static inline __attribute__ (( always_inline )) void +ib_poll_cq ( struct ib_device *ibdev, struct ib_completion_queue *cq, + ib_completer_t complete_send, ib_completer_t complete_recv ) { + ibdev->op->poll_cq ( ibdev, cq, complete_send, complete_recv ); +} + /** * Attach to 
multicast group @@ -325,6 +383,27 @@ ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp, ibdev->op->mcast_detach ( ibdev, qp, gid ); } +/** + * Set Infiniband owner-private data + * + * @v ibdev Infiniband device + * @v owner_priv Owner private data + */ +static inline void ib_set_ownerdata ( struct ib_device *ibdev, + void *owner_priv ) { + ibdev->owner_priv = owner_priv; +} + +/** + * Get Infiniband owner-private data + * + * @v ibdev Infiniband device + * @ret owner_priv Owner private data + */ +static inline void * ib_get_ownerdata ( struct ib_device *ibdev ) { + return ibdev->owner_priv; +} + /***************************************************************************** * * Management datagrams @@ -435,9 +514,7 @@ union ib_mad { - - - +#if 0 extern struct ll_protocol infiniband_protocol; @@ -459,4 +536,6 @@ static inline struct net_device * alloc_ibdev ( size_t priv_size ) { return netdev; } +#endif + #endif /* _GPXE_INFINIBAND_H */ diff --git a/src/include/gpxe/ipoib.h b/src/include/gpxe/ipoib.h new file mode 100644 index 00000000..0551687d --- /dev/null +++ b/src/include/gpxe/ipoib.h @@ -0,0 +1,78 @@ +#ifndef _GPXE_IPOIB_H +#define _GPXE_IPOIB_H + +/** @file + * + * IP over Infiniband + */ + +#include + +/** IPoIB MAC address length */ +#define IPOIB_ALEN 20 + +/** An IPoIB MAC address */ +struct ipoib_mac { + /** Queue pair number + * + * MSB must be zero; QPNs are only 24-bit. + */ + uint32_t qpn; + /** Port GID */ + struct ib_gid gid; +} __attribute__ (( packed )); + +/** IPoIB link-layer header length */ +#define IPOIB_HLEN 24 + +/** + * IPoIB link-layer header pseudo portion + * + * This part doesn't actually exist on the wire, but it provides a + * convenient way to fit into the typical network device model. 
+ */ +struct ipoib_pseudo_hdr { + /** Peer address */ + struct ipoib_mac peer; +} __attribute__ (( packed )); + +/** IPoIB link-layer header real portion */ +struct ipoib_real_hdr { + /** Network-layer protocol */ + uint16_t proto; + /** Reserved, must be zero */ + uint16_t reserved; +} __attribute__ (( packed )); + +/** An IPoIB link-layer header */ +struct ipoib_hdr { + /** Pseudo portion */ + struct ipoib_pseudo_hdr pseudo; + /** Real portion */ + struct ipoib_real_hdr real; +} __attribute__ (( packed )); + +extern struct ll_protocol ipoib_protocol; + +extern const char * ipoib_ntoa ( const void *ll_addr ); + +/** + * Allocate IPoIB device + * + * @v priv_size Size of driver private data + * @ret netdev Network device, or NULL + */ +static inline struct net_device * alloc_ipoibdev ( size_t priv_size ) { + struct net_device *netdev; + + netdev = alloc_netdev ( priv_size ); + if ( netdev ) { + netdev->ll_protocol = &ipoib_protocol; + } + return netdev; +} + +extern int ipoib_probe ( struct ib_device *ibdev ); +extern void ipoib_remove ( struct ib_device *ibdev ); + +#endif /* _GPXE_IPOIB_H */ diff --git a/src/net/infiniband.c b/src/net/infiniband.c index a9ca0e31..7a68b7d4 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -153,8 +153,6 @@ void ib_destroy_qp ( struct ib_device *ibdev, free ( qp ); } - - /** * Find work queue belonging to completion queue * @@ -174,7 +172,35 @@ struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, return NULL; } +/** + * Allocate Infiniband device + * + * @v priv_size Size of private data area + * @ret ibdev Infiniband device, or NULL + */ +struct ib_device * alloc_ibdev ( size_t priv_size ) { + struct ib_device *ibdev; + size_t total_len; + total_len = ( sizeof ( *ibdev ) + priv_size ); + ibdev = zalloc ( total_len ); + if ( ibdev ) { + ibdev->dev_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) ); + } + return ibdev; +} + +/** + * Free Infiniband device + * + * @v ibdev Infiniband device + */ +void 
free_ibdev ( struct ib_device *ibdev ) { + free ( ibdev ); +} + + +#if 0 /** Infiniband broadcast MAC address */ static uint8_t ib_broadcast[IB_ALEN] = { 0xff, }; @@ -259,3 +285,5 @@ struct ll_protocol infiniband_protocol __ll_protocol = { .rx = ib_rx, .ntoa = ib_ntoa, }; + +#endif