diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index 2d351e4b..a848b711 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -44,15 +44,24 @@ extern struct ib_address_vector hack_ipoib_bcast_av; /** IPoIB MTU */ #define IPOIB_MTU 2048 -/** Number of IPoIB send work queue entries */ +/** Number of IPoIB data send work queue entries */ #define IPOIB_DATA_NUM_SEND_WQES 4 -/** Number of IPoIB receive work queue entries */ -#define IPOIB_DATA_NUM_RECV_WQES 8 +/** Number of IPoIB data receive work queue entries */ +#define IPOIB_DATA_NUM_RECV_WQES 4 -/** Number of IPoIB completion entries */ +/** Number of IPoIB data completion entries */ #define IPOIB_DATA_NUM_CQES 8 +/** Number of IPoIB metadata send work queue entries */ +#define IPOIB_META_NUM_SEND_WQES 4 + +/** Number of IPoIB metadata receive work queue entries */ +#define IPOIB_META_NUM_RECV_WQES 4 + +/** Number of IPoIB metadata completion entries */ +#define IPOIB_META_NUM_CQES 8 + /** An IPoIB queue set */ struct ipoib_queue_set { /** Completion queue */ @@ -84,10 +93,15 @@ struct ipoib_device { **************************************************************************** */ +/** Broadcast QPN used in IPoIB MAC addresses + * + * This is a guaranteed invalid real QPN + */ +#define IPOIB_BROADCAST_QPN 0xffffffffUL + /** Broadcast IPoIB address */ static struct ipoib_mac ipoib_broadcast = { - .gid = { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } }, + .qpn = ntohl ( IPOIB_BROADCAST_QPN ), }; /** @@ -244,6 +258,73 @@ static int ipoib_create_qset ( struct ipoib_device *ipoib, return rc; } +/** + * Transmit path record request + * + * @v ipoib IPoIB device + * @v gid Destination GID + * @ret rc Return status code + */ +static int ipoib_get_path_record ( struct ipoib_device *ipoib, + struct ib_gid *gid ) { + struct ib_device *ibdev = ipoib->ibdev; + struct io_buffer *iobuf; + struct ib_mad_path_record *path_record; + struct ib_address_vector av; + static uint32_t tid = 0; + int rc; + + DBG ( "get_path_record():\n" ); + int get_path_record(struct ib_gid *dgid, uint16_t *dlid_p, + uint8_t *sl_p, uint8_t *rate_p); + uint16_t tmp_dlid; + uint8_t tmp_sl; + uint8_t tmp_rate; + get_path_record ( gid, &tmp_dlid, &tmp_sl, &tmp_rate ); + + DBG ( "ipoib_get_path_record():\n" ); + + /* Allocate I/O buffer */ + iobuf = alloc_iob ( sizeof ( *path_record ) ); + if ( ! iobuf ) + return -ENOMEM; + iob_put ( iobuf, sizeof ( *path_record ) ); + path_record = iobuf->data; + memset ( path_record, 0, sizeof ( *path_record ) ); + + /* Construct path record request */ + path_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION; + path_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + path_record->mad_hdr.class_version = 2; + path_record->mad_hdr.method = IB_MGMT_METHOD_GET; + path_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC ); + path_record->mad_hdr.tid = tid++; + path_record->sa_hdr.comp_mask[1] = + htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID ); + memcpy ( &path_record->dgid, gid, sizeof ( path_record->dgid ) ); + memcpy ( &path_record->sgid, &ibdev->port_gid, + sizeof ( path_record->sgid ) ); + + DBG_HD ( path_record, sizeof ( *path_record ) ); + + /* Construct address vector */ + memset ( &av, 0, sizeof ( av ) ); + av.dlid = ibdev->sm_lid; + av.dest_qp = IB_SA_QPN; + av.qkey = IB_SA_QKEY; + + /* Post send request */ + if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av, + iobuf ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n", + ipoib, strerror ( rc ) ); + free_iob ( iobuf ); + return rc; + } + + return 0; +} + /** * Transmit packet via IPoIB network device * @@ -256,19 +337,29 @@ static int ipoib_transmit ( struct net_device *netdev, struct ipoib_device *ipoib = netdev->priv; struct ib_device *ibdev = ipoib->ibdev; struct ipoib_pseudo_hdr *ipoib_pshdr = iobuf->data; + int rc; if ( iob_len ( iobuf ) < sizeof ( *ipoib_pshdr ) ) { DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib ); return -EINVAL; } + DBG ( "TX pseudo-header:\n" ); + DBG_HD ( ipoib_pshdr, sizeof ( *ipoib_pshdr ) ); + if ( ipoib_pshdr->peer.qpn != htonl ( IPOIB_BROADCAST_QPN ) ) { + DBG ( "Get path record\n" ); + rc = ipoib_get_path_record ( ipoib, &ipoib_pshdr->peer.gid ); + free_iob ( iobuf ); + return 0; + } + iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) ); return ib_post_send ( ibdev, ipoib->data.qp, &hack_ipoib_bcast_av, iobuf ); } /** - * Handle IPoIB send completion + * Handle IPoIB data send completion * * @v ibdev Infiniband device * @v qp Queue pair @@ -286,7 +377,7 @@ static void ipoib_data_complete_send ( struct ib_device *ibdev __unused, } /** - * Handle IPoIB receive completion + * Handle IPoIB data receive completion * * @v ibdev Infiniband device * @v qp Queue pair @@ -315,6 +406,61 @@ static void ipoib_data_complete_recv ( struct ib_device *ibdev __unused, ipoib->data.recv_fill--; } +/** + * Handle IPoIB metadata send completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v completion Completion + * @v iobuf I/O buffer + */ +static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->owner_priv; + struct ipoib_device *ipoib = netdev->priv; + + DBG ( "Woohoo! METADATA TX completion\n" ); + + + if ( completion->syndrome ) { + DBGC ( ipoib, "IPoIB %p metadata TX completion error %x\n", + ipoib, completion->syndrome ); + } + free_iob ( iobuf ); +} + +/** + * Handle IPoIB metadata receive completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v completion Completion + * @v iobuf I/O buffer + */ +static void ipoib_meta_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->owner_priv; + struct ipoib_device *ipoib = netdev->priv; + + DBG ( "***************** META TX!!!!!! ********\n" ); + + if ( completion->syndrome ) { + DBGC ( ipoib, "IPoIB %p metadata RX completion error %x\n", + ipoib, completion->syndrome ); + } else { + iob_put ( iobuf, completion->len ); + DBG ( "Metadata RX:\n" ); + DBG_HD ( iobuf->data, iob_len ( iobuf ) ); + } + + ipoib->meta.recv_fill--; + free_iob ( iobuf ); +} + /** * Refill IPoIB receive ring * @@ -349,6 +495,9 @@ static void ipoib_poll ( struct net_device *netdev ) { ib_poll_cq ( ibdev, ipoib->data.cq, ipoib_data_complete_send, ipoib_data_complete_recv ); + ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send, + ipoib_meta_complete_recv ); + ipoib_refill_recv ( ipoib, &ipoib->meta ); ipoib_refill_recv ( ipoib, &ipoib->data ); } @@ -382,7 +531,8 @@ static int ipoib_open ( struct net_device *netdev ) { return rc; } - /* Fill receive ring */ + /* Fill receive rings */ + ipoib_refill_recv ( ipoib, &ipoib->meta ); ipoib_refill_recv ( ipoib, &ipoib->data ); return 0; @@ -436,6 +586,17 @@ int ipoib_probe ( struct ib_device *ibdev ) { ipoib->netdev = netdev; ipoib->ibdev = ibdev; + /* Allocate metadata queue set */ + if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta, + IPOIB_META_NUM_CQES, + IPOIB_META_NUM_SEND_WQES, + IPOIB_META_NUM_RECV_WQES, + IB_SA_QKEY ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n", + ipoib, strerror ( rc ) ); + goto err_create_meta_qset; + } + /* Allocate data queue set */ if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data, IPOIB_DATA_NUM_CQES, @@ -461,6 +622,8 @@ int ipoib_probe ( struct ib_device *ibdev ) { err_register_netdev: ipoib_destroy_qset ( ipoib, &ipoib->data ); err_create_data_qset: + ipoib_destroy_qset ( ipoib, &ipoib->meta ); + err_create_meta_qset: netdev_nullify ( netdev ); netdev_put ( netdev ); return rc; diff --git a/src/drivers/net/mlx_ipoib/ib_mad.c b/src/drivers/net/mlx_ipoib/ib_mad.c index 73b49f20..4da4677b 100644 --- a/src/drivers/net/mlx_ipoib/ib_mad.c +++ b/src/drivers/net/mlx_ipoib/ib_mad.c @@ -264,7 +264,7 @@ static int join_mc_group(__u32 * qkey_p, __u16 * mlid_p, __u8 join) return is_good ? 0 : -1; } -static int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, u8 * sl_p, +int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, u8 * sl_p, u8 * rate_p) { struct path_record_mad_st *mad, *rcv_mad; @@ -321,6 +321,9 @@ static int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, u8 * sl_p, cpu_to_be_buf(mad, sizeof *mad); memcpy(mad->path_record.sgid.raw, ib_data.port_gid.raw, 16); + DBG ( "data:\n" ); + DBG_HD ( mad, sizeof ( *mad ) ); + rc = post_send_req(qp, snd_wqe, 1); if (rc) { eprintf(""); diff --git a/src/drivers/net/mlx_ipoib/ib_mad.h b/src/drivers/net/mlx_ipoib/ib_mad.h index 5ffb5404..51b90d21 100644 --- a/src/drivers/net/mlx_ipoib/ib_mad.h +++ b/src/drivers/net/mlx_ipoib/ib_mad.h @@ -104,7 +104,7 @@ union mad_u { struct ib_mad_st mad; } __attribute__ ((packed)); -static int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, __u8 * sl_p, +int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, __u8 * sl_p, __u8 * rate_p); #endif /* __ib_mad_h__ */ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 08207819..fb98d543 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -174,8 +174,8 @@ static int arbel_cmd ( struct arbel *arbel, unsigned long command, size_t dump_len = in_len; if ( dump_len > 256 ) dump_len = 256; - DBG ( "Input:\n" ); - DBG_HD ( in, dump_len ); + // DBG ( "Input:\n" ); + // DBG_HD ( in, dump_len ); } /* Issue command */ @@ -212,8 +212,8 @@ static int arbel_cmd ( struct arbel *arbel, unsigned long command, size_t dump_len = out_len; if ( dump_len > 256 ) dump_len = 256; - DBG ( "Output:\n" ); - DBG_HD ( out, dump_len ); + // DBG ( "Output:\n" ); + // DBG_HD ( out, dump_len ); } return 0; @@ -749,7 +749,7 @@ static void arbel_ring_doorbell ( struct arbel *arbel, /** GID used for GID-less send work queue entries */ static const struct ib_gid arbel_no_gid = { - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0 } }; /** @@ -805,6 +805,14 @@ static int arbel_post_send ( struct ib_device *ibdev, MLX_FILL_1 ( &wqe->ud, 3, ud_address_vector.sl, av->sl ); gid = ( av->gid_present ? &av->gid : &arbel_no_gid ); memcpy ( &wqe->ud.u.dwords[4], gid, sizeof ( *gid ) ); + + if ( ! av->gid_present ) { + DBG ( "no_gid:\n" ); + DBG_HD ( &arbel_no_gid, sizeof ( arbel_no_gid ) ); + DBG ( "gid:\n" ); + DBG_HD ( &wqe->ud.u.dwords[4], 16 ); + } + MLX_FILL_1 ( &wqe->ud, 8, destination_qp, av->dest_qp ); MLX_FILL_1 ( &wqe->ud, 9, q_key, av->qkey ); MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_len ( iobuf ) ); @@ -821,6 +829,11 @@ static int arbel_post_send ( struct ib_device *ibdev, f, 1, always1, 1 ); + + DBG ( "arbel_post_send()\n" ); + DBG_HD ( wqe, sizeof ( *wqe ) ); + + /* Update doorbell record */ barrier(); qp_db_rec = &arbel->db_rec[arbel_send_wq->doorbell_idx].qp; @@ -1248,6 +1261,17 @@ static int arbel_get_port_gid ( struct arbel *arbel, return 0; } +static int arbel_get_sm_lid ( struct arbel *arbel, + unsigned long *sm_lid ) { + struct ib_mad_port_info port_info; + int rc; + + if ( ( rc = arbel_get_port_info ( arbel, &port_info ) ) != 0 ) + return rc; + *sm_lid = ntohs ( port_info.mastersm_lid ); + return 0; +} + static int arbel_get_broadcast_gid ( struct arbel *arbel, struct ib_gid *broadcast_gid ) { static const struct ib_gid ipv4_broadcast_gid = { @@ -1323,6 +1347,13 @@ static int arbel_probe ( struct pci_device *pci, arbel->limits.reserved_qps = ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) ); + /* Get subnet manager LID */ + if ( ( rc = arbel_get_sm_lid ( arbel, &ibdev->sm_lid ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine subnet manager " + "LID: %s\n", arbel, strerror ( rc ) ); + goto err_get_sm_lid; + } + /* Get port GID */ if ( ( rc = arbel_get_port_gid ( arbel, &ibdev->port_gid ) ) != 0 ) { DBGC ( arbel, "Arbel %p could not determine port GID: %s\n", @@ -1362,6 +1393,7 @@ static int arbel_probe ( struct pci_device *pci, err_ipoib_probe: err_get_broadcast_gid: err_get_port_gid: + err_get_sm_lid: err_query_dev_lim: ib_driver_close ( 0 ); err_ib_driver_init: diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 32f9d675..2d1d9433 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -10,6 +10,12 @@ #include #include +/** Subnet administrator QPN */ +#define IB_SA_QPN 1 + +/** Subnet administrator queue key */ +#define IB_SA_QKEY 0x80010000UL + /** An Infiniband Global Identifier */ struct ib_gid { uint8_t bytes[16]; @@ -250,7 +256,9 @@ struct ib_device { /** Port GID */ struct ib_gid port_gid; /** Broadcast GID */ - struct ib_gid broadcast_gid; + struct ib_gid broadcast_gid; + /** Subnet manager LID */ + unsigned long sm_lid; /** Underlying device */ struct device *dev; /** Infiniband operations */ @@ -422,6 +430,31 @@ static inline void * ib_get_ownerdata ( struct ib_device *ibdev ) { #define IB_SMP_ATTR_LED_INFO 0x0031 #define IB_SMP_ATTR_VENDOR_MASK 0xFF00 +#define IB_SA_ATTR_MC_MEMBER_REC 0x38 +#define IB_SA_ATTR_PATH_REC 0x35 + +#define IB_SA_MCMEMBER_REC_MGID (1<<0) +#define IB_SA_MCMEMBER_REC_PORT_GID (1<<1) +#define IB_SA_MCMEMBER_REC_QKEY (1<<2) +#define IB_SA_MCMEMBER_REC_MLID (1<<3) +#define IB_SA_MCMEMBER_REC_MTU_SELECTOR (1<<4) +#define IB_SA_MCMEMBER_REC_MTU (1<<5) +#define IB_SA_MCMEMBER_REC_TRAFFIC_CLASS (1<<6) +#define IB_SA_MCMEMBER_REC_PKEY (1<<7) +#define IB_SA_MCMEMBER_REC_RATE_SELECTOR (1<<8) +#define IB_SA_MCMEMBER_REC_RATE (1<<9) +#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR (1<<10) +#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME (1<<11) +#define IB_SA_MCMEMBER_REC_SL (1<<12) +#define IB_SA_MCMEMBER_REC_FLOW_LABEL (1<<13) +#define IB_SA_MCMEMBER_REC_HOP_LIMIT (1<<14) +#define IB_SA_MCMEMBER_REC_SCOPE (1<<15) +#define IB_SA_MCMEMBER_REC_JOIN_STATE (1<<16) +#define IB_SA_MCMEMBER_REC_PROXY_JOIN (1<<17) + +#define IB_SA_PATH_REC_DGID (1<<2) +#define IB_SA_PATH_REC_SGID (1<<3) + struct ib_mad_hdr { uint8_t base_version; uint8_t mgmt_class; @@ -435,6 +468,17 @@ struct ib_mad_hdr { uint32_t attr_mod; } __attribute__ (( packed )); +struct ib_sa_hdr { + uint32_t sm_key[2]; + uint16_t reserved; + uint16_t attrib_offset; + uint32_t comp_mask[2]; +} __attribute__ (( packed )); + +struct ib_rmpp_hdr { + uint32_t raw[3]; +} __attribute__ (( packed )); + struct ib_mad_data { struct ib_mad_hdr mad_hdr; uint8_t data[232]; @@ -475,12 +519,29 @@ struct ib_mad_pkey_table { uint16_t pkey[16][2]; } __attribute__ (( packed )); +struct ib_mad_path_record { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + struct ib_sa_hdr sa_hdr; + uint32_t reserved0[2]; + struct ib_gid dgid; + struct ib_gid sgid; + uint16_t dlid; + uint16_t slid; + uint32_t hop_limit__flow_label__raw_traffic; + uint32_t pkey__numb_path__reversible__tclass; + uint32_t rate__rate_selector__mtu__mtu_selector__sl__reserved; + uint32_t preference__packet_lifetime__packet_lifetime_selector; + uint32_t reserved1[35]; +} __attribute__ (( packed )); + union ib_mad { struct ib_mad_hdr mad_hdr; struct ib_mad_data data; struct ib_mad_guid_info guid_info; struct ib_mad_port_info port_info; struct ib_mad_pkey_table pkey_table; + struct ib_mad_path_record path_record; } __attribute__ (( packed )); #endif /* _GPXE_INFINIBAND_H */