From d6b47871de4a99f633f9ea91c3e1e31adeca28ea Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 7 Jul 2009 02:01:21 +0100 Subject: [PATCH] [infiniband] Provide a general mechanism for path record lookups Generalise out the path record lookup code from IPoIB. --- src/drivers/net/ipoib.c | 116 ++--------------- src/include/gpxe/errfile.h | 1 + src/include/gpxe/ib_pathrec.h | 17 +++ src/net/infiniband/ib_pathrec.c | 221 ++++++++++++++++++++++++++++++++ 4 files changed, 247 insertions(+), 108 deletions(-) create mode 100644 src/include/gpxe/ib_pathrec.h create mode 100644 src/net/infiniband/ib_pathrec.c diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index d6815ec5..dd04a43c 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -29,6 +29,7 @@ FILE_LICENCE ( GPL2_OR_LATER ); #include #include #include +#include #include /** @file @@ -78,9 +79,6 @@ struct ipoib_device { int broadcast_attached; }; -/** TID half used to identify get path record replies */ -#define IPOIB_TID_GET_PATH_REC 0x11111111UL - /** TID half used to identify multicast member record replies */ #define IPOIB_TID_MC_MEMBER_REC 0x22222222UL @@ -118,12 +116,6 @@ struct ipoib_peer { uint8_t key; /** MAC address */ struct ipoib_mac mac; - /** LID */ - unsigned int lid; - /** Service level */ - unsigned int sl; - /** Rate */ - unsigned int rate; }; /** Number of IPoIB peer cache entries @@ -352,63 +344,6 @@ struct ll_protocol ipoib_protocol __ll_protocol = { **************************************************************************** */ -/** - * Transmit path record request - * - * @v ipoib IPoIB device - * @v gid Destination GID - * @ret rc Return status code - */ -static int ipoib_get_path_record ( struct ipoib_device *ipoib, - struct ib_gid *gid ) { - struct ib_device *ibdev = ipoib->ibdev; - struct io_buffer *iobuf; - struct ib_mad_sa *sa; - struct ib_address_vector av; - int rc; - - /* Allocate I/O buffer */ - iobuf = alloc_iob ( sizeof ( *sa ) ); - if ( ! iobuf ) - return -ENOMEM; - iob_put ( iobuf, sizeof ( *sa ) ); - sa = iobuf->data; - memset ( sa, 0, sizeof ( *sa ) ); - - /* Construct path record request */ - sa->mad_hdr.base_version = IB_MGMT_BASE_VERSION; - sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; - sa->mad_hdr.class_version = 2; - sa->mad_hdr.method = IB_MGMT_METHOD_GET; - sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC ); - sa->mad_hdr.tid[0] = IPOIB_TID_GET_PATH_REC; - sa->mad_hdr.tid[1] = ipoib_meta_tid++; - sa->sa_hdr.comp_mask[1] = - htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID ); - memcpy ( &sa->sa_data.path_record.dgid, gid, - sizeof ( sa->sa_data.path_record.dgid ) ); - memcpy ( &sa->sa_data.path_record.sgid, &ibdev->gid, - sizeof ( sa->sa_data.path_record.sgid ) ); - - /* Construct address vector */ - memset ( &av, 0, sizeof ( av ) ); - av.lid = ibdev->sm_lid; - av.sl = ibdev->sm_sl; - av.qpn = IB_QPN_GMA; - av.qkey = IB_QKEY_GMA; - - /* Post send request */ - if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av, - iobuf ) ) != 0 ) { - DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n", - ipoib, strerror ( rc ) ); - free_iob ( iobuf ); - return rc; - } - - return 0; -} - /** * Transmit multicast group membership request * @@ -484,7 +419,7 @@ static int ipoib_transmit ( struct net_device *netdev, struct ipoib_hdr *ipoib_hdr; struct ipoib_peer *dest; struct ib_address_vector av; - struct ib_gid *gid; + int rc; /* Sanity check */ if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) { @@ -513,21 +448,16 @@ static int ipoib_transmit ( struct net_device *netdev, /* Broadcast */ av.qpn = IB_QPN_BROADCAST; av.lid = ipoib->broadcast_lid; - gid = &ipoib->broadcast_gid; + memcpy ( &av.gid, &ipoib->broadcast_gid, sizeof ( av.gid ) ); } else { /* Unicast */ - if ( ! dest->lid ) { - /* No LID yet - get path record to fetch LID */ - ipoib_get_path_record ( ipoib, &dest->mac.gid ); - return -ENOENT; - } av.qpn = ntohl ( dest->mac.qpn ); - av.lid = dest->lid; - av.rate = dest->rate; - av.sl = dest->sl; - gid = &dest->mac.gid; + memcpy ( &av.gid, &dest->mac.gid, sizeof ( av.gid ) ); + if ( ( rc = ib_resolve_path ( ibdev, &av ) ) != 0 ) { + /* Path not resolved yet */ + return rc; + } } - memcpy ( &av.gid, gid, sizeof ( av.gid ) ); return ib_post_send ( ibdev, ipoib->data.qp, &av, iobuf ); } @@ -617,33 +547,6 @@ static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused, free_iob ( iobuf ); } -/** - * Handle received IPoIB path record - * - * @v ipoib IPoIB device - * @v path_record Path record - */ -static void ipoib_recv_path_record ( struct ipoib_device *ipoib, - struct ib_path_record *path_record ) { - struct ipoib_peer *peer; - - /* Locate peer cache entry */ - peer = ipoib_lookup_peer_by_gid ( &path_record->dgid ); - if ( ! peer ) { - DBGC ( ipoib, "IPoIB %p received unsolicited path record\n", - ipoib ); - return; - } - - /* Update path cache entry */ - peer->lid = ntohs ( path_record->dlid ); - peer->sl = ( path_record->reserved__sl & 0x0f ); - peer->rate = ( path_record->rate_selector__rate & 0x3f ); - - DBG ( "IPoIB peer %x has dlid %x sl %x rate %x\n", - peer->key, peer->lid, peer->sl, peer->rate ); -} - /** * Handle received IPoIB multicast membership record * @@ -710,9 +613,6 @@ ipoib_meta_complete_recv ( struct ib_device *ibdev __unused, } switch ( sa->mad_hdr.tid[0] ) { - case IPOIB_TID_GET_PATH_REC: - ipoib_recv_path_record ( ipoib, &sa->sa_data.path_record ); - break; case IPOIB_TID_MC_MEMBER_REC: ipoib_recv_mc_member_record ( ipoib, &sa->sa_data.mc_member_record ); diff --git a/src/include/gpxe/errfile.h b/src/include/gpxe/errfile.h index e8132b47..0eac0a8e 100644 --- a/src/include/gpxe/errfile.h +++ b/src/include/gpxe/errfile.h @@ -145,6 +145,7 @@ FILE_LICENCE ( GPL2_OR_LATER ); #define ERRFILE_icmp ( ERRFILE_NET | 0x00190000 ) #define ERRFILE_ib_qset ( ERRFILE_NET | 0x001a0000 ) #define ERRFILE_ib_gma ( ERRFILE_NET | 0x001b0000 ) +#define ERRFILE_ib_pathrec ( ERRFILE_NET | 0x001c0000 ) #define ERRFILE_image ( ERRFILE_IMAGE | 0x00000000 ) #define ERRFILE_elf ( ERRFILE_IMAGE | 0x00010000 ) diff --git a/src/include/gpxe/ib_pathrec.h b/src/include/gpxe/ib_pathrec.h new file mode 100644 index 00000000..44515563 --- /dev/null +++ b/src/include/gpxe/ib_pathrec.h @@ -0,0 +1,17 @@ +#ifndef _GPXE_IB_PATHREC_H +#define _GPXE_IB_PATHREC_H + +/** @file + * + * Infiniband path records + * + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include + +extern int ib_resolve_path ( struct ib_device *ibdev, + struct ib_address_vector *av ); + +#endif /* _GPXE_IB_PATHREC_H */ diff --git a/src/net/infiniband/ib_pathrec.c b/src/net/infiniband/ib_pathrec.c new file mode 100644 index 00000000..89622d98 --- /dev/null +++ b/src/net/infiniband/ib_pathrec.c @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2009 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include +#include +#include +#include +#include +#include +#include + +/** @file + * + * Infiniband path lookups + * + */ + +/** Number of path record cache entries + * + * Must be a power of two. + */ +#define IB_NUM_CACHED_PATHS 4 + +/** A path record cache entry */ +struct ib_cached_path_record { + /** Infiniband device's port GID + * + * Used to disambiguate cache entries when we have multiple + * Infiniband devices, without having to maintain a pointer to + * the Infiniband device. + */ + struct ib_gid sgid; + /** Destination GID */ + struct ib_gid dgid; + /** Destination LID */ + unsigned int dlid; + /** Rate */ + unsigned int rate; + /** Service level */ + unsigned int sl; +}; + +/** Path record cache */ +static struct ib_cached_path_record ib_path_cache[IB_NUM_CACHED_PATHS]; + +/** Oldest path record cache entry index */ +static unsigned int ib_path_cache_idx; + +/** + * Find path record cache entry + * + * @v ibdev Infiniband device + * @v dgid Destination GID + * @ret cached Path record cache entry, or NULL + */ +static struct ib_cached_path_record * +ib_find_path_cache_entry ( struct ib_device *ibdev, struct ib_gid *dgid ) { + struct ib_cached_path_record *cached; + unsigned int i; + + for ( i = 0 ; i < IB_NUM_CACHED_PATHS ; i++ ) { + cached = &ib_path_cache[i]; + if ( memcmp ( &cached->sgid, &ibdev->gid, + sizeof ( cached->sgid ) ) != 0 ) + continue; + if ( memcmp ( &cached->dgid, dgid, + sizeof ( cached->dgid ) ) != 0 ) + continue; + return cached; + } + + return NULL; +} + +/** + * Resolve path record + * + * @v ibdev Infiniband device + * @v av Address vector to complete + * @ret rc Return status code + */ +int ib_resolve_path ( struct ib_device *ibdev, + struct ib_address_vector *av ) { + struct ib_gid *gid = &av->gid; + struct ib_cached_path_record *cached; + union ib_mad mad; + struct ib_mad_sa *sa = &mad.sa; + unsigned int cache_idx; + int rc; + + /* Sanity check */ + if ( ! av->gid_present ) { + DBGC ( ibdev, "IBDEV %p attempt to look up path record " + "without GID\n", ibdev ); + return -EINVAL; + } + + /* Look in cache for a matching entry */ + cached = ib_find_path_cache_entry ( ibdev, gid ); + if ( cached && cached->dlid ) { + /* Populated entry found */ + av->lid = cached->dlid; + av->rate = cached->rate; + av->sl = cached->sl; + DBGC2 ( ibdev, "IBDEV %p cache hit for %08x:%08x:%08x:%08x\n", + ibdev, htonl ( gid->u.dwords[0] ), + htonl ( gid->u.dwords[1] ), htonl ( gid->u.dwords[2] ), + htonl ( gid->u.dwords[3] ) ); + return 0; + } + DBGC ( ibdev, "IBDEV %p cache miss for %08x:%08x:%08x:%08x%s\n", ibdev, + htonl ( gid->u.dwords[0] ), htonl ( gid->u.dwords[1] ), + htonl ( gid->u.dwords[2] ), htonl ( gid->u.dwords[3] ), + ( cached ? " (in progress)" : "" ) ); + + /* If no unresolved entry was found, then create a new one */ + if ( ! cached ) { + cache_idx = ( (ib_path_cache_idx++) % IB_NUM_CACHED_PATHS ); + cached = &ib_path_cache[cache_idx]; + memset ( cached, 0, sizeof ( *cached ) ); + memcpy ( &cached->sgid, &ibdev->gid, sizeof ( cached->sgid ) ); + memcpy ( &cached->dgid, gid, sizeof ( cached->dgid ) ); + } + + /* Construct path record request */ + memset ( sa, 0, sizeof ( *sa ) ); + sa->mad_hdr.base_version = IB_MGMT_BASE_VERSION; + sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + sa->mad_hdr.class_version = IB_SA_CLASS_VERSION; + sa->mad_hdr.method = IB_MGMT_METHOD_GET; + sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC ); + sa->sa_hdr.comp_mask[1] = + htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID ); + memcpy ( &sa->sa_data.path_record.dgid, &cached->dgid, + sizeof ( sa->sa_data.path_record.dgid ) ); + memcpy ( &sa->sa_data.path_record.sgid, &cached->sgid, + sizeof ( sa->sa_data.path_record.sgid ) ); + + /* Issue path record request */ + if ( ( rc = ib_gma_request ( &ibdev->gma, &mad, NULL ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not get path record: %s\n", + ibdev, strerror ( rc ) ); + return rc; + } + + /* Not found yet */ + return -ENOENT; +} + +/** + * Handle path record response + * + * @v ibdev Infiniband device + * @v mad MAD + * @ret rc Return status code + */ +static int ib_handle_path_record ( struct ib_device *ibdev, + union ib_mad *mad ) { + struct ib_path_record *path_record = &mad->sa.sa_data.path_record; + struct ib_gid *dgid = &path_record->dgid; + struct ib_cached_path_record *cached; + unsigned int dlid; + unsigned int sl; + unsigned int rate; + + /* Ignore if not a success */ + if ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ) { + DBGC ( ibdev, "IBDEV %p path record lookup failed with status " + "%04x\n", ibdev, ntohs ( mad->hdr.status ) ); + return -EINVAL; + } + + /* Extract values from MAD */ + dlid = ntohs ( path_record->dlid ); + sl = ( path_record->reserved__sl & 0x0f ); + rate = ( path_record->rate_selector__rate & 0x3f ); + DBGC ( ibdev, "IBDEV %p path to %08x:%08x:%08x:%08x is %04x sl %d " + "rate %d\n", ibdev, htonl ( dgid->u.dwords[0] ), + htonl ( dgid->u.dwords[1] ), htonl ( dgid->u.dwords[2] ), + htonl ( dgid->u.dwords[3] ), dlid, sl, rate ); + + /* Look for a matching cache entry to fill in */ + if ( ( cached = ib_find_path_cache_entry ( ibdev, dgid ) ) != NULL ) { + DBGC ( ibdev, "IBDEV %p cache add for %08x:%08x:%08x:%08x\n", + ibdev, htonl ( dgid->u.dwords[0] ), + htonl ( dgid->u.dwords[1] ), + htonl ( dgid->u.dwords[2] ), + htonl ( dgid->u.dwords[3] ) ); + cached->dlid = dlid; + cached->rate = rate; + cached->sl = sl; + } + + return 0; +} + +/** Path record response handler */ +struct ib_mad_handler ib_path_record_handler __ib_mad_handler = { + .mgmt_class = IB_MGMT_CLASS_SUBN_ADM, + .class_version = IB_SA_CLASS_VERSION, + .method = IB_MGMT_METHOD_GET_RESP, + .attr_id = htons ( IB_SA_ATTR_PATH_REC ), + .handle = ib_handle_path_record, +};