david/ipxe
david
/
ipxe
Archived
1
0
Fork 0

[Infiniband] Add preliminary multiple port support for Hermon cards

Infiniband devices no longer block waiting for link-up in
register_ibdev().

Hermon driver needs to create an event queue and poll for link-up events.

Infiniband core needs to reread MAD parameters when link state changes.

IPoIB needs to cope with Infiniband link parameters being only partially
available at probe and open time.
This commit is contained in:
Michael Brown 2008-04-18 02:50:48 +01:00
parent 3475b693b7
commit a176a24ac0
6 changed files with 579 additions and 143 deletions

View File

@ -30,6 +30,7 @@
#include <gpxe/umalloc.h>
#include <gpxe/iobuf.h>
#include <gpxe/netdevice.h>
#include <gpxe/process.h>
#include <gpxe/infiniband.h>
#include "hermon.h"
@ -317,19 +318,30 @@ hermon_cmd_write_mtt ( struct hermon *hermon,
}
static inline int
hermon_cmd_sw2hw_eq ( struct hermon *hermon, unsigned int index,
const struct hermonprm_eqc *eqc ) {
hermon_cmd_map_eq ( struct hermon *hermon, unsigned long index_map,
const struct hermonprm_event_mask *mask ) {
return hermon_cmd ( hermon,
HERMON_HCR_IN_CMD ( HERMON_HCR_SW2HW_EQ,
1, sizeof ( *eqc ) ),
0, eqc, index, NULL );
HERMON_HCR_IN_CMD ( HERMON_HCR_MAP_EQ,
0, sizeof ( *mask ) ),
0, mask, index_map, NULL );
}
static inline int
hermon_cmd_hw2sw_eq ( struct hermon *hermon, unsigned int index ) {
hermon_cmd_sw2hw_eq ( struct hermon *hermon, unsigned int index,
const struct hermonprm_eqc *eqctx ) {
return hermon_cmd ( hermon,
HERMON_HCR_VOID_CMD ( HERMON_HCR_HW2SW_EQ ),
1, NULL, index, NULL );
HERMON_HCR_IN_CMD ( HERMON_HCR_SW2HW_EQ,
1, sizeof ( *eqctx ) ),
0, eqctx, index, NULL );
}
static inline int
hermon_cmd_hw2sw_eq ( struct hermon *hermon, unsigned int index,
struct hermonprm_eqc *eqctx ) {
return hermon_cmd ( hermon,
HERMON_HCR_OUT_CMD ( HERMON_HCR_HW2SW_EQ,
1, sizeof ( *eqctx ) ),
1, NULL, index, eqctx );
}
static inline int
@ -377,6 +389,15 @@ hermon_cmd_rtr2rts_qp ( struct hermon *hermon, unsigned long qpn,
0, ctx, qpn, NULL );
}
/**
 * Issue the RTS2RTS_QP firmware command
 *
 * @v hermon		Hermon device
 * @v qpn		Queue pair number
 * @v ctx		Queue pair state transition context
 * @ret rc		Return status code
 *
 * The context is passed to hermon_cmd() as the command input and no
 * output buffer is supplied.
 */
static inline int
hermon_cmd_rts2rts_qp ( struct hermon *hermon, unsigned long qpn,
			const struct hermonprm_qp_ee_state_transitions *ctx ) {
	int rc;

	rc = hermon_cmd ( hermon,
			  HERMON_HCR_IN_CMD ( HERMON_HCR_RTS2RTS_QP,
					      1, sizeof ( *ctx ) ),
			  0, ctx, qpn, NULL );
	return rc;
}
static inline int
hermon_cmd_2rst_qp ( struct hermon *hermon, unsigned long qpn ) {
return hermon_cmd ( hermon,
@ -859,6 +880,39 @@ static int hermon_create_qp ( struct ib_device *ibdev,
return rc;
}
/**
 * Modify queue pair
 *
 * @v ibdev		Infiniband device
 * @v qp		Queue pair
 * @v mod_list		Modification list
 * @ret rc		Return status code
 *
 * Issues an RTS2RTS_QP command.  The queue key currently stored in
 * the queue pair is always written into the command context; the
 * optional parameter mask only flags it when IB_MODIFY_QKEY is set
 * in the modification list.
 */
static int hermon_modify_qp ( struct ib_device *ibdev,
			      struct ib_queue_pair *qp,
			      unsigned long mod_list ) {
	struct hermon *hermon = ib_get_drvdata ( ibdev );
	struct hermonprm_qp_ee_state_transitions qpctx;
	unsigned long optparammask;
	int rc;

	/* Translate modification list into optional parameter mask */
	optparammask = ( ( mod_list & IB_MODIFY_QKEY ) ?
			 HERMON_QP_OPT_PARAM_QKEY : 0 );

	/* Build state transition context */
	memset ( &qpctx, 0, sizeof ( qpctx ) );
	MLX_FILL_1 ( &qpctx, 0, opt_param_mask, optparammask );
	MLX_FILL_1 ( &qpctx, 44, qpc_eec_data.q_key, qp->qkey );

	/* Issue RTS2RTS_QP */
	rc = hermon_cmd_rts2rts_qp ( hermon, qp->qpn, &qpctx );
	if ( rc != 0 ) {
		DBGC ( hermon, "Hermon %p RTS2RTS_QP failed: %s\n",
		       hermon, strerror ( rc ) );
	}

	return rc;
}
/**
* Destroy queue pair
*
@ -1356,6 +1410,7 @@ static struct ib_device_operations hermon_ib_operations = {
.create_cq = hermon_create_cq,
.destroy_cq = hermon_destroy_cq,
.create_qp = hermon_create_qp,
.modify_qp = hermon_modify_qp,
.destroy_qp = hermon_destroy_qp,
.post_send = hermon_post_send,
.post_recv = hermon_post_recv,
@ -1367,6 +1422,211 @@ static struct ib_device_operations hermon_ib_operations = {
.mad = hermon_mad,
};
/***************************************************************************
*
* Event queues
*
***************************************************************************
*/
/**
 * Create event queue
 *
 * Allocates the event queue entries, allocates MTT entries covering
 * them, hands the queue to the HCA as event queue 0 (SW2HW_EQ) and
 * maps port state change events onto it (MAP_EQ).  On failure, the
 * steps already completed are undone in reverse order and the event
 * queue structure is zeroed.
 *
 * @v hermon Hermon device
 * @ret rc Return status code
 */
static int hermon_create_eq ( struct hermon *hermon ) {
struct hermon_event_queue *hermon_eq = &hermon->eq;
struct hermonprm_eqc eqctx;
struct hermonprm_event_mask mask;
unsigned int i;
int rc;
/* Allocate event queue itself; the buffer is aligned to the size
 * of a single entry.
 */
hermon_eq->eqe_size =
( HERMON_NUM_EQES * sizeof ( hermon_eq->eqe[0] ) );
hermon_eq->eqe = malloc_dma ( hermon_eq->eqe_size,
sizeof ( hermon_eq->eqe[0] ) );
if ( ! hermon_eq->eqe ) {
rc = -ENOMEM;
goto err_eqe;
}
memset ( hermon_eq->eqe, 0, hermon_eq->eqe_size );
/* Set the owner bit in every entry, so that hermon_poll_eq()
 * initially treats all entries as still owned by hardware.
 */
for ( i = 0 ; i < HERMON_NUM_EQES ; i++ ) {
MLX_FILL_1 ( &hermon_eq->eqe[i].generic, 7, owner, 1 );
}
/* Presumably ensures the entries are initialised before the
 * queue is handed to hardware below -- TODO confirm barrier()
 * semantics.
 */
barrier();
/* Allocate MTT entries */
if ( ( rc = hermon_alloc_mtt ( hermon, hermon_eq->eqe,
hermon_eq->eqe_size,
&hermon_eq->mtt ) ) != 0 )
goto err_alloc_mtt;
/* Hand queue over to hardware */
memset ( &eqctx, 0, sizeof ( eqctx ) );
MLX_FILL_1 ( &eqctx, 0, st, 0xa /* "Fired" */ );
/* NOTE(review): the shifts by 5 and 3 below presumably convert
 * byte quantities into the units expected by the context
 * fields -- confirm against the PRM.
 */
MLX_FILL_1 ( &eqctx, 2,
page_offset, ( hermon_eq->mtt.page_offset >> 5 ) );
/* Base-2 logarithm of the queue length; assumes HERMON_NUM_EQES
 * is a power of two.
 */
MLX_FILL_1 ( &eqctx, 3, log_eq_size, fls ( HERMON_NUM_EQES - 1 ) );
MLX_FILL_1 ( &eqctx, 7, mtt_base_addr_l,
( hermon_eq->mtt.mtt_base_addr >> 3 ) );
if ( ( rc = hermon_cmd_sw2hw_eq ( hermon, 0, &eqctx ) ) != 0 ) {
DBGC ( hermon, "Hermon %p SW2HW_EQ failed: %s\n",
hermon, strerror ( rc ) );
goto err_sw2hw_eq;
}
/* Map events to this event queue; only port state change events
 * are requested.
 */
memset ( &mask, 0, sizeof ( mask ) );
MLX_FILL_1 ( &mask, 1, port_state_change, 1 );
if ( ( rc = hermon_cmd_map_eq ( hermon, ( HERMON_MAP_EQ_MAP | 0 ),
&mask ) ) != 0 ) {
DBGC ( hermon, "Hermon %p MAP_EQ failed: %s\n",
hermon, strerror ( rc ) );
goto err_map_eq;
}
return 0;
/* Error unwinding: each label undoes the step that succeeded
 * immediately before the failing one, then falls through.
 */
err_map_eq:
hermon_cmd_hw2sw_eq ( hermon, 0, &eqctx );
err_sw2hw_eq:
hermon_free_mtt ( hermon, &hermon_eq->mtt );
err_alloc_mtt:
free_dma ( hermon_eq->eqe, hermon_eq->eqe_size );
err_eqe:
/* Leave no stale pointers or sizes behind */
memset ( hermon_eq, 0, sizeof ( *hermon_eq ) );
return rc;
}
/**
* Destroy event queue
*
* @v hermon Hermon device
*/
static void hermon_destroy_eq ( struct hermon *hermon ) {
struct hermon_event_queue *hermon_eq = &hermon->eq;
struct hermonprm_eqc eqctx;
struct hermonprm_event_mask mask;
int rc;
/* Unmap events from event queue */
memset ( &mask, 0, sizeof ( mask ) );
MLX_FILL_1 ( &mask, 1, port_state_change, 1 );
if ( ( rc = hermon_cmd_map_eq ( hermon, ( HERMON_MAP_EQ_UNMAP | 0 ),
&mask ) ) != 0 ) {
DBGC ( hermon, "Hermon %p FATAL MAP_EQ failed to unmap: %s\n",
hermon, strerror ( rc ) );
/* Continue; HCA may die but system should survive */
}
/* Take ownership back from hardware */
if ( ( rc = hermon_cmd_hw2sw_eq ( hermon, 0, &eqctx ) ) != 0 ) {
DBGC ( hermon, "Hermon %p FATAL HW2SW_EQ failed: %s\n",
hermon, strerror ( rc ) );
/* Leak memory and return; at least we avoid corruption */
return;
}
/* Free MTT entries */
hermon_free_mtt ( hermon, &hermon_eq->mtt );
/* Free memory */
free_dma ( hermon_eq->eqe, hermon_eq->eqe_size );
memset ( hermon_eq, 0, sizeof ( *hermon_eq ) );
}
/**
 * Handle port state event
 *
 * @v hermon		Hermon device
 * @v eqe		Port state change event queue entry
 *
 * Decodes the port number and link state from the event, logs them,
 * and notifies the Infiniband core for the affected port.  Events
 * naming a port outside the range of HERMON_NUM_PORTS are logged and
 * dropped.
 */
static void hermon_event_port_state_change ( struct hermon *hermon,
				union hermonprm_event_entry *eqe){
	const char *state;
	unsigned int port;
	int link_up;

	/* Event carries a one-based port number; convert to array index */
	port = ( MLX_GET ( &eqe->port_state_change, data.p ) - 1 );

	/* Link state is encoded in the event subtype */
	link_up = ( MLX_GET ( &eqe->generic, event_sub_type ) & 0x04 );
	if ( link_up ) {
		state = "up";
	} else {
		state = "down";
	}
	DBGC ( hermon, "Hermon %p port %d link %s\n", hermon, ( port + 1 ),
	       state );

	/* Pass valid ports on to the Infiniband core */
	if ( port < HERMON_NUM_PORTS ) {
		ib_link_state_changed ( hermon->ibdev[port] );
		return;
	}

	/* Otherwise reject the event */
	DBGC ( hermon, "Hermon %p port %d does not exist!\n",
	       hermon, ( port + 1 ) );
}
/**
 * Poll event queue
 *
 * @v hermon		Hermon device
 *
 * Consumes every event queue entry currently owned by software,
 * dispatches it by event type, and rings the event queue doorbell
 * with the updated consumer index after each entry.
 *
 * Does nothing if the event queue has not been created (or has
 * already been destroyed).  The polling process is initialised
 * before hermon_create_eq() is called and deleted only after
 * hermon_destroy_eq() has zeroed the queue, so this function must
 * tolerate an absent queue.
 */
static void hermon_poll_eq ( struct hermon *hermon ) {
	struct hermon_event_queue *hermon_eq = &hermon->eq;
	/* Queue length is assumed to be a power of two */
	const unsigned int eqe_idx_mask = ( HERMON_NUM_EQES - 1 );
	union hermonprm_event_entry *eqe;
	union hermonprm_doorbell_register db_reg;
	unsigned int event_type;

	/* Nothing to poll if there is no event queue */
	if ( ! hermon_eq->eqe )
		return;

	while ( 1 ) {
		/* The index is free-running: its low bits select the
		 * entry, and the bit just above them toggles on each
		 * pass around the ring.  An entry belongs to software
		 * only when its owner bit matches that toggle bit.
		 */
		eqe = &hermon_eq->eqe[hermon_eq->next_idx & eqe_idx_mask];
		if ( MLX_GET ( &eqe->generic, owner ) ^
		     ( ( hermon_eq->next_idx & HERMON_NUM_EQES ) ? 1 : 0 ) ) {
			/* Entry still owned by hardware; end of poll */
			break;
		}
		DBGCP ( hermon, "Hermon %p event:\n", hermon );
		DBGCP_HD ( hermon, eqe, sizeof ( *eqe ) );

		/* Handle event */
		event_type = MLX_GET ( &eqe->generic, event_type );
		switch ( event_type ) {
		case HERMON_EV_PORT_STATE_CHANGE:
			hermon_event_port_state_change ( hermon, eqe );
			break;
		default:
			DBGC ( hermon, "Hermon %p unrecognised event type "
			       "%#x:\n", hermon, event_type );
			DBGC_HD ( hermon, eqe, sizeof ( *eqe ) );
			break;
		}

		/* Update event queue's index */
		hermon_eq->next_idx++;

		/* Ring doorbell with the new consumer index.  The
		 * doorbell word is a uint32_t, so cast it to match
		 * the %lx conversion in the debug message.
		 */
		memset ( &db_reg, 0, sizeof ( db_reg ) );
		MLX_FILL_1 ( &db_reg.event, 0, ci, hermon_eq->next_idx );
		DBGCP ( hermon, "Ringing doorbell %08lx with %08lx\n",
			virt_to_phys ( hermon->uar + HERMON_DB_EQ0_OFFSET ),
			( unsigned long ) db_reg.dword[0] );
		writel ( db_reg.dword[0],
			 ( hermon->uar + HERMON_DB_EQ0_OFFSET ) );
	}
}
/**
 * Event queue poll processor
 *
 * @v process		Hermon event queue process
 *
 * Single step of the event process embedded in the Hermon device
 * structure: recovers the owning device and polls its event queue.
 */
static void hermon_step ( struct process *process ) {
	hermon_poll_eq ( container_of ( process, struct hermon,
					event_process ) );
}
/***************************************************************************
*
* Firmware control
@ -1879,6 +2139,7 @@ static int hermon_probe ( struct pci_device *pci,
goto err_alloc_hermon;
}
pci_set_drvdata ( pci, hermon );
process_init ( &hermon->event_process, hermon_step, NULL );
/* Allocate Infiniband devices */
for ( i = 0 ; i < HERMON_NUM_PORTS ; i++ ) {
@ -1945,6 +2206,10 @@ static int hermon_probe ( struct pci_device *pci,
if ( ( rc = hermon_setup_mpt ( hermon ) ) != 0 )
goto err_setup_mpt;
/* Set up event queue */
if ( ( rc = hermon_create_eq ( hermon ) ) != 0 )
goto err_create_eq;
/* Register Infiniband devices */
for ( i = 0 ; i < HERMON_NUM_PORTS ; i++ ) {
if ( ( rc = register_ibdev ( hermon->ibdev[i] ) ) != 0 ) {
@ -1960,6 +2225,8 @@ static int hermon_probe ( struct pci_device *pci,
err_register_ibdev:
for ( ; i >= 0 ; i-- )
unregister_ibdev ( hermon->ibdev[i] );
hermon_destroy_eq ( hermon );
err_create_eq:
err_setup_mpt:
hermon_cmd_close_hca ( hermon );
err_init_hca:
@ -1976,6 +2243,7 @@ static int hermon_probe ( struct pci_device *pci,
err_alloc_ibdev:
for ( ; i >= 0 ; i-- )
free_ibdev ( hermon->ibdev[i] );
process_del ( &hermon->event_process );
free ( hermon );
err_alloc_hermon:
return rc;
@ -1992,6 +2260,7 @@ static void hermon_remove ( struct pci_device *pci ) {
for ( i = ( HERMON_NUM_PORTS - 1 ) ; i >= 0 ; i-- )
unregister_ibdev ( hermon->ibdev[i] );
hermon_destroy_eq ( hermon );
hermon_cmd_close_hca ( hermon );
hermon_free_icm ( hermon );
hermon_stop_firmware ( hermon );
@ -2000,6 +2269,7 @@ static void hermon_remove ( struct pci_device *pci ) {
free_dma ( hermon->mailbox_in, HERMON_MBOX_SIZE );
for ( i = ( HERMON_NUM_PORTS - 1 ) ; i >= 0 ; i-- )
free_ibdev ( hermon->ibdev[i] );
process_del ( &hermon->event_process );
free ( hermon );
}

View File

@ -9,6 +9,7 @@
#include <stdint.h>
#include <gpxe/uaccess.h>
#include <gpxe/process.h>
#include "mlx_bitops.h"
#include "MT25408_PRM.h"
@ -18,7 +19,7 @@
*/
/* Ports in existence */
#define HERMON_NUM_PORTS 1
#define HERMON_NUM_PORTS 2
#define HERMON_PORT_BASE 1
/* PCI BARs */
@ -48,6 +49,7 @@
#define HERMON_HCR_RST2INIT_QP 0x0019
#define HERMON_HCR_INIT2RTR_QP 0x001a
#define HERMON_HCR_RTR2RTS_QP 0x001b
#define HERMON_HCR_RTS2RTS_QP 0x001c
#define HERMON_HCR_2RST_QP 0x0021
#define HERMON_HCR_MAD_IFC 0x0024
#define HERMON_HCR_READ_MCG 0x0025
@ -75,6 +77,14 @@
#define HERMON_PAGE_SIZE 4096
#define HERMON_DB_POST_SND_OFFSET 0x14
#define HERMON_DB_EQ0_OFFSET 0x800
#define HERMON_QP_OPT_PARAM_QKEY 0x00000020UL
#define HERMON_MAP_EQ_MAP ( 0UL << 31 )
#define HERMON_MAP_EQ_UNMAP ( 1UL << 31 )
#define HERMON_EV_PORT_STATE_CHANGE 0x09
/*
* Datatypes that seem to be missing from the autogenerated documentation
@ -108,12 +118,32 @@ struct hermonprm_send_db_register_st {
pseudo_bit_t qn[0x00018];
} __attribute__ (( packed ));
/* Event queue doorbell register layout.  The pseudo-bit widths total
 * 0x20, matching the single dword of union hermonprm_doorbell_register.
 */
struct hermonprm_event_db_register_st {
/* Consumer index; hermon_poll_eq() writes the event queue's next
 * entry index here.
 */
pseudo_bit_t ci[0x00018];
/* Reserved bits (field name is spelt "reserver" [sic]) */
pseudo_bit_t reserver[0x00007];
/* NOTE(review): presumably an "arm" flag -- never set by the code
 * shown here; confirm against the PRM.
 */
pseudo_bit_t a[0x00001];
} __attribute__ (( packed ));
/* A scalar parameter split into two fields of 0x20 pseudo-bits each */
struct hermonprm_scalar_parameter_st {
/* First 0x20 pseudo-bits; presumably the upper half, going by the
 * field name.
 */
pseudo_bit_t value_hi[0x00020];
/* -------------- */
/* Second 0x20 pseudo-bits */
pseudo_bit_t value[0x00020];
} __attribute__ (( packed ));
/* Event mask passed to the MAP_EQ command: two dwords, with one flag
 * per event class in the second dword.
 */
struct hermonprm_event_mask_st {
/* First dword: entirely reserved */
pseudo_bit_t reserved0[0x00020];
/* -------------- */
/* Completion events (not requested by the code shown here) */
pseudo_bit_t completion[0x00001];
pseudo_bit_t reserved1[0x0008];
/* Port state change events.  Nine pseudo-bits precede this field
 * within the dword, which appears to correspond to event type
 * HERMON_EV_PORT_STATE_CHANGE (0x09) -- confirm against the PRM.
 */
pseudo_bit_t port_state_change[0x00001];
/* Remaining 0x16 pseudo-bits, padding the dword to 0x20 */
pseudo_bit_t reserved2[0x00016];
} __attribute__ (( packed ));
/* Event queue entry as laid out for a port state change event: one
 * reserved dword followed by the port state change data.
 */
struct hermonprm_port_state_change_event_st {
pseudo_bit_t reserved[0x00020];
/* Event data; the "p" field holds the one-based port number read
 * by hermon_event_port_state_change().
 */
struct hermonprm_port_state_change_st data;
} __attribute__ (( packed ));
/*
* Wrapper structures for hardware datatypes
*
@ -124,6 +154,9 @@ struct MLX_DECLARE_STRUCT ( hermonprm_completion_queue_entry );
struct MLX_DECLARE_STRUCT ( hermonprm_completion_with_error );
struct MLX_DECLARE_STRUCT ( hermonprm_cq_db_record );
struct MLX_DECLARE_STRUCT ( hermonprm_eqc );
struct MLX_DECLARE_STRUCT ( hermonprm_event_db_register );
struct MLX_DECLARE_STRUCT ( hermonprm_event_mask );
struct MLX_DECLARE_STRUCT ( hermonprm_event_queue_entry );
struct MLX_DECLARE_STRUCT ( hermonprm_hca_command_register );
struct MLX_DECLARE_STRUCT ( hermonprm_init_hca );
struct MLX_DECLARE_STRUCT ( hermonprm_init_port );
@ -132,6 +165,7 @@ struct MLX_DECLARE_STRUCT ( hermonprm_mcg_entry );
struct MLX_DECLARE_STRUCT ( hermonprm_mgm_hash );
struct MLX_DECLARE_STRUCT ( hermonprm_mpt );
struct MLX_DECLARE_STRUCT ( hermonprm_mtt );
struct MLX_DECLARE_STRUCT ( hermonprm_port_state_change_event );
struct MLX_DECLARE_STRUCT ( hermonprm_qp_db_record );
struct MLX_DECLARE_STRUCT ( hermonprm_qp_ee_state_transitions );
struct MLX_DECLARE_STRUCT ( hermonprm_query_dev_cap );
@ -175,8 +209,14 @@ union hermonprm_completion_entry {
struct hermonprm_completion_with_error error;
} __attribute__ (( packed ));
/* An event queue entry, viewed either generically or as a specific
 * event type.
 */
union hermonprm_event_entry {
/* Generic view: used to read the owner bit, event type and event
 * subtype.
 */
struct hermonprm_event_queue_entry generic;
/* View used once the event type is HERMON_EV_PORT_STATE_CHANGE */
struct hermonprm_port_state_change_event port_state_change;
} __attribute__ (( packed ));
/* A doorbell register value, built via one of the structured views
 * and then written out as a raw dword.
 */
union hermonprm_doorbell_register {
/* Send doorbell layout */
struct hermonprm_send_db_register send;
/* Event queue doorbell layout */
struct hermonprm_event_db_register event;
/* Raw value, as passed to writel() */
uint32_t dword[1];
} __attribute__ (( packed ));
@ -362,6 +402,24 @@ struct hermon_completion_queue {
*/
#define HERMON_MAX_EQS 4
/** A Hermon event queue
 *
 * All fields are zero when no event queue exists: the structure is
 * cleared when creation fails and after the queue has been destroyed.
 */
struct hermon_event_queue {
/** Event queue entries (array of HERMON_NUM_EQES entries) */
union hermonprm_event_entry *eqe;
/** Size of event queue, in bytes */
size_t eqe_size;
/** MTT descriptor */
struct hermon_mtt mtt;
/** Next event queue entry index
 *
 * This index is free-running; it is reduced modulo the queue
 * length when used to select an entry.
 */
unsigned long next_idx;
};
/** Number of event queue entries
*
* This is a policy decision.
*/
#define HERMON_NUM_EQES 4
/** A Hermon resource bitmask */
typedef uint32_t hermon_bitmask_t;
@ -397,6 +455,11 @@ struct hermon {
*/
unsigned long reserved_lkey;
/** Event queue */
struct hermon_event_queue eq;
/** Event queue process */
struct process event_process;
/** Completion queue in-use bitmask */
hermon_bitmask_t cq_inuse[ HERMON_BITMASK_SIZE ( HERMON_MAX_CQS ) ];
/** Queue pair in-use bitmask */

View File

@ -80,10 +80,14 @@ struct ipoib_device {
struct ib_gid broadcast_gid;
/** Broadcast LID */
unsigned int broadcast_lid;
/** Joined to broadcast group */
int broadcast_joined;
/** Data queue key */
unsigned long data_qkey;
/** Attached to multicast group
*
* This flag indicates whether or not we have attached our
* data queue pair to the broadcast multicast GID.
*/
int broadcast_attached;
};
/**
@ -272,6 +276,10 @@ static int ipoib_create_qset ( struct ipoib_device *ipoib,
struct ib_device *ibdev = ipoib->ibdev;
int rc;
/* Sanity check */
assert ( qset->cq == NULL );
assert ( qset->qp == NULL );
/* Store queue parameters */
qset->recv_max_fill = num_recv_wqes;
@ -617,14 +625,24 @@ static void ipoib_recv_path_record ( struct ipoib_device *ipoib __unused,
*/
static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib,
struct ib_mad_mc_member_record *mc_member_record ) {
int joined;
int rc;
/* Record parameters */
ipoib->broadcast_joined =
( mc_member_record->scope__join_state & 0x0f );
joined = ( mc_member_record->scope__join_state & 0x0f );
ipoib->data_qkey = ntohl ( mc_member_record->qkey );
ipoib->broadcast_lid = ntohs ( mc_member_record->mlid );
DBGC ( ipoib, "IPoIB %p %s broadcast group: qkey %lx mlid %x\n",
ipoib, ( ipoib->broadcast_joined ? "joined" : "left" ),
ipoib->data_qkey, ipoib->broadcast_lid );
ipoib, ( joined ? "joined" : "left" ), ipoib->data_qkey,
ipoib->broadcast_lid );
/* Update data queue pair qkey */
if ( ( rc = ib_modify_qp ( ipoib->ibdev, ipoib->data.qp,
IB_MODIFY_QKEY, ipoib->data_qkey ) ) != 0 ){
DBGC ( ipoib, "IPoIB %p could not update data qkey: %s\n",
ipoib, strerror ( rc ) );
return;
}
}
/**
@ -741,6 +759,56 @@ static void ipoib_irq ( struct net_device *netdev __unused,
/* No implementation */
}
/**
 * Join IPv4 broadcast multicast group
 *
 * @v ipoib		IPoIB device
 * @ret rc		Return status code
 *
 * Attaches the data queue pair to the broadcast multicast GID and
 * sends a multicast member record join request.  If the data queue
 * pair does not exist, there is nothing to attach and the function
 * returns success without doing anything.
 *
 * On failure the device is left detached, with the
 * broadcast_attached flag clear, so that callers may safely destroy
 * the data queue pair or retry the join later.
 */
static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
	int rc;

	/* Sanity check */
	if ( ! ipoib->data.qp )
		return 0;

	/* Attach data queue to broadcast multicast GID */
	assert ( ipoib->broadcast_attached == 0 );
	if ( ( rc = ib_mcast_attach ( ipoib->ibdev, ipoib->data.qp,
				      &ipoib->broadcast_gid ) ) != 0 ){
		DBGC ( ipoib, "IPoIB %p could not attach to broadcast GID: "
		       "%s\n", ipoib, strerror ( rc ) );
		goto err_attach;
	}
	ipoib->broadcast_attached = 1;

	/* Initiate broadcast group join */
	if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast_gid,
					     1 ) ) != 0 ) {
		DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n",
		       ipoib, strerror ( rc ) );
		goto err_join;
	}

	return 0;

 err_join:
	/* Undo the attach, so that the queue pair is not left attached
	 * to a group we never asked to join and a later join attempt
	 * does not trip the assertion above.
	 */
	ib_mcast_detach ( ipoib->ibdev, ipoib->data.qp,
			  &ipoib->broadcast_gid );
	ipoib->broadcast_attached = 0;
 err_attach:
	return rc;
}
/**
 * Leave IPv4 broadcast multicast group
 *
 * @v ipoib		IPoIB device
 *
 * Detaches the data queue pair from the broadcast multicast GID if
 * it is currently attached; otherwise does nothing.
 */
static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) {

	/* Nothing to do unless we are attached */
	if ( ! ipoib->broadcast_attached )
		return;

	/* Detach data queue from broadcast multicast GID */
	assert ( ipoib->data.qp != NULL );
	ib_mcast_detach ( ipoib->ibdev, ipoib->data.qp,
			  &ipoib->broadcast_gid );
	ipoib->broadcast_attached = 0;
}
/**
* Open IPoIB network device
*
@ -749,22 +817,53 @@ static void ipoib_irq ( struct net_device *netdev __unused,
*/
static int ipoib_open ( struct net_device *netdev ) {
struct ipoib_device *ipoib = netdev->priv;
struct ib_device *ibdev = ipoib->ibdev;
struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
int rc;
/* Attach to broadcast multicast GID */
if ( ( rc = ib_mcast_attach ( ibdev, ipoib->data.qp,
&ipoib->broadcast_gid ) ) != 0 ) {
DBG ( "Could not attach to broadcast GID: %s\n",
strerror ( rc ) );
return rc;
/* Allocate metadata queue set */
if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta,
IPOIB_META_NUM_CQES,
IPOIB_META_NUM_SEND_WQES,
IPOIB_META_NUM_RECV_WQES,
IB_GLOBAL_QKEY ) ) != 0 ) {
DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n",
ipoib, strerror ( rc ) );
goto err_create_meta_qset;
}
/* Allocate data queue set */
if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data,
IPOIB_DATA_NUM_CQES,
IPOIB_DATA_NUM_SEND_WQES,
IPOIB_DATA_NUM_RECV_WQES,
IB_GLOBAL_QKEY ) ) != 0 ) {
DBGC ( ipoib, "IPoIB %p could not allocate data QP: %s\n",
ipoib, strerror ( rc ) );
goto err_create_data_qset;
}
/* Update MAC address with data QPN */
mac->qpn = htonl ( ipoib->data.qp->qpn );
/* Fill receive rings */
ipoib_refill_recv ( ipoib, &ipoib->meta );
ipoib_refill_recv ( ipoib, &ipoib->data );
/* Join broadcast group */
if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
ipoib, strerror ( rc ) );
goto err_join_broadcast;
}
return 0;
err_join_broadcast:
ipoib_destroy_qset ( ipoib, &ipoib->data );
err_create_data_qset:
ipoib_destroy_qset ( ipoib, &ipoib->meta );
err_create_meta_qset:
return rc;
}
/**
@ -774,12 +873,17 @@ static int ipoib_open ( struct net_device *netdev ) {
*/
static void ipoib_close ( struct net_device *netdev ) {
struct ipoib_device *ipoib = netdev->priv;
struct ib_device *ibdev = ipoib->ibdev;
struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
/* Detach from broadcast multicast GID */
ib_mcast_detach ( ibdev, ipoib->data.qp, &ipoib->broadcast_gid );
/* Leave broadcast group */
ipoib_leave_broadcast_group ( ipoib );
/* FIXME: should probably flush the receive ring */
/* Remove data QPN from MAC address */
mac->qpn = 0;
/* Tear down the queues */
ipoib_destroy_qset ( ipoib, &ipoib->data );
ipoib_destroy_qset ( ipoib, &ipoib->meta );
}
/** IPoIB network device operations */
@ -792,44 +896,53 @@ static struct net_device_operations ipoib_operations = {
};
/**
* Join IPoIB broadcast group
* Update IPoIB dynamic Infiniband parameters
*
* @v ipoib IPoIB device
* @ret rc Return status code
*
* The Infiniband port GID and partition key will change at runtime,
* when the link is established (or lost). The MAC address is based
* on the port GID, and the broadcast GID is based on the partition
* key. This function recalculates these IPoIB device parameters.
*/
static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
static void ipoib_set_ib_params ( struct ipoib_device *ipoib ) {
struct ib_device *ibdev = ipoib->ibdev;
unsigned int delay_ms;
struct ipoib_mac *mac;
/* Calculate GID portion of MAC address based on port GID */
mac = ( ( struct ipoib_mac * ) ipoib->netdev->ll_addr );
memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) );
/* Calculate broadcast GID based on partition key */
memcpy ( &ipoib->broadcast_gid, &ipv4_broadcast_gid,
sizeof ( ipoib->broadcast_gid ) );
ipoib->broadcast_gid.u.words[2] = htons ( ibdev->pkey );
}
/**
* Handle link status change
*
* @v ibdev Infiniband device
*/
void ipoib_link_state_changed ( struct ib_device *ibdev ) {
struct net_device *netdev = ib_get_ownerdata ( ibdev );
struct ipoib_device *ipoib = netdev->priv;
int rc;
/* Make sure we have some receive descriptors */
ipoib_refill_recv ( ipoib, &ipoib->meta );
/* Leave existing broadcast group */
ipoib_leave_broadcast_group ( ipoib );
/* Send join request */
if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast_gid,
1 ) ) != 0 ) {
DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n",
ipoib, strerror ( rc ) );
return rc;
}
/* Wait for join to complete. Ideally we wouldn't delay for
* this long, but we need the queue key before we can set up
* the data queue pair, which we need before we can know the
* MAC address.
/* Update MAC address and broadcast GID based on new port GID
* and partition key.
*/
for ( delay_ms = IPOIB_JOIN_MAX_DELAY_MS ; delay_ms ; delay_ms-- ) {
mdelay ( 1 );
ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send,
ipoib_meta_complete_recv );
ipoib_refill_recv ( ipoib, &ipoib->meta );
if ( ipoib->broadcast_joined )
return 0;
}
DBGC ( ipoib, "IPoIB %p timed out waiting for broadcast join\n",
ipoib );
ipoib_set_ib_params ( ipoib );
return -ETIMEDOUT;
/* Join new broadcast group */
if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
DBGC ( ipoib, "IPoIB %p could not rejoin broadcast group: "
"%s\n", ipoib, strerror ( rc ) );
return;
}
}
/**
@ -841,7 +954,6 @@ static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
int ipoib_probe ( struct ib_device *ibdev ) {
struct net_device *netdev;
struct ipoib_device *ipoib;
struct ipoib_mac *mac;
int rc;
/* Allocate network device */
@ -856,44 +968,11 @@ int ipoib_probe ( struct ib_device *ibdev ) {
ipoib->netdev = netdev;
ipoib->ibdev = ibdev;
/* Calculate broadcast GID */
memcpy ( &ipoib->broadcast_gid, &ipv4_broadcast_gid,
sizeof ( ipoib->broadcast_gid ) );
ipoib->broadcast_gid.u.words[2] = htons ( ibdev->pkey );
/* Allocate metadata queue set */
if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta,
IPOIB_META_NUM_CQES,
IPOIB_META_NUM_SEND_WQES,
IPOIB_META_NUM_RECV_WQES,
IB_GLOBAL_QKEY ) ) != 0 ) {
DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n",
ipoib, strerror ( rc ) );
goto err_create_meta_qset;
}
/* Join broadcast group */
if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
ipoib, strerror ( rc ) );
goto err_join_broadcast_group;
}
/* Allocate data queue set */
if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data,
IPOIB_DATA_NUM_CQES,
IPOIB_DATA_NUM_SEND_WQES,
IPOIB_DATA_NUM_RECV_WQES,
ipoib->data_qkey ) ) != 0 ) {
DBGC ( ipoib, "IPoIB %p could not allocate data QP: %s\n",
ipoib, strerror ( rc ) );
goto err_create_data_qset;
}
/* Construct MAC address */
mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
mac->qpn = htonl ( ipoib->data.qp->qpn );
memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) );
/* Calculate as much of the broadcast GID and the MAC address
* as we can. We won't know either of these in full until we
* have link-up.
*/
ipoib_set_ib_params ( ipoib );
/* Register network device */
if ( ( rc = register_netdev ( netdev ) ) != 0 )
@ -902,11 +981,6 @@ int ipoib_probe ( struct ib_device *ibdev ) {
return 0;
err_register_netdev:
ipoib_destroy_qset ( ipoib, &ipoib->data );
err_join_broadcast_group:
err_create_data_qset:
ipoib_destroy_qset ( ipoib, &ipoib->meta );
err_create_meta_qset:
netdev_nullify ( netdev );
netdev_put ( netdev );
return rc;
@ -919,11 +993,8 @@ int ipoib_probe ( struct ib_device *ibdev ) {
*/
void ipoib_remove ( struct ib_device *ibdev ) {
struct net_device *netdev = ib_get_ownerdata ( ibdev );
struct ipoib_device *ipoib = netdev->priv;
unregister_netdev ( netdev );
ipoib_destroy_qset ( ipoib, &ipoib->data );
ipoib_destroy_qset ( ipoib, &ipoib->meta );
netdev_nullify ( netdev );
netdev_put ( netdev );
}

View File

@ -95,6 +95,11 @@ struct ib_queue_pair {
void *owner_priv;
};
/** Infiniband queue pair modification flags
 *
 * These are bit flags, combined into the modification list passed to
 * ib_modify_qp() and to the driver's modify_qp() operation.
 */
enum ib_queue_pair_mods {
/** Queue key is to be updated */
IB_MODIFY_QKEY = 0x0001,
};
/** An Infiniband Completion Queue */
struct ib_completion_queue {
/** Completion queue number */
@ -187,6 +192,16 @@ struct ib_device_operations {
*/
int ( * create_qp ) ( struct ib_device *ibdev,
struct ib_queue_pair *qp );
/** Modify queue pair
*
* @v ibdev Infiniband device
* @v qp Queue pair
* @v mod_list Modification list
* @ret rc Return status code
*/
int ( * modify_qp ) ( struct ib_device *ibdev,
struct ib_queue_pair *qp,
unsigned long mod_list );
/** Destroy queue pair
*
* @v ibdev Infiniband device
@ -291,6 +306,8 @@ struct ib_device {
struct ib_device_operations *op;
/** Port number */
unsigned int port;
/** Link state */
int link_up;
/** Port GID */
struct ib_gid port_gid;
/** Subnet manager LID */
@ -311,6 +328,8 @@ extern struct ib_queue_pair *
ib_create_qp ( struct ib_device *ibdev, unsigned int num_send_wqes,
struct ib_completion_queue *send_cq, unsigned int num_recv_wqes,
struct ib_completion_queue *recv_cq, unsigned long qkey );
extern int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp,
unsigned long mod_list, unsigned long qkey );
extern void ib_destroy_qp ( struct ib_device *ibdev,
struct ib_queue_pair *qp );
extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
@ -319,6 +338,7 @@ extern struct ib_device * alloc_ibdev ( size_t priv_size );
extern int register_ibdev ( struct ib_device *ibdev );
extern void unregister_ibdev ( struct ib_device *ibdev );
extern void free_ibdev ( struct ib_device *ibdev );
extern void ib_link_state_changed ( struct ib_device *ibdev );
/**
* Post send work queue entry

View File

@ -72,6 +72,7 @@ static inline struct net_device * alloc_ipoibdev ( size_t priv_size ) {
return netdev;
}
extern void ipoib_link_state_changed ( struct ib_device *ibdev );
extern int ipoib_probe ( struct ib_device *ibdev );
extern void ipoib_remove ( struct ib_device *ibdev );

View File

@ -152,15 +152,41 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
return qp;
}
/**
 * Modify queue pair
 *
 * @v ibdev		Infiniband device
 * @v qp		Queue pair
 * @v mod_list		Modification list
 * @v qkey		New queue key, if applicable
 * @ret rc		Return status code
 *
 * The new queue key is stored in the queue pair before the driver is
 * invoked, since the driver reads it from there.  If the driver
 * rejects the modification, the previous queue key is restored so
 * that the software state keeps matching what the hardware is using.
 */
int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp,
		   unsigned long mod_list, unsigned long qkey ) {
	/* Remember the current queue key in case we need to roll back */
	unsigned long old_qkey = qp->qkey;
	int rc;

	DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );

	/* Record new parameters for the driver to pick up */
	if ( mod_list & IB_MODIFY_QKEY )
		qp->qkey = qkey;

	/* Hand off to the driver */
	if ( ( rc = ibdev->op->modify_qp ( ibdev, qp, mod_list ) ) != 0 ) {
		DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
		       ibdev, qp->qpn, strerror ( rc ) );
		/* Roll back; harmless if the queue key was not changed */
		qp->qkey = old_qkey;
		return rc;
	}

	return 0;
}
/**
* Destroy queue pair
*
* @v ibdev Infiniband device
* @v qp Queue pair
*/
void ib_destroy_qp ( struct ib_device *ibdev,
struct ib_queue_pair *qp ) {
DBGC ( ibdev, "IBDEV %p destroying queue pair %#lx\n",
void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
ibdev, qp->qpn );
ibdev->op->destroy_qp ( ibdev, qp );
list_del ( &qp->send.list );
@ -279,38 +305,6 @@ static int ib_get_pkey_table ( struct ib_device *ibdev,
return 0;
}
/**
* Wait for link up
*
* @v ibdev Infiniband device
* @ret rc Return status code
*
* This function shouldn't really exist. Unfortunately, IB links take
* a long time to come up, and we can't get various key parameters
* e.g. our own IPoIB MAC address without information from the subnet
* manager). We should eventually make link-up an asynchronous event.
*/
static int ib_wait_for_link ( struct ib_device *ibdev ) {
struct ib_mad_port_info port_info;
unsigned int retries;
int rc;
printf ( "Waiting for Infiniband link-up..." );
for ( retries = 20 ; retries ; retries-- ) {
if ( ( rc = ib_get_port_info ( ibdev, &port_info ) ) != 0 )
continue;
if ( ( ( port_info.port_state__link_speed_supported ) & 0xf )
== 4 ) {
printf ( "ok\n" );
return 0;
}
printf ( "." );
sleep ( 1 );
}
printf ( "failed\n" );
return -ENODEV;
};
/**
* Get MAD parameters
*
@ -326,9 +320,13 @@ static int ib_get_mad_params ( struct ib_device *ibdev ) {
} u;
int rc;
/* Port info gives us the first half of the port GID and the SM LID */
/* Port info gives us the link state, the first half of the
* port GID and the SM LID.
*/
if ( ( rc = ib_get_port_info ( ibdev, &u.port_info ) ) != 0 )
return rc;
ibdev->link_up = ( ( u.port_info.port_state__link_speed_supported
& 0xf ) == 4 );
memcpy ( &ibdev->port_gid.u.bytes[0], u.port_info.gid_prefix, 8 );
ibdev->sm_lid = ntohs ( u.port_info.mastersm_lid );
@ -391,10 +389,6 @@ int register_ibdev ( struct ib_device *ibdev ) {
if ( ( rc = ib_open ( ibdev ) ) != 0 )
goto err_open;
/* Wait for link */
if ( ( rc = ib_wait_for_link ( ibdev ) ) != 0 )
goto err_wait_for_link;
/* Get MAD parameters */
if ( ( rc = ib_get_mad_params ( ibdev ) ) != 0 )
goto err_get_mad_params;
@ -410,7 +404,6 @@ int register_ibdev ( struct ib_device *ibdev ) {
err_ipoib_probe:
err_get_mad_params:
err_wait_for_link:
ib_close ( ibdev );
err_open:
return rc;
@ -435,3 +428,21 @@ void free_ibdev ( struct ib_device *ibdev ) {
free ( ibdev );
}
/**
 * Handle Infiniband link state change
 *
 * @v ibdev		Infiniband device
 *
 * Re-reads the MAD parameters for the device and, if that succeeds,
 * passes the notification on to IPoIB.  If the parameters cannot be
 * read, the failure is logged and IPoIB is not notified.
 */
void ib_link_state_changed ( struct ib_device *ibdev ) {
	int rc = ib_get_mad_params ( ibdev );

	if ( rc == 0 ) {
		/* Parameters refreshed; notify IPoIB of link state change */
		ipoib_link_state_changed ( ibdev );
		return;
	}

	DBGC ( ibdev, "IBDEV %p could not update MAD parameters: %s\n",
	       ibdev, strerror ( rc ) );
}